In [2]:
import numpy as np
from sklearn.datasets import load_diabetes

In [3]:
dataset = load_diabetes()
print(dataset.DESCR)

.. _diabetes_dataset:

Diabetes dataset
----------------

Ten baseline variables, age, sex, body mass index, average blood
pressure, and six blood serum measurements were obtained for each of n =
442 diabetes patients, as well as the response of interest, a
quantitative measure of disease progression one year after baseline.

**Data Set Characteristics:**

  :Number of Instances: 442

  :Number of Attributes: First 10 columns are numeric predictive values

  :Target: Column 11 is a quantitative measure of disease progression one year after baseline

  :Attribute Information:
      - age     age in years
      - sex
      - bmi     body mass index
      - bp      average blood pressure
      - s1      tc, total serum cholesterol
      - s2      ldl, low-density lipoproteins
      - s3      hdl, high-density lipoproteins
      - s4      tch, total cholesterol / HDL
      - s5      ltg, possibly log of serum triglycerides level
      - s6      glu, blood sugar level

Note: Each of these 1

In [4]:
data = dataset.data
print(data.shape)

(442, 10)


In [5]:
target = dataset.target

In [7]:
shuffle_idx = np.random.choice(data.shape[0], data.shape[0], replace=False)
shuffle_idx

array([401, 195, 190, 365, 285, 410, 422,  44, 215,  95, 431, 441, 125,
       416, 136, 380,  68, 274,  39, 254,  32, 120, 419,  13, 402, 381,
       230,  15, 247, 266,  21, 437, 145, 409, 101,  86,  29, 133, 246,
       197, 147, 374, 388, 155,  50, 337, 423, 332, 153, 146, 263, 413,
       272, 407, 201, 280, 165,  60, 144,  10, 315, 421, 346, 175,  36,
       281, 177, 224, 256,  83,  49,  87, 198, 209, 118, 400,  91,  75,
       140, 287, 307, 237,  33, 373, 349,  17, 309, 211, 321, 160, 240,
       150, 277, 141, 176,  72, 344,  76, 156, 428, 244, 289, 261, 179,
       306, 382, 288, 110, 132,  97, 168, 389,  45, 214, 270, 330, 339,
       292, 180, 350,  62, 137, 370, 250,  34, 327, 268, 336, 360,  41,
        14, 376,   2, 300, 196, 129, 151, 205, 385, 138, 107, 241,  74,
       283, 265, 294,   1, 334, 279, 143, 117, 187, 258,  18, 276,   9,
        80, 206, 238, 186, 159, 387, 267, 345, 308, 359,  16, 162, 342,
       221, 139, 378, 305, 152,  20, 252, 223, 134,  57, 429,  7

In [9]:
data[shuffle_idx]
target[shuffle_idx]

mini_batch_size = 40
num_minibatch = data.shape[0] // mini_batch_size
print(num_minibatch)

11


In [10]:
for m in range(num_minibatch):
    mini_batch = data[(m * mini_batch_size):((m+1) * mini_batch_size)]
    mini_X = mini_batch[:, :-1]
    mini_y = mini_batch[:, -1]
    print(mini_X.shape, mini_y.shape)

(40, 9) (40,)
(40, 9) (40,)
(40, 9) (40,)
(40, 9) (40,)
(40, 9) (40,)
(40, 9) (40,)
(40, 9) (40,)
(40, 9) (40,)
(40, 9) (40,)
(40, 9) (40,)
(40, 9) (40,)


In [11]:
theta = np.random.uniform(-1., 1., size=data.shape[1])
print(theta)

[ 0.28830207 -0.94014058  0.70222315  0.94550847 -0.38335358  0.26616439
  0.19396441  0.34309983 -0.17667243 -0.99802577]


In [12]:
alpha = 0.01

for m in range(num_minibatch):
    mini_X = data[(m * mini_batch_size):((m+1) * mini_batch_size)]
    mini_y = target[(m * mini_batch_size):((m+1) * mini_batch_size)]
    
    hat_y = np.dot(mini_X, theta)
    theta = theta - alpha*np.dot((hat_y - mini_y), mini_X)
    print(theta)

[-0.13260237 -0.98431181  1.33659662  0.95026266 -0.98242216 -0.71427704
  0.19168012  0.04347759  0.48699906 -1.51296896]
[-0.42089831 -0.86787557  0.64043254  0.85593188 -1.73969643 -1.34142911
  0.10096626 -0.282417    0.19933466 -1.82674581]
[-0.55542119 -1.44401247  1.29287131  0.98197464 -1.2647803  -0.90612001
 -0.08510526  0.04460056  0.65814589 -1.17477619]
[-1.07899607 -1.48737204  3.49008305  2.17241803 -1.23406898 -0.48416921
 -1.33860076  1.03774619  1.4365176  -0.10199515]
[-0.7158244  -1.35382762  3.74864525  2.42333195 -0.8645862  -0.15299455
 -2.23497874  1.93356213  2.47188906  0.72629201]
[ 0.23939355 -1.15333041  4.12496871  3.53269533 -0.20565293  0.51919396
 -2.5993535   2.56891699  3.38438508  1.8229836 ]
[ 0.85759648 -0.67346266  5.99286168  5.00150007  0.57226645  0.91540508
 -3.00916493  3.46892587  4.7422943   2.54423246]
[ 2.22636839 -0.9843944   6.40628602  5.39470123  1.60709893  2.01349783
 -3.16355182  3.97134661  5.34899453  2.71800204]
[ 2.8163236  -0.

In [13]:
alpha = 0.01
epoch = 10
theta = np.random.uniform(-1., 1., size=data.shape[1])

for iepoch in range(epoch):
    
    shuffle_idx = np.random.choice(data.shape[0], data.shape[0], replace=False)
    
    data = data[shuffle_idx]
    target = target[shuffle_idx]
    
    loss = 0.
    
    for m in range(num_minibatch):
        mini_X = data[(m * mini_batch_size):((m+1) * mini_batch_size)]
        mini_y = target[(m * mini_batch_size):((m+1) * mini_batch_size)]
        
        hat_y = np.dot(mini_X, theta)
        
        theta = theta - alpha*np.dot((hat_y-mini_y), mini_X)
        
        loss += np.mean((hat_y-mini_y)**2)
        
    print(loss / mini_batch_size)

7995.232134525417
7945.100505068798
7918.713410649876
7839.529705437266
7796.646118402628
7783.300101669726
7751.137736803144
7723.336465312905
7701.2835561764705
7675.286690028183


In [14]:
def sgd(data, target, alpha, minibatch_size, epoch, verbose=False):
    
    theta = np.random.uniform(-1., 1., size=data.shape[1])
    num_minibatch = data.shape[0] // minibatch_size
    
    for iepoch in range(epoch):
        
        shuffle_idx = np.random.choice(data.shape[0], data.shape[0], replace=False)
        
        data = data[shuffle_idx]
        target = target[shuffle_idx]
        
        loss = 0.
        
        for m in range(num_minibatch):
            mini_X = data[(m * minibatch_size):((m+1) * minibatch_size)]
            mini_y = target[(m * minibatch_size):((m+1) * minibatch_size)]
            
            hat_y = np.dot(mini_X, theta)
            
            theta = theta - alpha*np.dot((hat_y-mini_y), mini_X)
            
            loss += np.mean((hat_y-mini_y)**2)
            
        if verbose:
            print(f'epoch {iepoch+1}: loss = {(loss/minibatch_size):.4f}')
        
    return loss / minibatch_size, theta

In [15]:
data = dataset.data
target = dataset.target

alpha = 0.001
minibatch_size = data.shape[0]
epoch = 1000

sgd(data, target, alpha, minibatch_size, epoch, verbose=True)

epoch 1: loss = 65.7661
epoch 2: loss = 65.7271
epoch 3: loss = 65.6884
epoch 4: loss = 65.6500
epoch 5: loss = 65.6118
epoch 6: loss = 65.5740
epoch 7: loss = 65.5364
epoch 8: loss = 65.4990
epoch 9: loss = 65.4619
epoch 10: loss = 65.4251
epoch 11: loss = 65.3886
epoch 12: loss = 65.3523
epoch 13: loss = 65.3163
epoch 14: loss = 65.2805
epoch 15: loss = 65.2450
epoch 16: loss = 65.2097
epoch 17: loss = 65.1747
epoch 18: loss = 65.1400
epoch 19: loss = 65.1054
epoch 20: loss = 65.0712
epoch 21: loss = 65.0371
epoch 22: loss = 65.0034
epoch 23: loss = 64.9698
epoch 24: loss = 64.9365
epoch 25: loss = 64.9034
epoch 26: loss = 64.8706
epoch 27: loss = 64.8380
epoch 28: loss = 64.8056
epoch 29: loss = 64.7734
epoch 30: loss = 64.7415
epoch 31: loss = 64.7098
epoch 32: loss = 64.6783
epoch 33: loss = 64.6471
epoch 34: loss = 64.6160
epoch 35: loss = 64.5852
epoch 36: loss = 64.5546
epoch 37: loss = 64.5242
epoch 38: loss = 64.4940
epoch 39: loss = 64.4640
epoch 40: loss = 64.4343
epoch 41:

epoch 369: loss = 60.3799
epoch 370: loss = 60.3755
epoch 371: loss = 60.3713
epoch 372: loss = 60.3670
epoch 373: loss = 60.3627
epoch 374: loss = 60.3585
epoch 375: loss = 60.3543
epoch 376: loss = 60.3501
epoch 377: loss = 60.3459
epoch 378: loss = 60.3417
epoch 379: loss = 60.3376
epoch 380: loss = 60.3335
epoch 381: loss = 60.3294
epoch 382: loss = 60.3253
epoch 383: loss = 60.3212
epoch 384: loss = 60.3172
epoch 385: loss = 60.3131
epoch 386: loss = 60.3091
epoch 387: loss = 60.3051
epoch 388: loss = 60.3011
epoch 389: loss = 60.2972
epoch 390: loss = 60.2932
epoch 391: loss = 60.2893
epoch 392: loss = 60.2854
epoch 393: loss = 60.2815
epoch 394: loss = 60.2776
epoch 395: loss = 60.2737
epoch 396: loss = 60.2699
epoch 397: loss = 60.2660
epoch 398: loss = 60.2622
epoch 399: loss = 60.2584
epoch 400: loss = 60.2546
epoch 401: loss = 60.2509
epoch 402: loss = 60.2471
epoch 403: loss = 60.2434
epoch 404: loss = 60.2397
epoch 405: loss = 60.2360
epoch 406: loss = 60.2323
epoch 407: l

epoch 744: loss = 59.5024
epoch 745: loss = 59.5011
epoch 746: loss = 59.4998
epoch 747: loss = 59.4986
epoch 748: loss = 59.4973
epoch 749: loss = 59.4961
epoch 750: loss = 59.4948
epoch 751: loss = 59.4936
epoch 752: loss = 59.4923
epoch 753: loss = 59.4911
epoch 754: loss = 59.4898
epoch 755: loss = 59.4886
epoch 756: loss = 59.4873
epoch 757: loss = 59.4861
epoch 758: loss = 59.4849
epoch 759: loss = 59.4837
epoch 760: loss = 59.4824
epoch 761: loss = 59.4812
epoch 762: loss = 59.4800
epoch 763: loss = 59.4788
epoch 764: loss = 59.4776
epoch 765: loss = 59.4764
epoch 766: loss = 59.4751
epoch 767: loss = 59.4739
epoch 768: loss = 59.4727
epoch 769: loss = 59.4715
epoch 770: loss = 59.4704
epoch 771: loss = 59.4692
epoch 772: loss = 59.4680
epoch 773: loss = 59.4668
epoch 774: loss = 59.4656
epoch 775: loss = 59.4644
epoch 776: loss = 59.4632
epoch 777: loss = 59.4621
epoch 778: loss = 59.4609
epoch 779: loss = 59.4597
epoch 780: loss = 59.4586
epoch 781: loss = 59.4574
epoch 782: l

(59.25654942309549,
 array([  35.29209328, -105.47966594,  383.328064  ,  251.88750394,
           1.04629297,  -45.67268368, -190.40238773,  142.30287685,
         323.22811726,  143.43368153]))

In [25]:
data = dataset.data
target = dataset.target

alpha = 0.1
minibatch_size = 400
epoch = 1000

sgd(data, target, alpha, minibatch_size, epoch, verbose=True)

epoch 1: loss = 72.7449
epoch 2: loss = 69.3189
epoch 3: loss = 68.2880
epoch 4: loss = 67.0703
epoch 5: loss = 65.4014
epoch 6: loss = 66.8772
epoch 7: loss = 67.0139
epoch 8: loss = 65.4936
epoch 9: loss = 66.1604
epoch 10: loss = 65.5382
epoch 11: loss = 65.7912
epoch 12: loss = 65.1945
epoch 13: loss = 64.3799
epoch 14: loss = 65.1121
epoch 15: loss = 64.9283
epoch 16: loss = 66.7762
epoch 17: loss = 63.3653
epoch 18: loss = 65.5603
epoch 19: loss = 64.5854
epoch 20: loss = 64.5796
epoch 21: loss = 65.2627
epoch 22: loss = 65.1836
epoch 23: loss = 65.1308
epoch 24: loss = 64.8189
epoch 25: loss = 65.2386
epoch 26: loss = 63.8338
epoch 27: loss = 64.6418
epoch 28: loss = 65.3877
epoch 29: loss = 65.4782
epoch 30: loss = 65.5899
epoch 31: loss = 64.7045
epoch 32: loss = 64.7932
epoch 33: loss = 65.3481
epoch 34: loss = 64.9541
epoch 35: loss = 64.0008
epoch 36: loss = 65.1731
epoch 37: loss = 64.2624
epoch 38: loss = 65.2941
epoch 39: loss = 65.7639
epoch 40: loss = 65.6273
epoch 41:

epoch 641: loss = 64.2290
epoch 642: loss = 64.8671
epoch 643: loss = 64.7248
epoch 644: loss = 66.4838
epoch 645: loss = 65.8235
epoch 646: loss = 64.7384
epoch 647: loss = 64.4146
epoch 648: loss = 64.8820
epoch 649: loss = 63.8082
epoch 650: loss = 64.8697
epoch 651: loss = 64.3883
epoch 652: loss = 64.0489
epoch 653: loss = 65.1898
epoch 654: loss = 65.0014
epoch 655: loss = 64.3790
epoch 656: loss = 65.0143
epoch 657: loss = 65.4845
epoch 658: loss = 66.3371
epoch 659: loss = 64.1840
epoch 660: loss = 63.4752
epoch 661: loss = 64.9837
epoch 662: loss = 64.7005
epoch 663: loss = 65.0105
epoch 664: loss = 66.0605
epoch 665: loss = 66.6231
epoch 666: loss = 65.9123
epoch 667: loss = 64.8970
epoch 668: loss = 64.7568
epoch 669: loss = 65.2983
epoch 670: loss = 65.1850
epoch 671: loss = 63.7062
epoch 672: loss = 65.7163
epoch 673: loss = 64.6905
epoch 674: loss = 64.4952
epoch 675: loss = 65.0104
epoch 676: loss = 65.5815
epoch 677: loss = 65.5148
epoch 678: loss = 64.8172
epoch 679: l

epoch 992: loss = 65.7684
epoch 993: loss = 64.6705
epoch 994: loss = 65.6786
epoch 995: loss = 64.8884
epoch 996: loss = 63.7249
epoch 997: loss = 65.6435
epoch 998: loss = 65.2928
epoch 999: loss = 65.0983
epoch 1000: loss = 64.2732


(64.27318860954664,
 array([   4.09774567, -233.92628076,  518.50979889,  320.04808002,
        -450.00197649,  182.10218185,  -57.1676849 ,  138.05191305,
         617.77044096,   37.1474701 ]))