In [13]:
import implementation as imp
from created_helpers import *
from proj1_helpers import *

In [2]:
y_train, x_train, ids_train = load_csv_data("train.csv")
y_test, x_test, ids_test = load_csv_data("test.csv")

In [16]:
def ridge_regression(y, tx, lambda_):
    """implement ridge regression using normal equations."""
    
    aI = 2 * tx.shape[0] * lambda_ * np.identity(tx.shape[1])
    a = tx.T.dot(tx) + aI
    b = tx.T.dot(y)
    
    optimal_weight = np.linalg.solve(a, b)
    loss = ridge_regression_cost(y, tx, optimal_weight, lambda_)
    return (optimal_weight, loss)

def calculate_mse(e):
    """Calculate the mse for vector e."""
    return (1/2) * np.mean(e**2)

def compute_loss(y, tx, w):
    """Calculate the loss.
    You can calculate the loss using mse or mae.
    """
    e = y - tx.dot(w)
    return calculate_mse(e)

def ridge_regression_cost(y, tx, w, lambda_):
    return compute_loss(y, tx, w) + lambda_*np.sum(np.square(w))

In [55]:
x_train2 = standardize(x_train)
initial_w = np.zeros((x_train2.shape[1],1))
max_iters = 22
gamma = 0.01
w_ridge, loss_ridge = least_squares_GD(y_train, x_train,  initial_w, max_iters, gamma)

MemoryError: 

In [53]:
def accuracy(y_pred, y):
    assert len(y_pred) == len(y)
    correct = 0
    for i in range(len(y_pred)):
        if y_pred[i] == y[i]:
            correct += 1
    return correct / len(y)

y_pred = predict_labels(w_ridge, x_train)
accuracy(y_pred, y_train)

0.744328

In [40]:
def standardize(x):
    ''' fill your code in here...
    '''
    centered_data = x - np.mean(x, axis=0)
    std_data = centered_data / np.std(centered_data, axis=0)
    return std_data

# x_train2 = standardize(x_train)
# initial_w = np.zeros((x_train2.shape[1],1))
# max_iters = 10000
# gamma = 0.00000004

def calculate_loss_logistic(y, tx, w):
    """compute the cost by negative log likelihood."""
    pred = sigmoid(tx.dot(w))
    loss = y.T.dot(np.log(pred)) + (1 - y).T.dot(np.log(1 - pred))
    return np.squeeze(- loss)

def calculate_gradient_logistic(y, tx, w):
    """compute the gradient of loss."""
    pred = sigmoid(tx.dot(w))
    grad = tx.T.dot(pred - y)
    return grad

def logistic_regression(y, tx, initial_w, max_iters, gamma):
    
    """ Logistic regression using gradient descent"""
    y = np.expand_dims(y, axis=1) # assume that y is unchanged when loaded from data
    w = initial_w
    loss_prev = 0
    for n_iter in range(max_iters):
        loss = calculate_loss_logistic(y, tx, w)
        
        #convergence criteria
        if abs(loss_prev - loss) < 0.00001:
            break
        if np.isnan(loss):
            break
        print(loss, " ", loss_prev, " ", n_iter)
        gradient = calculate_gradient_logistic(y, tx, w)
        w = w - gamma * gradient
        loss_prev = loss
    return w, loss

w_log, loss_log = logistic_regression(y_train, x_train2, initial_w, max_iters, gamma)

173286.79513998525   0   0
171745.28987987677   173286.79513998525   1
170265.82300600293   171745.28987987677   2
168844.90942411072   170265.82300600293   3
167479.2456858964   168844.90942411072   4
166165.7059111501   167479.2456858964   5
164901.33676884748   166165.7059111501   6
163683.35172306746   164901.33676884748   7
162509.12472807936   163683.35172306746   8
161376.18353486236   162509.12472807936   9
160282.2027495482   161376.18353486236   10
159224.9967634201   160282.2027495482   11
158202.51265462898   159224.9967634201   12
157212.82314398437   158202.51265462898   13
156254.11967119796   157212.82314398437   14
155324.70564386196   156254.11967119796   15
154422.98989918857   155324.70564386196   16
153547.48040803353   154422.98989918857   17
152696.77824185285   153547.48040803353   18
151869.57181583645   152696.77824185285   19
151064.6314153771   151869.57181583645   20
150280.80400808636   151064.6314153771   21
149517.0083396403   150280.80400808636   22
148

96062.54149802671   96236.6336339759   188
95889.27241269173   96062.54149802671   189
95716.81950364416   95889.27241269173   190
95545.17597457414   95716.81950364416   191
95374.33510602306   95545.17597457414   192
95204.29025424138   95374.33510602306   193
95035.03485006982   95204.29025424138   194
94866.56239784505   95035.03485006982   195
94698.86647432575   94866.56239784505   196
94531.94072764135   94698.86647432575   197
94365.778876261   94531.94072764135   198
94200.3747079849   94365.778876261   199
94035.72207895207   94200.3747079849   200
93871.81491267   94035.72207895207   201
93708.6471990625   93871.81491267   202
93546.21299353478   93708.6471990625   203
93384.50641605692   93546.21299353478   204
93223.52165026462   93384.50641605692   205
93063.25294257567   93223.52165026462   206
92903.69460132417   93063.25294257567   207
92744.84099590959   92903.69460132417   208
92586.68655596126   92744.84099590959   209
92429.22577051813   92586.68655596126   210
922

72518.39516850104   72606.74659392523   381
72430.27338159273   72518.39516850104   382
72342.38003069005   72430.27338159273   383
72254.71392204016   72342.38003069005   384
72167.27387056549   72254.71392204016   385
72080.05869978585   72167.27387056549   386
71993.06724173963   72080.05869978585   387
71906.2983369052   71993.06724173963   388
71819.75083412597   71906.2983369052   389
71733.42359053346   71819.75083412597   390
71647.31547147149   71733.42359053346   391
71561.4253504228   71647.31547147149   392
71475.75210893605   71561.4253504228   393
71390.29463655213   71475.75210893605   394
71305.05183073189   71390.29463655213   395
71220.02259678656   71305.05183073189   396
71135.2058478061   71220.02259678656   397
71050.60050459014   71135.2058478061   398
70966.20549557795   71050.60050459014   399
70882.01975678233   70966.20549557795   400
70798.0422317209   70882.01975678233   401
70714.27187134916   70798.0422317209   402
70630.70763399472   70714.27187134916   

58696.21809871518   58755.36840980951   574
58637.1659668214   58696.21809871518   575
58578.21165103599   58637.1659668214   576
58519.35479019678   58578.21165103599   577
58460.59502503433   58519.35479019678   578
58401.93199816908   58460.59502503433   579
58343.36535408776   58401.93199816908   580
58284.89473915813   58343.36535408776   581
58226.51980157214   58284.89473915813   582
58168.24019138617   58226.51980157214   583
58110.055560460256   58168.24019138617   584
58051.96556248341   58110.055560460256   585
57993.969852928014   58051.96556248341   586
57936.06808906977   57993.969852928014   587
57878.25992995384   57936.06808906977   588
57820.54503640547   57878.25992995384   589
57762.92307097197   57820.54503640547   590
57705.39369797503   57762.92307097197   591
57647.95658345707   57705.39369797503   592
57590.611395156564   57647.95658345707   593
57533.35780255479   57590.611395156564   594
57476.19547680463   57533.35780255479   595
57419.12409076549   57476.19

48951.5570155459   48996.99260200135   763
48906.174680055294   48951.5570155459   764
48860.84544542368   48906.174680055294   765
48815.56916131789   48860.84544542368   766
48770.345677940175   48815.56916131789   767
48725.17484645435   48770.345677940175   768
48680.056519064645   48725.17484645435   769
48634.99054757631   48680.056519064645   770
48589.97678520926   48634.99054757631   771
48545.01508530986   48589.97678520926   772
48500.10530212356   48545.01508530986   773
48455.247290370724   48500.10530212356   774
48410.440905760915   48455.247290370724   775
48365.68600298671   48410.440905760915   776
48320.982439654996   48365.68600298671   777
48276.330072292185   48320.982439654996   778
48231.72875887208   48276.330072292185   779
48187.178357261815   48231.72875887208   780
48142.678726775164   48187.178357261815   781
48098.22972599801   48142.678726775164   782
48053.831215047394   48098.22972599801   783
48009.48305425077   48053.831215047394   784
47965.18510558

41103.00360857652   41140.43864172741   954
41065.601375185186   41103.00360857652   955
41028.23188302896   41065.601375185186   956
40990.89504412492   41028.23188302896   957
40953.59078672115   40990.89504412492   958
40916.31905382709   40953.59078672115   959
40879.07975984475   40916.31905382709   960
40841.872834767215   40879.07975984475   961
40804.69820826291   40841.872834767215   962
40767.55579558443   40804.69820826291   963
40730.44552680431   40767.55579558443   964
40693.36735068663   40730.44552680431   965
40656.32116639032   40693.36735068663   966
40619.30690616538   40656.32116639032   967
40582.3245206282   40619.30690616538   968
40545.373926368105   40582.3245206282   969
40508.45504016109   40545.373926368105   970
40471.567814797076   40508.45504016109   971
40434.71214942052   40471.567814797076   972
40397.887998135906   40434.71214942052   973
40361.09529841819   40397.887998135906   974
40324.333949095686   40361.09529841819   975
40287.60390790459   403

34609.0031999083   34641.376766309724   1141
34576.65246183131   34609.0031999083   1142
34544.32346927203   34576.65246183131   1143
34512.016865954094   34544.32346927203   1144
34479.7326070267   34512.016865954094   1145
34447.470283101546   34479.7326070267   1146
34415.22983517303   34447.470283101546   1147
34383.01157986419   34415.22983517303   1148
34350.81548114898   34383.01157986419   1149
34318.64109845823   34350.81548114898   1150
34286.48837142595   34318.64109845823   1151
34254.358048466616   34286.48837142595   1152
34222.24928021329   34254.358048466616   1153
34190.16200753418   34222.24928021329   1154
34158.096592504706   34190.16200753418   1155
34126.052980760986   34158.096592504706   1156
34094.03114110185   34126.052980760986   1157
34062.031025947304   34094.03114110185   1158
34030.052135517966   34062.031025947304   1159
33998.09485714964   34030.052135517966   1160
33966.15915362199   33998.09485714964   1161
33934.244981588155   33966.15915362199   116

28992.53670827247   29021.41763034009   1325
28963.672453181585   28992.53670827247   1326
28934.81834026694   28963.672453181585   1327
28905.987228857877   28934.81834026694   1328
28877.172743466275   28905.987228857877   1329
28848.368052529695   28877.172743466275   1330
28819.58666172379   28848.368052529695   1331
28790.81483981409   28819.58666172379   1332
28762.059365944937   28790.81483981409   1333
28733.327475256403   28762.059365944937   1334
28704.604811020836   28733.327475256403   1335
28675.898425453808   28704.604811020836   1336
28647.208408823586   28675.898425453808   1337
28618.534636202152   28647.208408823586   1338
28589.8772070852   28618.534636202152   1339
28561.23599056428   28589.8772070852   1340
28532.61108665634   28561.23599056428   1341
28503.9942400477   28532.61108665634   1342
28475.401729941717   28503.9942400477   1343
28446.82542608754   28475.401729941717   1344
28418.25678816845   28446.82542608754   1345
28389.71279166441   28418.25678816845

23902.208453841915   23928.52059462259   1510
23876.006080401014   23902.208453841915   1511
23849.718085238652   23876.006080401014   1512
23823.442453718162   23849.718085238652   1513
23797.28161103424   23823.442453718162   1514
23771.030246900278   23797.28161103424   1515
23744.790994726063   23771.030246900278   1516
23718.563861154777   23744.790994726063   1517
23692.457156539604   23718.563861154777   1518
23666.254105302738   23692.457156539604   1519
23640.063075005775   23666.254105302738   1520
23613.8842979201   23640.063075005775   1521
23587.831833191914   23613.8842979201   1522
23561.677007579594   23587.831833191914   1523
23535.534154635912   23561.677007579594   1524
23509.403228674433   23535.534154635912   1525
23483.405704107892   23509.403228674433   1526
23457.29879400984   23483.405704107892   1527
23431.20354586793   23457.29879400984   1528
23405.120373664016   23431.20354586793   1529
23379.178386482294   23405.120373664016   1530
23353.118868262332   233

In [41]:
y_pred = predict_labels(w_log, x_train)
accuracy(y_pred, y_train)

0.640948

In [49]:
def reg_logistic_regression(y, tx, lambda_, initial_w, max_iters, gamma):
    
    """Regularized logistic regression using gradient descent"""
    y = np.expand_dims(y, axis=1)
    w = initial_w
    loss_prev = 0
    for n_iter in range(max_iters):
        loss = calculate_loss_logistic(y, tx, w) + lambda_ * np.squeeze(w.T.dot(w))

        #convergence criteria
        if abs(loss_prev - loss) < 0.00001:
            break
        if np.isnan(loss):
            break
        print(loss, " ", loss_prev, " ", n_iter)
        gradient = calculate_gradient_logistic(y, tx, w) + 2 * lambda_ * w
        w -= gamma * gradient
        loss_prev = loss
    return w, loss


In [None]:
w_log, loss_log = reg_logistic_regression(y_train, x_train2, 0.01, initial_w, max_iters, gamma)

32799.36061072521   0   0
32768.21275945558   32799.36061072521   1
32737.08457473817   32768.21275945558   2
32705.976875295535   32737.08457473817   3
32674.88965526825   32705.976875295535   4
32643.821971395526   32674.88965526825   5
32612.773744678892   32643.821971395526   6
32581.74492028865   32612.773744678892   7
32550.73640402425   32581.74492028865   8
32519.748185535325   32550.73640402425   9
32488.779229136395   32519.748185535325   10
32457.82947104779   32488.779229136395   11
32426.899885965373   32457.82947104779   12
32395.990452444927   32426.899885965373   13
32365.10007668956   32395.990452444927   14
32334.22870176647   32365.10007668956   15
32303.377346634592   32334.22870176647   16
32272.546009490678   32303.377346634592   17
32241.733521067967   32272.546009490678   18
32210.940963381567   32241.733521067967   19
32180.167146968644   32210.940963381567   20
32149.413200404437   32180.167146968644   21
32118.67787914443   32149.413200404437   22
32087.96235

27148.7796536935   27176.660493095893   192
27120.9139865945   27148.7796536935   193
27093.063433361727   27120.9139865945   194
27065.210410269385   27093.063433361727   195
27037.389792072307   27065.210410269385   196
27009.584333062703   27037.389792072307   197
26981.775587425844   27009.584333062703   198
26954.000012958484   26981.775587425844   199
26926.239566140583   26954.000012958484   200
26898.4749413504   26926.239566140583   201
26870.744325556705   26898.4749413504   202
26843.00895356549   26870.744325556705   203
26815.308214883153   26843.00895356549   204
26787.602098786454   26815.308214883153   205
26759.931211672785   26787.602098786454   206
26732.254299299548   26759.931211672785   207
26704.59184743723   26732.254299299548   208
26676.965380755144   26704.59184743723   209
26649.332038527744   26676.965380755144   210
26621.713112011737   26649.332038527744   211
26594.13118682433   26621.713112011737   212
26566.541288387445   26594.13118682433   213
26538.

22219.375302058   22244.92820921436   377
22193.833753234117   22219.375302058   378
22168.303480648658   22193.833753234117   379
22142.784165238252   22168.303480648658   380
22117.27575509849   22142.784165238252   381
22091.77908452851   22117.27575509849   382
22066.29276807148   22091.77908452851   383
22041.08483829836   22066.29276807148   384
22015.621054822295   22041.08483829836   385
21990.168133386454   22015.621054822295   386
21964.726376364797   21990.168133386454   387
21939.29588785427   21964.726376364797   388
21913.876085102773   21939.29588785427   389
21888.466991003374   21913.876085102773   390
21863.069620143895   21888.466991003374   391
21837.68238449257   21863.069620143895   392
21812.306798703597   21837.68238449257   393
21787.250123005306   21812.306798703597   394
21761.895725553546   21787.250123005306   395
21736.55251717525   21761.895725553546   396
21711.220336154627   21736.55251717525   397
21685.898662395462   21711.220336154627   398
21660.588