In [1]:
import pandas as pd
log_data = pd.read_csv('data/data-logistic.csv', header=None)

In [2]:
log_data.columns = ['y', 'x1', 'x2']

In [3]:
df = log_data
df.head()

Unnamed: 0,y,x1,x2
0,-1,-0.663827,-0.138526
1,1,1.994596,2.468025
2,-1,-1.247395,0.749425
3,1,2.309374,1.899836
4,1,0.849143,2.40775


In [4]:
len(df)

205

In [5]:
import numpy as np

In [6]:
from tqdm import tqdm

In [9]:
def z(df, w10, w20, k, C, f='x1') -> float:
    res = k * np.sum(df['y'] * df[f] * (1 - 1/(1 + np.exp(-df['y']*(w10 * df['x1'] + w20 * df['x2']))))) / len(df)
    if f == 'x1':
        res -= k * C * w10
        res += w10
    else:
        res -= k * C * w20
        res += w20
    return res

def gd(log_data: pd.DataFrame, k = 0.1, C = 0.1, r=10000) -> tuple:
    w10 = 0
    w20 = 0
    for i in tqdm(range(r), ncols=200):
        w11 = z(log_data, w10, w20, k, C, f='x1')
        w21 = z(log_data, w10, w20, k, C, f='x2')
        dist = np.linalg.norm(np.array([w10, w20]) - np.array([w11, w21]))
        if dist < 1e-5:
            return w11, w21
        print(dist)
        w10, w20 = w11, w21
    return w10, w20  # (0.028558754546234206, 0.024780137249735545)

In [10]:
w1, w2 = gd(log_data, C=10, k=0.1, r=10000) 
w1, w2

  0%|                                                                                                                                                                         | 0/10000 [00:00<?, ?it/s]

0.0482739384539
0.0132587803327
0.00349018975742
0.000935717143569
0.00024974376394
6.67386427032e-05
1.78286589294e-05





(0.028558754546234206, 0.024780137249735545)

In [11]:
def f(w1, w2, x1, x2, y) -> float:
    return 1 / (1 + np.exp((-x1 * w1 - x2 * w2)*y))

In [12]:
f(w1, w2, -0.663827, -0.138526, 1), f(w1, w2, -0.663827, -0.138526, -1), 

(0.49440254244176762, 0.50559745755823238)

In [13]:
res = [f(w1, w2, x['x1'], x['x2'], 1) for _, x in df.iterrows()]

In [14]:
from sklearn.metrics import roc_auc_score

In [15]:
roc_auc_score(df['y'], res)

0.93628571428571417

In [16]:
w1, w2

(0.028558754546234206, 0.024780137249735545)

In [17]:
w1_, w2_ = gd(df, C=0, k=0.1, r=10000)
w1_, w2_

  1%|█                                                                                                                                                              | 70/10000 [00:00<00:30, 324.09it/s]

0.0482739384539
0.0350794046557
0.026903330054
0.0215936087286
0.0178586914115
0.0150685196187
0.0128959691048
0.0111551471299
0.00973165622625
0.00855037490412
0.00755933435951
0.00672104200518
0.0060075097568
0.00539724837012
0.00487336574723
0.00442231657194
0.00403305335197
0.00369643457089
0.00340480317187
0.00315168112772
0.00293154487835
0.0027396579129
0.00257194398433
0.00242488918482
0.00229546440353
0.00218106209911
0.00207944314472
0.00198869089109
0.00190717062258
0.0018334933066
0.00176648301057
0.00170514763936
0.00164865278544
0.00159629853332
0.00154749906004
0.00150176485206
0.00145868733428
0.00141792568818
0.00137919562731
0.00134225989904
0.00130692029084
0.00127301093415
0.00124039271736
0.00120894864003
0.00117857996041
0.00114920300826
0.00112074655323
0.00109314963521
0.00106635977776
0.00104033151856
0.00101502520139
0.000990405983647
0.000966443021325
0.0009431087997
0.00092037858381
0.000898229967166
0.000876642501041
0.000855597389823
0.0008350772405
0.0008

  2%|███▌                                                                                                                                                          | 225/10000 [00:00<00:22, 429.08it/s]

0.000192059454343
0.000187499985859
0.000183048966389
0.000178703806844
0.000174461980379
0.000170321020886
0.000166278521508
0.000162332133202
0.000158479563334
0.000154718574302
0.000151046982206
0.000147462655541
0.000143963513923
0.000140547526857
0.00013721271252
0.000133957136584
0.000130778911068
0.000127676193214
0.000124647184393
0.00012169012904
0.000118803313609
0.000115985065561
0.000113233752372
0.00011054778057
0.000107925594787
0.000105365676845
0.000102866544857
0.000100426752352
9.80448874244e-05
9.5719571897e-05
9.34494605129e-05
9.1233240141e-05
8.90696290044e-05
8.69573759253e-05
8.489525959e-05
8.28820878306e-05
8.09166969253e-05
7.89979509146e-05
7.7124740935e-05
7.52959845685e-05
7.35106252079e-05
7.17676314377e-05
7.00659964298e-05
6.84047373539e-05
6.67828948026e-05
6.51995322296e-05
6.36537354023e-05
6.21446118675e-05
6.06712904294e-05
5.92329206409e-05
5.78286723073e-05
5.64577350012e-05
5.51193175898e-05
5.38126477735e-05
5.25369716354e-05
5.1291553202e-05
5




(0.28781162047177644, 0.091983302159254363)

In [18]:
res_noreg = [f(w1_, w2_, x['x1'], x['x2'], 1) for _, x in df.iterrows()]

In [19]:
roc_auc_score(df['y'], res_noreg)

0.92685714285714282

In [20]:
with open('a2.txt', 'w') as fd:
    fd.write('{:.3f} {:.3f}'.format(roc_auc_score(df['y'], res_noreg), roc_auc_score(df['y'], res)))

In [21]:
!cat a2.txt

0.927 0.936