In [69]:
import pandas as pd
import numpy as np
from sklearn import metrics

In [70]:
def euclid_dist(w, z):
    tmp = (w[0] - z[0])**2 + (w[1] - z[1])**2
    return(np.sqrt(tmp))

In [71]:
def sigmoid(x, w):
    return 1.0 / (1 + np.exp(-w[0]*x[0] - w[1]*x[1]))

In [72]:
def step_log(X, y, w, k, C):
    l = y.size
    s0 = 0
    s1 = 0
    for i in range(0, l):        
        tmp = (-y[i]) * (w[0] * X[i][0] + w[1] * X[i][1])
        tmp = 1 - 1/(1 + np.exp(tmp))
        s0 += y[i] * X[i][0] * tmp
        s1 += y[i] * X[i][1] * tmp
    w0 = w[0] + (s0 * k/l) - k * C * w[0]
    w1 = w[1] + (s1 * k/l) - k * C * w[1]
    print(w0, w1)
    return (w0, w1)    

In [73]:
def log_regr(X, y, w=(0, 0),k=0.1, C=0):
    eps = 0.00001
    max_iter = 10000
    w_old = w
    w_new = step_log(X, y, w_old, k, C)
    i = 1
    while i < max_iter and euclid_dist(w_old, w_new) >= eps :
        w_old = w_new
        w_new = step_log(X, y, w_old, k, C)
        i += 1
    print(i)
    return w_new

In [74]:
df = pd.read_csv('data-logistic.csv', header=None)
df

Unnamed: 0,0,1,2
0,-1,-0.663827,-0.138526
1,1,1.994596,2.468025
2,-1,-1.247395,0.749425
3,1,2.309374,1.899836
4,1,0.849143,2.407750
...,...,...,...
200,1,4.245176,3.053931
201,1,2.437935,1.357804
202,-1,-1.876638,1.533398
203,1,-6.824446,-13.934211


In [75]:
X = df.drop(columns=[0])
y = df[0]

In [76]:
X = np.array(X)
y = np.array(y)

In [77]:
w = log_regr(X, y)
w

0.035731266398741086 0.03245997127838869
0.06243037429473823 0.05521348240427658
0.08343427766831138 0.07202494311027566
0.10067272600016682 0.0850295514102227
0.11522226961113709 0.09538540191331753
0.1277368954185646 0.1037785195937863
0.13864802866699205 0.1106526151987705
0.14825909445507238 0.1163151872210771
0.15679400747483294 0.12099069710246729
0.1644241120056825 0.12484950726225982
0.1712842172222513 0.12802480132366875
0.1774827070945897 0.1306230947901386
0.18310822687402223 0.13273110349250564
0.18823428557659425 0.13442042647716987
0.19292253199924378 0.1357508546739456
0.19722515357223086 0.13677278004409057
0.2011866757636018 0.13752899513722402
0.20484533998328375 0.13805606692026984
0.2082341776612892 0.13838540539556762
0.21138186050845711 0.13854410833963507
0.21431338271195155 0.13855563848353286
0.21705061477323695 0.1384403730402841
0.21961275782602707 0.13821605443749663
0.22201671974682113 0.13789816350963693
0.22427742905948655 0.1375002310637576
0.22640809882

(0.2878116204717764, 0.09198330215925439)

In [78]:
y_pred = [sigmoid(x, w) for x in X]
y_pred

[0.44922585542317794,
 0.6902064874349346,
 0.4279842330215067,
 0.698342603398199,
 0.6144048504645165,
 0.5883985371627194,
 0.7020383716054767,
 0.5288893573505367,
 0.4197121128056825,
 0.5958461182829787,
 0.4997746020747329,
 0.4990656846387426,
 0.4792041245057152,
 0.5976833927340034,
 0.6495890619026284,
 0.5687875039771356,
 0.4708692480854883,
 0.6213822802601785,
 0.6698352633447834,
 0.7447521625073933,
 0.5457723274356339,
 0.015431289063986543,
 0.41544251021663786,
 0.44921358087475804,
 0.6127254703702426,
 0.5371078643536809,
 0.6977300634430932,
 0.7586436383262608,
 0.4712684104436308,
 0.5872255352633596,
 0.4619693788552515,
 0.48523066042458146,
 0.785695464198339,
 0.7499380962526107,
 0.5178671871340186,
 0.660824999067774,
 0.7049380792038414,
 0.6673205675185813,
 0.5836209124563856,
 0.6960607121777536,
 0.6478517317313925,
 0.44015592276161614,
 0.594689792150878,
 0.46013920592904256,
 0.5303377405518828,
 0.6420786383877348,
 0.8042441984821952,
 0.472287

In [79]:
a = metrics.roc_auc_score(y, y_pred)
a

0.9268571428571428

In [80]:
w_reg = log_regr(X, y, C=10)
w_reg

0.035731266398741086 0.03245997127838869
0.026699107895997144 0.022753511125887887
0.029057389054538967 0.02532643446716265
0.028426566376644737 0.02463532715145532
0.028594936427109398 0.024819781681715954
0.02854994677245764 0.024770486880730656
0.028561965517012657 0.024783655436404747
0.028558754546234223 0.02478013724973556
8


(0.028558754546234223, 0.02478013724973556)

In [81]:
y_pred_reg = [sigmoid(x, w_reg) for x in X]
y_pred_reg

[0.4944025475025294,
 0.5294960008513981,
 0.4957368033419395,
 0.5282277128147582,
 0.520966409819608,
 0.5062604414439291,
 0.5300823367598685,
 0.508618029218254,
 0.4887377640717866,
 0.5175760055944163,
 0.5019164771626575,
 0.5015494554061638,
 0.4934116084179819,
 0.5055899757833094,
 0.5222836185848817,
 0.5043425922116569,
 0.49295960028999414,
 0.5191125020027608,
 0.5219557272753146,
 0.5348921372802882,
 0.5021801475793493,
 0.381319645530652,
 0.48731919498878196,
 0.4920696728364942,
 0.5120357324177529,
 0.5039500221633949,
 0.5306912927485266,
 0.5410776318394168,
 0.4983937869387338,
 0.5135725993580922,
 0.49451210360457637,
 0.5024753364374631,
 0.546004822044189,
 0.5326205306762868,
 0.5073897010518211,
 0.5137854596695193,
 0.5341760293556234,
 0.5230097578402351,
 0.507203224404265,
 0.5238303526560476,
 0.5182801518655145,
 0.49704680889686764,
 0.5127011195651381,
 0.49238437577399274,
 0.5070329898537863,
 0.5197279682757737,
 0.5438830405783189,
 0.4972685428

In [82]:
b = metrics.roc_auc_score(y, y_pred_reg)
b

0.9362857142857142

In [83]:
f = open('q1.txt', 'w')
f.write(str(a) + ' ' + str(b))
f.close()