# Logistic Regression Using SGD with Momentum

In [2]:
import numpy as np
import pandas as pd
from sklearn import datasets

In [3]:
data = datasets.load_breast_cancer()

In [4]:
X = data['data']
Y = data['target']

In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, stratify = Y, shuffle = True)

In [6]:
X_train.shape, X_test.shape, Y_train.shape, Y_test.shape

((426, 30), (143, 30), (426,), (143,))

In [7]:
import numpy as np
import pandas as pd
from sklearn import datasets

data = datasets.load_breast_cancer()

X = data['data']
Y = data['target']

from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, stratify = Y, shuffle = True)

X_train.shape, X_test.shape, Y_train.shape, Y_test.shape

((426, 30), (143, 30), (426,), (143,))

In [13]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)

In [8]:
import math
def sigmoid(X):
    return 1/(1+math.exp(-X)) 
def log_loss(y, y_):
    return -1*(y*math.log(sigmoid(y_)) + (1-y)*math.log(1-sigmoid(y_)))

def cal_grad(y,y_,x):
    return (y - sigmoid(y_)) * (x.reshape(-1, 1)) 

In [9]:
def train(X, Y, epochs = 1000):
    """
    v1 = eta*g1
    vt = gamma*v_(t-1) + eta*gt
    wt = wt-1 - vt
    """
    M = X.shape[0]
    N = X.shape[1]
    w, eta =  np.random.uniform(-1, 1, size = N).reshape(N, 1), 1.0
    prev_v_w, gamma = 0, 0.9
    for epoch in range(epochs):
        gt = 0
        for i in range(len(X)):
            y = Y[i]
            y_ = np.dot(w.T, X[i].reshape(N, 1)) 
            gt += cal_grad(y, y_, X[i])
            
        v_w = gamma*prev_v_w + -(1.0/M)*eta* gt

        w = w - v_w
        prev_v_w = v_w
        
        if epoch % 100 == 0: 
            print(log_loss(y, y_))
        if(log_loss(y,y_) < 1e-10):
            break
    return w

In [10]:
w = train(X_train, Y_train)

0.8446730339544043
0.043287846873942486
0.07970971647400106
0.08454760291275286
0.09663579230006461
0.10721158088328958
0.11554030313727556
0.12140579209474434
0.12526454154309163
0.12768510439753697


In [11]:
def predict(X):
    y_pred = []
    for i in range(len(X)):
        y_pred.append(np.round(sigmoid(np.dot(w.T, X[i].reshape(X.shape[1], 1)))))
    return y_pred

In [12]:
y_pred = predict(X_test)

In [13]:
from sklearn.metrics import accuracy_score, f1_score, auc, roc_auc_score, roc_curve
print("accuracy : {}".format(accuracy_score(Y_test, y_pred)))

accuracy : 0.986013986013986


# Logistic Regression Using Sklearn

In [14]:
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression()
clf.fit(X_train, Y_train)
y_pred_ = clf.predict(X_test)

In [15]:
print("accuracy : {}".format(accuracy_score(Y_test, y_pred_)))

accuracy : 0.972027972027972
