# Logistic Regression, Stochastic Gradient, Mini Batch Gradient Descent
## Name = Atharv Subhekar

### Importing Libraries

In [157]:
# preprocessing libraries
import pandas as pd
import numpy as np
# algorithm libraries
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

### Data Preprocessing

In [158]:
data = 'wdbc.data.csv'
names = ['ID', 'diagnosis']
for i in range(1, 31):
    names.append(str(i))
df = pd.read_csv(data, names = names)

In [159]:
df.head()

Unnamed: 0,ID,diagnosis,1,2,3,4,5,6,7,8,...,21,22,23,24,25,26,27,28,29,30
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [160]:
target = df['diagnosis']
df = df.drop(columns = ['ID', 'diagnosis'])

In [161]:
target.replace('B', -1, inplace = True)
target.replace('M', 1, inplace = True)

In [162]:
target.shape

(569,)

In [163]:
X_train, X_test, y_train, y_test = train_test_split(df, target, test_size = 0.2)
X_train = X_train.to_numpy()
X_test = X_test.to_numpy()
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()

### Logistic Regression

In [164]:
def sigmoid(w, x):
    numerator = 1
    denominator = 1 + np.exp(-np.dot(w.T, x))
    
    return numerator / denominator

In [165]:
def gradient(w, x, y):
    n = x.shape[0]
    summ = 0
    
    for i in range(n):
        summ += (sigmoid(w, x) - y) * x
        
    return -summ

### Stochastic Gradient Descent

In [166]:
def stochastic_gradient(w, x, y):
    n = x.shape[0]
    summ = 0
    
    for i in range(n):
        summ += (sigmoid(w, x) - y) * x
        
    return -summ

In [167]:
def sgd(x, y, learning_rate, w, max_epoch=100):
    while max_epoch > 0:
        for i in range(x.shape[0]):
            grad = stochastic_gradient(w, x[i], y[i])
        w = w - grad * learning_rate 
        
        max_epoch = max_epoch - 1
        
    return w

In [168]:
w = np.zeros((X_train.shape[1]))
w_sgd = sgd(X_train, y_train, 0.01, w)

  denominator = 1 + np.exp(-np.dot(w.T, x))


### Mini-Batch Gradient Descent

In [169]:
def minibatch_gradient(w, x, y):
    summ = 0
    
    for i in range(len(x)):
        summ += np.dot((sigmoid(w, x[i]) - y[i]),x[i])
        
    return summ

In [170]:
def create_mini_batches(x, y, batch_size = 10):
    indices = [i for i in range(x.shape[0])]
    np.random.shuffle(indices)
    mini_batch = []
    i = 0
    while i < x.shape[0]:
        mini_batch.append(indices [i : i + batch_size -1])
        i = i + batch_size
    
    return mini_batch

In [171]:
def mbgd(x, y, learning_rate, w, max_epoch=100):
    i = 0
    while max_epoch > 0:
        mini = create_mini_batches(x, y, 10)
        for batches in mini:
            x_mini = [x[i] for i in batches]
            y_mini = [y[i] for i in batches]
    
            mbgrad = minibatch_gradient(w, x_mini, y_mini)
            w = w - learning_rate * mbgrad
        
        max_epoch = max_epoch - 1
    return w

In [172]:
w = np.zeros((X_train.shape[1]))
w_mbgd = mbgd(X_train, y_train, 0.01, w)

  denominator = 1 + np.exp(-np.dot(w.T, x))


### Cross Validation

In [175]:
def evaluate(x_train, y_train, x_test, y_test, gradient_descent = 'mbgd'):
    #list for storing folds
    train_fold = list()
    test_fold = list()
    
    # kfold
    kf = KFold(n_splits = 5)
    
    # getting indices of data in each fold
    for i, (train_index, test_index) in enumerate(kf.split(x_train)):
        train_fold.append(list(train_index))
        test_fold.append(list(test_index))
        
    # Calculating weights
    for i in range(len(train_fold)):
        train_data = [x_train[j] for j in train_fold[i]]
        train_labels = [y_train[j] for j in train_fold[i]]
        
        test_data = [x_train[j] for j in test_fold[i]]
        test_labels = [y_train[j] for j in test_fold[i]]
        
        weights = np.zeros((x_train.shape[1]))
        wsgd = sgd(np.asarray(train_data), np.asarray(train_labels), 0.01, weights)
        wmbgd = mbgd(np.asarray(train_data), np.asarray(train_labels), 0.01, weights)
        
        # predicting target and evaluating accuracy
        y_predicted_sgd = []
        y_predicted_mbgd = []
        for x in test_data:
            y_pred_sgd = np.dot(wsgd.T, x)
            y_pred_mbgd = np.dot(wmbgd.T, x)
            
            if y_pred_sgd > 1:
                y_predicted_sgd.append(1)
            else:
                y_predicted_sgd.append(-1)
            if y_pred_mbgd > 1:
                y_predicted_mbgd.append(1)
            else:
                y_predicted_mbgd.append(-1)
        #Metrics for SGD        
        t_pos_sgd = 1
        t_neg_sgd = 1
        f_pos_sgd = 1
        f_neg_sgd = 1
        
        for i in range(len(y_predicted_sgd)):
            if y_predicted_sgd[i] > 0 and test_labels[i] > 0:
                t_pos_sgd += 1
            if y_predicted_sgd[i] > 0 and test_labels [i] < 0:
                f_pos_sgd += 1
            if y_predicted_sgd[i] < 0 and test_labels[i] > 0:
                f_neg_sgd += 1
            if y_predicted_sgd[i] < 0 and test_labels[i] < 0:
                t_neg_sgd += 1
        
        # Calculated precision score from confusion matrix
        sgd_prec = []
        sgd_rec = []
        sgd_acc = []
        
        sgd_rec.append(t_pos_sgd / (t_pos_sgd + f_neg_sgd))
        sgd_prec.append(t_pos_sgd / (t_pos_sgd + f_pos_sgd))
        sgd_acc.append(t_pos_sgd + t_neg_sgd / t_pos_sgd + t_neg_sgd + f_pos_sgd + f_neg_sgd)
        
        
        # Metrics for BSGD
        t_pos_mbgd = 1
        t_neg_mbgd = 1
        f_pos_mbgd = 1
        f_neg_mbgd = 1
        
        for i in range(len(y_predicted_mbgd)):
            if y_predicted_mbgd[i] > 0 and test_labels[i] > 0:
                t_pos_mbgd += 1
            if y_predicted_mbgd[i] > 0 and test_labels [i] < 0:
                f_pos_mbgd += 1
            if y_predicted_mbgd[i] < 0 and test_labels[i] > 0:
                f_neg_mbgd += 1
            if y_predicted_mbgd[i] < 0 and test_labels[i] < 0:
                t_neg_mbgd += 1
        
        # lists for storing data
        mbgd_prec = []
        mbgd_rec = []
        mbgd_acc = []
        
        mbgd_rec.append(t_pos_mbgd / (t_pos_mbgd + f_neg_mbgd))
        mbgd_prec.append(t_pos_mbgd / (t_pos_mbgd + f_pos_mbgd))
        mbgd_acc.append(t_pos_mbgd + t_neg_mbgd / t_pos_mbgd + t_neg_mbgd + f_pos_mbgd + f_neg_mbgd)
                
        
    if gradient_descent == 'mbgd':
        return mbgd_rec, mbgd_prec, mbgd_acc
    else:
        return sgd_rec, sgd_prec, sgd_acc

In [176]:
#evaluate SGD and MBGD
rec, prec, acc = evaluate(X_train, y_train, X_test, y_test, "sgd")
print("Recall: ", rec)
print("Precision: ", prec)
print("Accuracy: ", acc)
rec_mbgd, prec_mbgd, acc_mbgd = evaluate(X_train, y_train, X_test, y_test, "mbgd")
print("Recall: ", rec_mbgd)
print("Precision: ", prec_mbgd)
print("Accuracy: ", acc_mbgd)

  denominator = 1 + np.exp(-np.dot(w.T, x))


Recall:  [0.9696969696969697]
Precision:  [0.34408602150537637]
Accuracy:  [95.03125]
Recall:  [0.2727272727272727]
Precision:  [0.9]
Accuracy:  [101.77777777777777]
