# Homework 2 Coding: Logistic Regression.
  Please export this Jupyter notebook as a PDF and hand in the .pdf file (together with the written part).

In this part of the homework, you need to implement logistic regression in Python inside this Jupyter notebook.

## Part 0: Preparation before training.

This part loads the necessary libraries and dataset. You are only required to do the normalization by yourself.

In [None]:
# Import all the required libraries. You need to install them first.
import pandas as pd
from sklearn.datasets import load_breast_cancer
import numpy as np
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns

In [None]:
# Load the breast-cancer dataset: `data` is the feature matrix, `target` the binary labels.
data, target = load_breast_cancer(return_X_y=True)

In [None]:
# Display the dimensions of the feature matrix and of the label vector as a tuple.
data.shape,target.shape

In [None]:
# Hold out 33% of the samples as the test set (roughly a 2:1 train/test split).
# random_state is fixed so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.33, random_state = 42)

Normalizing data (by yourself)


In [None]:
# A useful trick before training is to normalize all features to have mean 0 and unit variance first.
# This is implemented by hand rather than with sklearn.preprocessing.StandardScaler; the
# equivalent library code would be:
#
#     scaler = preprocessing.StandardScaler().fit(X_train)
#     X_train = scaler.transform(X_train)
#     X_test = scaler.transform(X_test)

# Per-feature statistics are estimated on the training split only, so no information
# from the test split leaks into the preprocessing.
feature_mean = X_train.mean(axis=0)
feature_std = X_train.std(axis=0)  # population std (ddof=0), the convention StandardScaler uses

# A constant feature has zero spread; divide by 1 instead so it maps to 0 rather than NaN/inf.
feature_std[feature_std == 0] = 1.0

# The test split is scaled with the *training* mean and std.
X_train = (X_train - feature_mean) / feature_std
X_test = (X_test - feature_mean) / feature_std


Some helper functions are given below; you are free to use them (or not) in the parts that follow.

In [None]:
# Function to predict the class label of every row of x with the current weights
def predict(x, w, b=0.0):
    """Classify samples with a logistic-regression model.

    Parameters
    ----------
    x : array-like of shape (n_samples, n_features)
        Samples to classify, one per row.
    w : array-like with n_features entries
        Weight vector. A (1, n_features) row vector is flattened and accepted too.
    b : float, optional
        Intercept added to every score. Defaults to 0.0, which reproduces the
        original two-argument behaviour ``predict(x, w)``.

    Returns
    -------
    numpy.ndarray of shape (n_samples,)
        1 where the predicted probability sigmoid(w . x + b) is at least 0.5,
        otherwise 0.
    """
    x = np.asarray(x, dtype=float)
    if x.shape[0] == 0:
        # Nothing to classify; avoid a shape mismatch in the matrix product.
        return np.array([], dtype=int)

    scores = x @ np.ravel(w) + b
    # exp() may overflow to inf for very negative scores; 1 / (1 + inf) is still
    # the correct limit 0.0, so the overflow warning is silenced deliberately.
    with np.errstate(over="ignore"):
        probabilities = 1.0 / (1.0 + np.exp(-scores))
    return np.where(probabilities < 0.5, 0, 1)

In [None]:
# Rearrange a 2x2 confusion matrix into a labelled table for plotting
def find_rates(mat):
    """Return the top-left 2x2 of ``mat`` transposed, as a labelled DataFrame.

    No rates are computed here: the four entries of ``mat`` are kept as they
    are and only their layout changes. Entry ``mat[r, c]`` ends up at row
    ``c``, column ``r`` of the result, whose row axis is named 'Predicted'
    and whose column axis is named 'Actual'.

    Parameters
    ----------
    mat : 2-D array supporting ``mat[i, j]`` indexing.

    Returns
    -------
    pandas.DataFrame of shape (2, 2) with index and columns both ``[0, 1]``.
    """
    swapped = [
        [mat[0, 0], mat[1, 0]],
        [mat[0, 1], mat[1, 1]],
    ]
    return pd.DataFrame(
        np.array(swapped),
        index=pd.Index([0, 1], name='Predicted'),
        columns=pd.Index([0, 1], name='Actual'),
    )

## Part 1: Implement Logistic Regression using sklearn.

In this part, you are first given an example scikit-learn implementation of logistic regression. Play with it, and then you should:
1.  Explain the parameters and their effects in LogisticRegression().
2.  Try different settings of parameters and show its performance as the example.

You can read official document from https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html

In [None]:
# Baseline scikit-learn model: L2-penalized logistic regression fitted with the
# liblinear solver on the normalized training split. fit() returns the estimator
# itself, so construction and fitting are chained.
LRexample = LogisticRegression(solver='liblinear', penalty='l2', C=0.1).fit(X_train, y_train)
LRexample

In [None]:
# Predict class labels for the held-out test set with the fitted example model.
y_pred_sklearn = LRexample.predict(X_test)

In [None]:
# Distinct ground-truth labels in the test set and the number of samples carrying each.
np.unique(y_test,return_counts=True)

In [None]:
# Distinct labels predicted by the example model on the test set and how often each occurs.
np.unique(y_pred_sklearn,return_counts=True)

In [None]:
# ravel() flattens the 2x2 confusion matrix row by row, which for labels 0/1 gives, in order:
# true negatives, false positives, false negatives, true positives
tn, fp, fn, tp = confusion_matrix(y_test, y_pred_sklearn).ravel()
(tn, fp, fn, tp)

In [None]:
# Confusion-matrix counts for the sklearn predictions, laid out by find_rates for display.
mat_test = find_rates(confusion_matrix(y_test, y_pred_sklearn))

# Draw the annotated heatmap on the axes of a dedicated 8x6-inch figure.
fig = plt.figure(figsize=(8, 6))
fig.gca().set_title('Confusion Matrix for test data using sklearn Logistic Regression')
sns.heatmap(data=mat_test, annot=True, fmt='g', ax=fig.gca())

In [None]:
# Score the example model on the test split (for a classifier, scikit-learn's score() is the mean accuracy).
LRexample.score(X_test, y_test)

Explain the parameters and their effects in LogisticRegression( ) in the Markdown cell below.

# Penalty: 
The penalty (regularization) term, str type; the options used here are `l1` and `l2`, and the default is `l2`.
L1 regularization corresponds to assuming a Laplace prior on the model parameters (it tends to drive some weights exactly to zero), while L2 regularization corresponds to assuming a Gaussian prior (it shrinks all weights smoothly).
# C:
The reciprocal of the regularization coefficient $\lambda$, float type, defaults to 1.0. A smaller C means stronger regularization.
# solver:
Selects the optimization algorithm. The classic options are newton-cg, lbfgs, liblinear, sag and saga (recent scikit-learn versions also offer newton-cholesky). The default is lbfgs; it was liblinear before scikit-learn 0.22.


Try different settings of Sklearn implementation of logistic regression and show the performance as the example above. Write your codes below. 

In [None]:
# Compare L1- and L2-penalized liblinear models over a range of C values,
# recording the accuracy on both the training and the test split.
C_values = np.linspace(0.05, 3, 59)

l1, l2 = [], []          # training accuracy per C for the L1 / L2 model
l1test, l2test = [], []  # test accuracy per C for the L1 / L2 model

for C in C_values:
    lrl1 = LogisticRegression(penalty="l1", solver="liblinear", C=C).fit(X_train, y_train)
    lrl2 = LogisticRegression(penalty="l2", solver="liblinear", C=C).fit(X_train, y_train)

    # score() expects the feature matrix and the true labels; it runs predict() internally.
    l1.append(lrl1.score(X_train, y_train))
    l1test.append(lrl1.score(X_test, y_test))
    l2.append(lrl2.score(X_train, y_train))
    l2test.append(lrl2.score(X_test, y_test))

graph = [l1, l2, l1test, l2test]
color = ["green", "black", "lightgreen", "gray"]
label = ["L1", "L2", "L1test", "L2test"]

for curve, curve_color, curve_label in zip(graph, color, label):
    plt.plot(C_values, curve, color=curve_color, label=curve_label)

plt.xlabel("C")
plt.ylabel("Accuracy")
plt.legend(loc=4)
plt.show()


In [None]:
# Unregularized logistic regression fitted with L-BFGS.
# penalty=None is the spelling accepted by scikit-learn >= 1.2; the string "none"
# was deprecated in 1.2 and removed in 1.4.
lr1 = LogisticRegression(penalty = None, solver = "lbfgs", C = 1.0)
lr1 = lr1.fit(X_train, y_train)
print(lr1.score(X_test, y_test))

# Predict on the test split and show the resulting confusion matrix.
y_pred_sklearn1 = lr1.predict(X_test)
mat_test = find_rates(confusion_matrix(y_test, y_pred_sklearn1))
fig = plt.figure(figsize=(8,6))
plt.title('Confusion Matrix for test data using sklearn Logistic Regression')
sns.heatmap(mat_test,annot=True,fmt='g')


In [None]:
# L2-penalized logistic regression with C = 0.1, fitted with the SAG solver.
lr2 = LogisticRegression(penalty = "l2", solver = "sag", C = 0.1)
lr2 = lr2.fit(X_train, y_train)
print(lr2.score(X_test, y_test))

# Predict on the test split and show the resulting confusion matrix.
y_pred_sklearn2 = lr2.predict(X_test)
mat_test = find_rates(confusion_matrix(y_test, y_pred_sklearn2))
fig = plt.figure(figsize=(8,6))
plt.title('Confusion Matrix for test data using sklearn Logistic Regression')
sns.heatmap(mat_test,annot=True,fmt='g')


In [None]:
# L2-penalized logistic regression with C = 1, fitted with the Newton-CG solver.
lr3 = LogisticRegression(penalty = "l2", solver = "newton-cg", C = 1)
lr3 = lr3.fit(X_train, y_train)
print(lr3.score(X_test, y_test))

# Predict on the test split and show the resulting confusion matrix.
y_pred_sklearn3 = lr3.predict(X_test)
mat_test = find_rates(confusion_matrix(y_test, y_pred_sklearn3))
fig = plt.figure(figsize=(8,6))
plt.title('Confusion Matrix for test data using sklearn Logistic Regression')
sns.heatmap(mat_test,annot=True,fmt='g')


## Part 2: Implement Logistic Regression without using its library. 

In this part, you need to implement Logistic regression model using Batch Gradient Descent and Stochastic Gradient Descent by yourself. The hyperparameters of the two algorithms are given and recommended. Notice that with given hyperparameters and random seeds, the weights obtained by BGD and SGD with momentum should be unique.

### Part 2.1: Implement logistic regression using Batch-GD

Describe the Batch-GD algorithm in the Markdown cell below. You are free to use mathematical derivation or not.

In [None]:
"""
At each iteration, train all the samples and update weights. The initilization point should be set to all-zero vector.
"""
from datetime import date
n_iter=50  # number of iterations
reg=0.01   # regularization parameter lambda
r=0.1      # learning rate
sample_size=X_train.shape[0]    # batch size for BGD
N=X_train.shape[0]

def s(x):
    """Numerically stable logistic sigmoid 1 / (1 + exp(-x)).

    Accepts a scalar or an array. A scalar input yields a Python float,
    an array input yields an array of the same shape.

    exp() is only ever evaluated at -|x|, which lies in (0, 1], so it cannot
    overflow; the two algebraically equal forms below are picked per sign of x.
    """
    x = np.asarray(x, dtype=float)
    decay = np.exp(-np.abs(x))
    prob = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    return prob.item() if prob.ndim == 0 else prob

# Weights and intercept start from zero.
GD = np.zeros(X_train.shape[1])
GD0 = 0.0

for j in range(n_iter):
    #Your codes below:
    # Predicted probability of class 1 for every training sample under the current model.
    h = s(X_train @ GD + GD0)
    residual = y_train - h

    # Full-batch average of (y - h) * x: the ascent direction of the mean log-likelihood.
    gd = X_train.T @ residual / N
    gd0 = residual.mean()

    # Gradient step plus L2 shrinkage on the weights; the intercept is not regularized.
    GD = GD + r * gd - r * reg * GD
    GD0 += r * gd0



In [None]:
# Getting predictions for the test datapoints from the batch-GD weights GD and intercept GD0.
# NOTE(review): the intercept is passed as a third positional argument, so predict()
# must accept the call form predict(x, w, b).
y_pred_BGD = predict(X_test,GD,GD0)

In [None]:
# Ground-truth label counts on the test set, for comparison with the batch-GD predictions.
np.unique(y_test,return_counts=True)

In [None]:
# Label counts among the batch-GD predictions on the test set.
np.unique(y_pred_BGD,return_counts=True)

In [None]:
# Confusion-matrix counts for the batch-GD predictions, laid out by find_rates for display.
mat_test = find_rates(confusion_matrix(y_test, y_pred_BGD))

# Draw the annotated heatmap on the axes of a dedicated 8x6-inch figure.
fig = plt.figure(figsize=(8, 6))
fig.gca().set_title('Confusion Matrix for test data using BGD Logistic Regression')
sns.heatmap(data=mat_test, annot=True, fmt='g', ax=fig.gca())

### Part 2.2: Implement logistic regression using SGD with momentum

In this part, you need to implement logistic regression using SGD method with momentum for accelerating training.
Intuitively, the method accelerates training by keeping a 'momentum', i.e. by continuing to move along the previous update direction. You may find Chapter 8.3 of
https://www.deeplearningbook.org/contents/optimization.html helpful for more details on SGD, momentum and
other acceleration tricks.

Describe the SGD with momentum algorithm in the Markdown cell below. You are free to use mathematical derivation or not.

In [None]:
"""
At each iteration, choose 20 samples randomly and compute dJ(theta)/d(theta) among 
those 20 samples then update the vector of weights with momentum. The initilization point should be set to all-zero vector.

Note that the random seed at each iteration is given, do not modify it.
"""
# Hyperparameters for mini-batch SGD with momentum.
n_iter=50  # number of iterations
reg=0.01   # regularization parameter lambda
r=0.1      # learning rate
momen = 0.5 # momentum rate
sample_size=20    # sample size for SGD
N=X_train.shape[0]  # number of training samples (not used in the loop below)

# Weights and intercept start from zero.
w_SGD = np.zeros(X_train.shape[1])
w_SGD0 = 0.0
# temp / temp0 hold the momentum (velocity) terms for the weights and the intercept.
# `temp = w_SGD` binds the same array object, but temp is rebound to a freshly
# computed array on its first update in the loop, before the in-place
# `w_SGD += ...`, so the two never share storage while being modified.
temp = w_SGD
temp0 = w_SGD0

for j in range(n_iter):
    # Re-seeding with the iteration number makes the drawn mini-batch reproducible.
    np.random.seed(j) 
    # Draw sample_size row indices; randint draws independently, so an index may repeat.
    idx=np.random.randint(X_train.shape[0],size=sample_size) 
    # Do NOT modify codes above, especially the random code.
    # At each iterations, choose samples from X_train, y_train, with index idx.
    # Your codes below:
    # Accumulators for the mini-batch average of (y - h) * x and of (y - h).
    gd = np.zeros(X_train.shape[1])
    gd0 = 0.0
    for i in idx:
        # s is the sigmoid helper defined in the batch-GD cell earlier in the notebook.
        h = s(np.dot(w_SGD, X_train[i]) + w_SGD0)
        gd += (y_train[i] - h) * X_train[i] / sample_size
        gd0 += (y_train[i] - h) / sample_size
    # Velocity update: decay the previous velocity by momen, then add the batch
    # direction minus the L2 shrinkage term on the weights.
    temp = momen * temp + gd - reg * w_SGD
    w_SGD += r * temp
    # The intercept follows the same momentum rule but is not regularized.
    temp0 = momen * temp0 + gd0
    w_SGD0 += r * temp0
    


In [None]:
# Getting predictions for the test datapoints from the SGD weights w_SGD and intercept w_SGD0.
# NOTE(review): the intercept is passed as a third positional argument, so predict()
# must accept the call form predict(x, w, b).
y_pred_SGD = predict(X_test,w_SGD,w_SGD0)

In [None]:
# Ground-truth label counts on the test set, for comparison with the SGD predictions.
np.unique(y_test,return_counts=True)

In [None]:
# Label counts among the SGD-with-momentum predictions on the test set.
np.unique(y_pred_SGD,return_counts=True)

In [None]:
# Confusion-matrix counts for the SGD predictions, laid out by find_rates for display.
mat_test = find_rates(confusion_matrix(y_test, y_pred_SGD))

# Draw the annotated heatmap on the axes of a dedicated 8x6-inch figure.
fig = plt.figure(figsize=(8, 6))
fig.gca().set_title('Confusion Matrix for test data using SGD Logistic Regression')
sns.heatmap(data=mat_test, annot=True, fmt='g', ax=fig.gca())

In [None]:
# Print a table to show every coefficient in vector w, and compute the absolute difference
# between the coefficients of the BGD and the SGD-with-momentum methods.

from prettytable import PrettyTable
p = PrettyTable()
p.title='Weights from both models'
p.field_names=['SKlearn','BGD', 'SGD', 'Difference']

# One row per feature, to five decimal places. The columns are the coefficients of the
# sklearn example model (LRexample), the batch-GD weights (GD), the SGD weights (w_SGD)
# and the absolute BGD/SGD difference.
for sklearn_w, bgd_w, sgd_w in zip(LRexample.coef_[0], GD, w_SGD):
    p.add_row(['{:.5f}'.format(sklearn_w), '{:.5f}'.format(bgd_w),
               '{:.5f}'.format(sgd_w), '{:.5f}'.format(abs(bgd_w - sgd_w))])
print(p)