# Part 1.1 - Optimizing test accuracy as a function of learning rate and maximum iterations

In [3]:
import warnings
warnings.filterwarnings('ignore')

# Importing standard modules
import numpy as np
import pandas as pd
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score
import lr

In [2]:
# Load the three pre-split diabetes CSVs (train / test / validation), in that order.
train, test, valid = map(
    pd.read_csv,
    (
        '../data/diabetes/diabetes_train.csv',
        '../data/diabetes/diabetes_test.csv',
        '../data/diabetes/diabetes_val.csv',
    ),
)

# Row-wise stack of every split, in train -> test -> validation order.
all_data = pd.concat([train, test, valid], axis=0)

In [10]:
def optimize(max_iter, learning_rate, pred_data):
    """Grid-search the custom logistic regression over learning rates and iteration caps.

    For every (learning rate, max iterations) pair, an ``lr.LogisticRegression``
    model is fitted on the module-level ``train`` frame, its predictions for
    ``pred_data`` are thresholded at 0.5, and the resulting accuracy is printed.

    Parameters
    ----------
    max_iter : sequence
        Candidate iteration caps; each one is forwarded as ``max_iters``.
        It is iterated once per learning rate, so it must be re-iterable
        (e.g. a list), not a one-shot generator.
    learning_rate : iterable
        Candidate learning rates; each one is forwarded as ``learning_rate``.
    pred_data : pandas.DataFrame
        Evaluation split. Must contain an ``'Outcome'`` column (the labels);
        every other column is used as a feature.

    Returns
    -------
    None
        Results are only printed, not returned.
    """
    # Training split: everything except 'Outcome' is a feature.
    features_train = train.drop('Outcome', axis=1).to_numpy()
    labels_train = train['Outcome'].to_numpy()

    # Evaluation split, laid out the same way.
    features_eval = pred_data.drop('Outcome', axis=1).to_numpy()
    labels_eval = pred_data['Outcome'].to_numpy().ravel()

    for rate in learning_rate:
        for cap in max_iter:
            print(f'LEARNING RATE: {rate} \n')
            classifier = lr.LogisticRegression(
                verbose=True, add_bias=True, learning_rate=rate, max_iters=cap
            )
            scores = classifier.fit(features_train, labels_train).predict(features_eval)

            # Decision boundary at 0.5: below -> class 0, otherwise -> class 1.
            # NOTE(review): presumably `predict` returns P(y=1|X) -- confirm in lr.py.
            hard_labels = [
                0 if score < 0.5 else 1
                for score in np.array([scores]).ravel()
            ]

            print("Accuracy Score:", accuracy_score(labels_eval, np.array(hard_labels)))

In [None]:
# Hyperparameter grid: the best-performing candidates found so far
# (m = iteration caps, l = learning rates).
m, l = [1e6, 5e6], [2e-4, 5e-4]

# Evaluate every (learning rate, iteration cap) pair on the test split.
optimize(pred_data=test, max_iter=m, learning_rate=l)

In [33]:
def compareSK(pred_data, random_state=None):
    """Fit scikit-learn's ``SGDClassifier`` on ``train`` and print its accuracy on ``pred_data``.

    Prints, in order: the predicted labels (wrapped in a one-element list),
    the true labels, and the accuracy score.

    Parameters
    ----------
    pred_data : pandas.DataFrame
        Evaluation split. Must contain an ``'Outcome'`` column (the labels);
        every other column is used as a feature.
    random_state : int or None, optional
        Forwarded to ``SGDClassifier``. The default ``None`` keeps the
        previous unseeded behaviour; pass an integer for a repeatable score.

    Returns
    -------
    None
        Results are only printed, not returned.

    Notes
    -----
    * Reads the module-level ``train`` frame for fitting.
    * NOTE(review): the classifier is built with its default ``loss``.
      Per the scikit-learn docs that default is ``'hinge'`` (a linear SVM),
      not logistic regression -- confirm whether a logistic loss was
      intended for this comparison.
    * NOTE(review): ``alpha`` is documented as the regularisation strength,
      not a learning rate -- confirm that 0.0002 is meant as regularisation.
    """
    # Training split: everything except 'Outcome' is a feature.
    Xin = train.drop('Outcome', axis=1).to_numpy()
    Yin = train['Outcome'].to_numpy()

    # Evaluation split, laid out the same way.
    Xp = pred_data.drop('Outcome', axis=1).to_numpy()
    Yp = pred_data['Outcome'].to_numpy().ravel()

    # max_iter must be an integer: recent scikit-learn releases validate the
    # parameter type and reject a float such as 9e6.
    model = SGDClassifier(max_iter=9_000_000, alpha=0.0002, random_state=random_state)
    yh = model.fit(Xin, Yin).predict(Xp)

    # Printed inside a one-element list to keep the output format unchanged.
    print([yh])

    # Decision boundary at 0.5: below -> class 0, otherwise -> class 1.
    prediction = np.where(np.ravel(yh) < 0.5, 0, 1)

    print(Yp)

    print("Accuracy Score:", accuracy_score(Yp, prediction))

In [61]:
# Baseline run: score the scikit-learn SGD classifier on the test split.
compareSK(test)

[array([0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1,
       1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0,
       1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       1, 1])]
[0 0 1 0 0 0 0 1 0 1 0 1 0 0 0 0 1 0 0 0 0 0 0 1 1 1 0 0 1 0 0 0 0 0 0 0 0
 0 0 1 1 1 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 1 0 0 1 0 1 1 0 1]
Accuracy Score: 0.6764705882352942


## Results & Remarks

* Our base model performed better than the Sklearn SGD logistic classifier, by a margin of more than 10 percentage points

* The decision boundary is set at a predicted probability of 0.5: $$P(y=1|X) < 0.5 \rightarrow \hat{y}=0$$ $$P(y=1|X) \geq 0.5 \rightarrow \hat{y}= 1$$ 


* The best accuracy achieved on the test set was 77.9%, with $\alpha = 2 \times 10^{-4}$ and $i_{\max} = 1 \times 10^{6}$

* Output (including weights and magnitude of gradient vector):

```
                    LEARNING RATE: 2e-4
                    
                    1000000 Iterations
                    Norm of gradient: 0.013429351710347736

                    Weights: [ 1.29152029e-01  2.50264215e-02 -1.86834650e-02 -2.52562870e-03
                      4.12691159e-04  4.13164885e-02  5.23867127e-01  6.70382786e-04
                     -4.46242101e+00]

                    Accuracy Score: 0.7794117647058824
                    
                    -----------------------------------------------------------------------------
                    
                    LEARNING RATE: 9e-05 

                    5000000 Iterations
                    Norm of gradient: 0.005435090157130823

                    Weights: [ 1.36309194e-01  3.11128368e-02 -1.48346749e-02 -3.04513818e-03
                     -8.58120779e-05  6.64128026e-02  7.09122848e-01  6.22651319e-03
                     -6.61628338e+00]

                    Accuracy Score: 0.7794117647058824
```