In [9]:
"""
This code shows an example implementing the fast gradient algorithm to compute
the beta coefficients and misclassification error. It compares the beta
coefficients to those found by scikit-learn's LogisticRegression.
"""


import src.logistic_reg as lreg
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


# create simulated dataset: two groups of uniform random features whose
# values are shifted by -0.05 and +0.05, labelled +1 and -1 respectively
np.random.seed(0)

data1 = np.random.random(size=(250, 50)) - 0.05
data2 = np.random.random(size=(300, 50)) + 0.05
data = np.concatenate((data1, data2), axis=0)

label1 = np.ones(shape=(250, ))
label2 = np.ones(shape=(300, ))*-1
label = np.concatenate((label1, label2), axis=0)

# define the split between train and test data
x_train, x_test, y_train, y_test = train_test_split(data,
                                                    label,
                                                    random_state=0)

# standardize the data; the scaler is fit on the training split only and the
# same transformation is then applied to both splits
x_scaler = StandardScaler().fit(x_train)
x_train = x_scaler.transform(x_train)
x_test = x_scaler.transform(x_test)
n = x_train.shape[0]
d = x_train.shape[1]

# regularization strength, defined once so that the fast gradient run and the
# scikit-learn reference fit below always use the same value
lamb = 0.1

# initialize the beta and theta values
beta_init = np.zeros(d)
theta_init = np.zeros(d)

# run the fast gradient algorithm to find the beta coefficients; only the
# last element of the returned sequence is kept
# NOTE(review): presumably the final iterate -- confirm in src.logistic_reg
fastgrad_betas = lreg.fastgradalgo(beta_init=beta_init,
                                   theta_init=theta_init,
                                   lamb=lamb,
                                   x=x_train,
                                   y=y_train,
                                   max_iter=1000)[-1]

# run scikit-learn's LogisticRegression() to find the beta coefficients.
# C is scikit-learn's inverse regularization strength; it is derived from
# lamb and the number of training rows n so both fits are comparable.
# NOTE(review): the 1/(2*n*lamb) mapping assumes fastgradalgo minimizes the
# mean logistic loss plus lamb*||beta||^2 -- confirm in src.logistic_reg
logit = LogisticRegression(C=1/(2*n*lamb),
                           fit_intercept=False,
                           tol=1e-8).fit(x_train, y_train)

# print the coefficients found using the fast gradient algorithm and sklearn
print("\nFast Gradient Algorithm Coefficients:\n", fastgrad_betas)
print("\nSci-kit Learn's LogisticRegression() Coefficients:\n", logit.coef_)

# apply the coefficients found using the fast gradient algorithm to test set:
# a positive linear score maps to +1, anything else to -1
y_predict = (np.dot(x_test, fastgrad_betas) > 0)*2 - 1

# print the misclassification error (share of test labels predicted wrongly)
print("\nMisclassification Error: %.2f%%" % (np.mean(y_predict != y_test)*100))



Fast Gradient Algorithm Coefficients:
 [-0.14274807 -0.17335035 -0.1147769  -0.10192978 -0.13242802 -0.15882511
 -0.09807183 -0.14414955 -0.12638705 -0.0997095  -0.20047638 -0.1474371
 -0.17097865 -0.07797474 -0.06824291 -0.18755379 -0.18393916 -0.18393293
 -0.07210646 -0.11044892 -0.08471212 -0.11821681 -0.15741851 -0.12275346
 -0.06796452 -0.0957313  -0.13293313 -0.11846007 -0.11449315 -0.09598109
 -0.12723513 -0.15807295 -0.09640093 -0.1241127  -0.11641163 -0.16153677
 -0.08688691 -0.10681594 -0.11069182 -0.12878644 -0.1443601  -0.13071213
 -0.18995598 -0.13631174 -0.16809871 -0.11727973 -0.14311504 -0.1587285
 -0.16637538 -0.18348254]

Sci-kit Learn's LogisticRegression() Coefficients:
 [[-0.14274808 -0.17335034 -0.11477689 -0.10192978 -0.13242802 -0.15882511
  -0.09807183 -0.14414954 -0.12638705 -0.09970951 -0.20047637 -0.14743709
  -0.17097865 -0.07797474 -0.06824292 -0.18755379 -0.18393915 -0.18393293
  -0.07210647 -0.11044892 -0.08471212 -0.11821681 -0.1574185  -0.12275346
  -