**LOGISTIC REGRESSION: NEWTON RAPHSON APPROACH**

**Importing Libraries and getting the dataset**

In [21]:
#library
import pandas as pd
import numpy as np

mydata = pd.read_table("https://archive.ics.uci.edu/ml/machine-learning-databases/00267/data_banknote_authentication.txt",
        delimiter=",", header=0, names=["X1","X2","X3","X4","Y"])

#View Data
mydata.shape

(1371, 5)

**Data Preprocessing**

In [22]:
# get input "X1","X2","X3","X4"
inputX = mydata[["X1","X2","X3","X4"]].to_numpy()

# define input X
X_1 = np.matrix(np.ones(mydata.shape[0])).T
X = np.append(X_1,inputX,axis=1)

# define output Y
Y = mydata[["Y"]].to_numpy()
X[:5,:]

matrix([[ 1.     ,  4.5459 ,  8.1674 , -2.4586 , -1.4621 ],
        [ 1.     ,  3.866  , -2.6383 ,  1.9242 ,  0.10645],
        [ 1.     ,  3.4566 ,  9.5228 , -4.0112 , -3.5944 ],
        [ 1.     ,  0.32924, -4.4552 ,  4.5718 , -0.9888 ],
        [ 1.     ,  4.3684 ,  9.6718 , -3.9606 , -3.1625 ]])

**Setting the Newton Raphson Method**

In [23]:
# f(β)
def f(beta):
    return np.ravel(np.ones(len(Y))*(np.log(1+np.exp(X*beta)))-Y.T*X*beta)[0]

# ∇f(β)
def deriv1_f(beta):
    return X.T*(1/(1+1/np.exp(X*beta))-Y)

# ∇²f(β)
def deriv2_f(beta):
    return X.T*(np.diag(np.ravel(np.exp(X*beta)/np.power(1+np.exp(X*beta),2)))*X)

# Newton raphson Method
beta = np.matrix(np.zeros(X.shape[1])).T
TOL = np.power(10.,-10)
counter = 0

while np.linalg.norm(deriv1_f(beta)) > TOL:
  counter += 1
  beta -= np.linalg.inv(deriv2_f(beta))*deriv1_f(beta)
  
print('iter =',counter)
print(beta)
print('norm =',np.linalg.norm(deriv1_f(beta)))

iter = 13
[[ 7.32180471]
 [-7.85933049]
 [-4.19096321]
 [-5.28743068]
 [-0.60531897]]
norm = 3.4587094900356575e-13


**LOGISTIC REGRESSION: GRADIENT DESCENT APPROACH**

In [24]:
# Gradient Descent
beta = np.matrix(np.zeros(X.shape[1])).T
TOL = np.power(10.,-10)
lam = 0.001 # learning_rate
counter = 0

while np.linalg.norm(deriv1_f(beta)) > TOL:
  counter += 1
  beta -= lam*deriv1_f(beta)
    
print('iter =',counter)
print(beta)
print('norm =',np.linalg.norm(deriv1_f(beta)))

iter = 164906
[[ 7.32180471]
 [-7.85933049]
 [-4.19096321]
 [-5.28743068]
 [-0.60531897]]
norm = 9.998779425131155e-11


In [25]:
# Classification Process
Xtest = [[1, 0.4,0.5,1.0,1.5]] # X1=0.4, X2=0.5, X3=1.0, X4=1.5
p = (np.exp(np.dot(Xtest, beta)) / (1 + np.exp(np.dot(Xtest, beta))))
p

matrix([[0.01609821]])

**LOGISTIC REGRESSION: SCIKIT-LEARN**

In [26]:
# Library
from sklearn.linear_model import LogisticRegression

# Newton-Conjugate Gradient
clf1 = LogisticRegression(penalty="none",solver='newton-cg',fit_intercept=False)
clf1.fit(X,np.ravel(Y))
print(clf1.coef_)

[[ 7.32180341 -7.85932911 -4.19096249 -5.28742975 -0.60531885]]




In [29]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [27]:
# Stochastic Average Gradient
clf2 = LogisticRegression(penalty="none",solver='sag',fit_intercept=False,max_iter=1000000) 
clf2.fit(X,np.ravel(Y))
print(clf2.coef_)



[[ 6.77647393 -7.2355473  -3.87108937 -4.87179663 -0.54036167]]


**Classification Process**

In [None]:
clf1.predict([[1, 0.4,0.5,1.0,1.5]])