# Logistic Regression using Newton Raphson Method

Downloaded all the dependencies and packages needed to analyse the data.

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import datasets

Loaded the breast cancer data set from the datasets in sklearn which is same data as the one given in the downloaded link

In [2]:
ab = datasets.load_breast_cancer()

# Data Cleaning

Cleaned the data by renaming all the rows whereby python had assumed the first row as the names of all the rows.

In [3]:
ab.columns =['id', 'Clump Thickness', 'Uniformity of Cell Size', 'Uniformity of Cell Shape', 'Marginal Adhesion', 
             'Single Epithelial Cell Size', 'Bare Nuclei', 'Bland Chromatin', 'Normal Nucleoli', 'Mitoses','diagnosis']

Defined the x and y variable which are used to split the data into test and train data sets.

In [4]:
x = ab.data

In [5]:
y = ab.target

# Splitting and Data Training

I divided the dataset ab randomly into 80% training instances and 20% testing instances as directed:

In [6]:
train_x, test_x,train_y, test_y = train_test_split(x, y, test_size=0.2)

# Newton- Raphson Method

It is a second order optimization algorithm that allows to find best weights where we compute the hessian matrix, the gradient, the predicted probabilities and the acccuracy of the model as shown in the code below.

In [7]:
def sigmoidalgorithm(number):             
    return 1/(1+np.exp(-number))

In [8]:
def algorithmTopredict(val, w):
    var_linear = np.dot(val,w)
    val_y = sigmoidalgorithm( var_linear)
    valcls_y = [1 if i>0.5 else 0 for i in val_y]
    return  valcls_y 

In [9]:
def testaccuracyModel(val_y, predicted_y):
    resultofAccuracy = np.sum(val_y == predicted_y)/len(val_y)
    return resultofAccuracy

# Training and Applying Newton algorithm on data set

The solution is faster because it allows to find best weights in logistic function in fewer iterations compared to gradient descent method.

In [10]:
def newton(item_a,item_b):
    item_a = train_x
    item_b = train_y

    w = np.zeros(item_a.shape[1]) 

    totalIterations= 10;
    newton_cost = np.zeros(( totalIterations,1))

    for i in range( totalIterations):
        H = 1/(1+np.exp(-item_a.dot(w)))
        newton_cost[i] = -(1/(np.size(item_a,0)))*sum(item_b*np.log(H) + (1- item_b)*np.log(1-H))
        findgradient = (1/(np.size(item_a,0)))*(item_a.T.dot((H-item_b)))
        # hessian matrix
        Hessian = (1/(np.size(item_a,0)))*(item_a.T.dot(np.diag(H.reshape(np.size(item_a,0),))).dot(np.diag((1-H).reshape(np.size(item_a,0),))).dot( item_a))
        #computing weight vector
        w = w - np.linalg.pinv(Hessian).dot(findgradient)
    return w

# Computing the accuracy using the Newton Raphson Algorithm

In [11]:
w = newton(train_x,train_y )

  if sys.path[0] == '':
  if sys.path[0] == '':


In [12]:
prediction_newton = algorithmTopredict(test_x, w)

In [13]:
accuracy_newton = testaccuracyModel(test_y,prediction_newton)

In [14]:
print(accuracy_newton)

0.9649122807017544


# Computing the accuracy of logistic regression using the sklearn

In [15]:
from sklearn.linear_model import LogisticRegression

valregression = LogisticRegression(solver = 'newton-cg')
valregression.fit(train_x, train_y)

LogisticRegression(solver='newton-cg')

In [16]:
prediction_sk = valregression.predict(test_x)

In [17]:
Model_accuracy = testaccuracyModel(test_y,prediction_sk )

In [18]:
print(Model_accuracy)

0.9736842105263158
