In [1]:
import numpy as np
import math
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
import warnings
warnings.filterwarnings("ignore")

## Self Implementation of Logistic Regression

In [2]:
def get_m(input_, output, learning_rate, m):
    """Perform one batch gradient-descent step for logistic regression.

    Parameters
    ----------
    input_ : array-like of shape (M, n)
        Feature matrix, one sample per row.
    output : array-like of shape (M,)
        Binary targets (0 or 1) aligned with the rows of ``input_``.
    learning_rate : float
        Step size applied to the averaged gradient.
    m : array-like of shape (n + 1,)
        Current parameters: ``n`` feature weights followed by the bias
        (the coefficient of the constant 1 appended to every sample).

    Returns
    -------
    numpy.ndarray of shape (n + 1,)
        Updated parameters ``m - learning_rate * gradient``.
    """
    features = np.asarray(input_, dtype=float)
    targets = np.asarray(output, dtype=float)
    m = np.asarray(m, dtype=float)

    M = features.shape[0]
    if M == 0:
        # No samples means a zero gradient; return the parameters unchanged.
        return m.copy()

    # Append a constant-1 column so the last parameter acts as the bias.
    design = np.hstack([features, np.ones((M, 1))])
    z = design @ m

    # Overflow-safe sigmoid: only ever exponentiate a non-positive number.
    e = np.exp(-np.abs(z))
    hx = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))

    # Averaged gradient over ALL n + 1 parameters, bias included.
    m_slope = -(design.T @ (targets - hx)) / M
    return m - learning_rate * m_slope

In [3]:
def logistic_regression(input_, output, learning_rate, num_iterations):
    """Fit logistic-regression parameters by repeated gradient steps.

    Starts from an all-zero parameter vector with one entry per column of
    ``input_`` plus one extra entry, and refines it by calling ``get_m``
    ``num_iterations`` times.

    Parameters
    ----------
    input_ : numpy.ndarray of shape (M, n)
        Feature matrix, one sample per row.
    output : array-like of shape (M,)
        Targets passed through to ``get_m``.
    learning_rate : float
        Step size passed through to ``get_m``.
    num_iterations : int
        Number of update steps to run.

    Returns
    -------
    numpy.ndarray of shape (n + 1,)
        The parameter vector after the final step.
    """
    n_params = input_.shape[1] + 1
    weights = np.zeros(n_params)
    for _ in range(num_iterations):
        weights = get_m(input_, output, learning_rate, weights)
    return weights

In [4]:
def pred(input_, m):
    """Predict binary class labels with a fitted logistic-regression model.

    Parameters
    ----------
    input_ : array-like of shape (M, n)
        Feature matrix, one sample per row.
    m : array-like of shape (n + 1,)
        Model parameters: ``n`` feature weights followed by the bias.

    Returns
    -------
    list of int
        One label per sample: 1 when the predicted probability is strictly
        greater than 0.5, otherwise 0.
    """
    if len(input_) == 0:
        return []

    features = np.asarray(input_, dtype=float)
    m = np.asarray(m, dtype=float)

    # Equivalent to appending a constant 1 to every sample before the dot product.
    z = features @ m[:-1] + m[-1]

    # Overflow-safe sigmoid: only ever exponentiate a non-positive number.
    e = np.exp(-np.abs(z))
    hx = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))

    return (hx > 0.5).astype(int).tolist()

In [5]:
# Load the breast-cancer dataset bundled with scikit-learn and hold out 20% of
# the samples for testing; the fixed random_state keeps the split reproducible.
cancer_ds = datasets.load_breast_cancer()
x_train, x_test, y_train, y_test = train_test_split(
    cancer_ds.data,
    cancer_ds.target,
    test_size=0.2,
    random_state=0,
)

In [6]:
# Train the hand-written model on the training split, then label the test
# split with the learned parameters.
m = logistic_regression(
    x_train,
    y_train,
    learning_rate=0.0001,
    num_iterations=200,
)
y_pred_self = pred(input_=x_test, m=m)

## Using Logistic Regression Implemented in Sklearn

In [7]:
# Fit scikit-learn's LogisticRegression with its default settings on the same
# training split used for the hand-written model.
clf = LogisticRegression()
clf.fit(x_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

In [8]:
# Label the held-out test split with the fitted scikit-learn model.
y_pred = clf.predict(x_test)

## Comparison of Self-Implemented and Sklearn Implemented Logistic Regression

### Accuracy

In [9]:
# Compare test-set accuracy of the two models side by side.
print('-----------------------------------------------------------------------')
print('Accuracy of Self Implemented Logistic Regression is', accuracy_score(y_test, y_pred_self))
print('-----------------------------------------------------------------------')
print('Accuracy of Sklearn Implemented Logistic Regression is', accuracy_score(y_test, y_pred))
print('-----------------------------------------------------------------------')

-----------------------------------------------------------------------
Accuarcy of Self Implemented Logistic Regression is 0.8771929824561403
-----------------------------------------------------------------------
Accuarcy of Sklearn Implemented Logistic Regression is 0.956140350877193
-----------------------------------------------------------------------


### Classification Report

In [10]:
# Print per-class precision/recall/F1 for both models; a single print call with
# a newline separator emits each item on its own line.
print(
    "---------------------------------------------------------------------------",
    "Classification Report of Self Implemented Logistic Regression",
    classification_report(y_test, y_pred_self),
    "---------------------------------------------------------------------------",
    "Classification Report of Sklearn Implemented Logistic Regression",
    classification_report(y_test, y_pred),
    "---------------------------------------------------------------------------",
    sep="\n",
)

---------------------------------------------------------------------------
Classification Report of Self Implemented Logistic Regression
              precision    recall  f1-score   support

           0       0.87      0.83      0.85        47
           1       0.88      0.91      0.90        67

    accuracy                           0.88       114
   macro avg       0.88      0.87      0.87       114
weighted avg       0.88      0.88      0.88       114

---------------------------------------------------------------------------
Classification Report of Sklearn Implemented Logistic Regression
              precision    recall  f1-score   support

           0       0.92      0.98      0.95        47
           1       0.98      0.94      0.96        67

    accuracy                           0.96       114
   macro avg       0.95      0.96      0.96       114
weighted avg       0.96      0.96      0.96       114

-------------------------------------------------------------------

### Confusion Matrix

In [11]:
# Print the confusion matrix of each model; a single print call with a newline
# separator emits each item on its own line.
print(
    "---------------------------------------------------------------------------",
    "Confusion Matrix of Self Implemented Logistic Regression",
    confusion_matrix(y_test, y_pred_self),
    "---------------------------------------------------------------------------",
    "Confusion Matrix of Sklearn Implemented Logistic Regression",
    confusion_matrix(y_test, y_pred),
    "---------------------------------------------------------------------------",
    sep="\n",
)

---------------------------------------------------------------------------
Confusion Matrix of Self Implemented Logistic Regression
[[39  8]
 [ 6 61]]
---------------------------------------------------------------------------
Confusion Matrix of Sklearn Implemented Logistic Regression
[[46  1]
 [ 4 63]]
---------------------------------------------------------------------------
