Import the scikit-learn dataset loader, the train/test splitting helper, and the reference logistic regression model

In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

Define a function that calculates classification accuracy

In [2]:
def calculate_accuracy(predicted, targets):
    """Return the fraction of predictions that equal their target label.

    Args:
        predicted: Sized sequence (list, tuple, 1-D array, ...) of predicted labels.
        targets: Sized sequence of true labels, aligned position-by-position
            with ``predicted``.

    Returns:
        float: Number of positions where ``predicted`` equals ``targets``,
        divided by the number of samples.

    Raises:
        ValueError: If the two inputs differ in length, or if they are empty.
    """
    n_samples = len(predicted)
    # zip() silently stops at the shorter input, which would produce a wrong
    # score instead of an error, so mismatched lengths are rejected up front.
    if n_samples != len(targets):
        raise ValueError(
            "predicted and targets must have the same length, got "
            f"{n_samples} and {len(targets)}"
        )
    if n_samples == 0:
        raise ValueError("cannot calculate accuracy for empty inputs")

    matches = sum(
        1 for predicted_value, target in zip(predicted, targets)
        if predicted_value == target
    )
    return matches / n_samples

Load the breast cancer dataset and split it into training and test sets

In [3]:
# Load the breast cancer dataset shipped with scikit-learn.
cancer = load_breast_cancer()

# Hold out 25% of the samples for testing. train_test_split returns one
# (train, test) pair per input array, in the order the arrays are passed,
# and the fixed random_state makes the split reproducible.
train_datas, test_datas, train_targets, test_targets = train_test_split(
    cancer.data,
    cancer.target,
    test_size=0.25,
    random_state=5,
)

Logistic regression implementation

In [4]:
from sklearn.base import BaseEstimator, ClassifierMixin
import numpy as np

class MyLogisticRegression(BaseEstimator, ClassifierMixin):
    """Binary logistic regression trained with batch gradient descent.

    The model has no intercept term: the predicted probability for a sample
    is ``sigmoid(features @ weights)``. Predictions are emitted as 0/1 labels,
    so targets are expected to be encoded as 0 and 1.

    Parameters
    ----------
    learning_rate : float, default=0.01
        Step size applied to the gradient (the gradient is summed over the
        training samples, not averaged).
    iterations : int, default=5000
        Maximum number of gradient-descent updates.
    epsilon : float, default=0.1
        Early-stopping tolerance: training stops as soon as the Euclidean
        norm of a proposed weight update is smaller than this value.
    threshold : float, default=0.5
        Probabilities greater than or equal to this value are labelled 1.

    Attributes
    ----------
    train_features : ndarray of shape (n_samples, n_features)
        Training features stored by ``fit``.
    train_targets : ndarray of shape (n_samples,)
        Training labels stored by ``fit``.
    weights : ndarray of shape (n_features,)
        Coefficients learned by ``fit``.
    """

    def __init__(self, learning_rate = 0.01,
                 iterations = 5000,
                 epsilon = 0.1,
                 threshold = 0.5):
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.epsilon = epsilon
        self.threshold = threshold

    def fit(self, features, targets):
        """Learn the weights from the training data.

        Parameters
        ----------
        features : array-like of shape (n_samples, n_features)
        targets : array-like of shape (n_samples,)

        Returns
        -------
        self : MyLogisticRegression
            The fitted estimator, so calls can be chained as scikit-learn
            estimators allow.

        Raises
        ------
        ValueError
            If ``features`` is not 2-D, or ``targets`` is not a 1-D array with
            one entry per row of ``features``.
        """
        # asarray is a no-op for ndarrays and lets lists / DataFrames through;
        # the training code relies on ndarray attributes such as ``.T``.
        features = np.asarray(features)
        targets = np.asarray(targets)
        if features.ndim != 2:
            raise ValueError(
                f"features must be 2-dimensional, got {features.ndim} dimension(s)"
            )
        # A mis-shaped target array would otherwise broadcast silently in the
        # gradient computation and yield meaningless weights.
        if targets.ndim != 1 or targets.shape[0] != features.shape[0]:
            raise ValueError(
                "targets must be 1-dimensional with one entry per sample, got "
                f"shape {targets.shape} for {features.shape[0]} sample(s)"
            )

        self.train_features = features
        self.train_targets = targets
        self.weights = self.calculate_weights()
        return self

    def predict(self, features):
        """Return a list of 0/1 class labels for ``features``."""
        predictions = self.predict_with_weights(features, self.weights)
        return self.classify(predictions)

    def classify(self, predictions):
        """Convert probabilities to a list of 0/1 labels using ``self.threshold``."""
        labels = np.asarray(predictions) >= self.threshold
        # tolist() keeps the return type a plain list of Python ints.
        return labels.astype(int).tolist()

    def calculate_weights(self):
        """Run gradient descent from all-zero weights and return the result.

        Performs at most ``self.iterations`` updates. If a proposed update
        moves the weights by less than ``self.epsilon`` (Euclidean norm), the
        loop stops and the weights from before that update are returned.
        """
        weights = np.zeros(self.train_features.shape[1])
        for _ in range(self.iterations):
            improved_weights = self.improve_weights(weights)
            if np.linalg.norm(improved_weights - weights) < self.epsilon:
                break
            weights = improved_weights
        return weights

    def improve_weights(self, weights):
        """Return ``weights`` after one gradient-descent step on the training data."""
        predictions = self.predict_with_weights(self.train_features, weights)
        # Gradient of the summed log-loss with respect to the weights.
        gradient = np.dot(self.train_features.T, predictions - self.train_targets)
        improved_weights = weights - self.learning_rate * gradient
        return improved_weights

    def predict_with_weights(self, features, weights):
        """Return the predicted probability of class 1 for each row of ``features``."""
        z = np.dot(features, weights)
        return self.logistic_function(z)

    def logistic_function(self, x):
        """Return the logistic sigmoid ``1 / (1 + exp(-x))``, element-wise."""
        # For large negative x, exp(-x) overflows to inf and 1 / (1 + inf)
        # evaluates to 0.0, which is the correct limit. Only the overflow
        # warning is silenced; the computed values are unchanged.
        with np.errstate(over="ignore"):
            return 1/(1+np.exp(-x))

Evaluate scikit-learn's LogisticRegression as a baseline

In [5]:
# Baseline: scikit-learn's LogisticRegression with default settings.
# fit() returns the estimator itself, so construction and training are chained.
sklg = LogisticRegression().fit(train_datas, train_targets)
sklg_predicted = sklg.predict(test_datas)
print("Sklearn accuracy:", calculate_accuracy(predicted=sklg_predicted, targets=test_targets))

Sklearn accuracy: 0.9790209790209791


Evaluate the custom logistic regression implementation

In [6]:
# Train the hand-written model with its default hyperparameters and score it
# on the same test split that was used for the scikit-learn baseline.
lg = MyLogisticRegression()
lg.fit(features=train_datas, targets=train_targets)
predicted = lg.predict(features=test_datas)
print("My accuracy:", calculate_accuracy(predicted=predicted, targets=test_targets))

My accuracy: 0.9440559440559441
