In [2]:
# =============================================================================
# # Import 
# =============================================================================
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics import accuracy_score
import random


In [3]:
# =============================================================================
# # Load Data:
# =============================================================================
from sklearn.datasets import load_breast_cancer
dataset = load_breast_cancer()


In [4]:
# =============================================================================
# # Logistic regressor class:
# =============================================================================
class Logistic_Regression_DIY(BaseEstimator, ClassifierMixin):
    
    # Intiation Method
    def __init__(self, learning_rate = 5e-5, num_step = 50000, intercept = False):
        self.step = num_step
        self.learning_rate = learning_rate
        self.intercept = intercept
        pass


      
     # Sigmoid Calculation
    def sigmoid(self, scores):
        return 1 / (1 + np.exp(-scores))


    
    def log_likelihood(features, target, weights):
        scores = np.dot(features, weights)
        ll = np.sum( target*scores - np.log(1 + np.exp(scores)) )
        return ll
    


    
    # Fit Method of Classifier
    def fit(self, X, y):
        # data should be as a numpy array
        self._data_ = X.copy()
        self._targets_ = y.copy()
#        self._weights_ = logistic_regression(self._data_, self._targets_, self.step, self.learning_rate, self.intercept)
        #If we want to add an intercept 
        if self.intercept:
            intercept = np.ones((self._data_.shape[0], 1))
            self._data_ = np.hstack((intercept, self._data_))
        
        
        
        # Weight Calculation    
        weights = np.zeros(self._data_.shape[1])


        for step in range(self.step):
            scores = np.dot(self._data_, weights)
            predictions = self.sigmoid(scores)

            # Update weights with log likelihood gradient
            output_error_signal = self._targets_ - predictions

            gradient = np.dot(self._data_.T, output_error_signal)
            weights += self.learning_rate * gradient
            self._weights_ = weights

        return self
    
    
    # Predict Method (loop single prediction on sample data points)
    def predict(self, sample):
        preds=[]
        for feats in sample:
        
            scores = np.dot(feats, self._weights_)
            prediction = self.sigmoid(scores)
        
            if prediction > 0.5:
                preds.append(1)
            elif prediction <= 0.5:
                preds.append(0)
        return preds
    
                
    
    
    # Scoring Function
    def score(self, X, y):
        y_ = predict(X)
        score = 0
        for y1, y2 in zip(y, y_):
            if y1 != y2:
                score += 1
        score /= len(y)
        return score

In [6]:
    # Split Data:
    def split(xx, test_size):
        X_train, X_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=test_size)
        return X_train, X_test, y_train, y_test
    
    # Fit Classifier to data:
    def fit(classifier_class, X_train, y_train):
        classifier = classifier_class()
        classifier.fit(X_train, y_train)
        return classifier
    
    # Classifier Test with Confussion Matrix
    def test(classifier, X_test, y_test):
        conf_matr = np.zeros((3, 3)) # conf. matrix initiation: predicted, actual
        predictions = []
        # Fill up conf. matrix:
        for sample, target in zip(X_test, y_test):
            prediction = classifier.predict([sample])
            conf_matr[prediction, target] += 1
            predictions.append(prediction)
        
        # Accuracy :
        accuracy = accuracy_score(y_test, predictions)
        print('Accuracy: ','{0:.4f}%'.format(accuracy * 100))
        #conf. matrix:
        print('Confussion Matrix:\n', conf_matr)
        
    
    # Function to run everything
    def execution(test_size, classifier_class):
        X_train, X_test, y_train, y_test = split(dataset, test_size) # Split data call
        classifier = fit(classifier_class, X_train, y_train)  # Fit function call(fit func. fits classifier to data)
        test(classifier, X_test, y_test)  # Test function call (conf. matrix and accuracy score)

In [8]:
    # RUN
    execution(0.2, Logistic_Regression_DIY)  
    



Accuracy:  93.8596%
Confussion Matrix:
 [[37.  3.  0.]
 [ 4. 70.  0.]
 [ 0.  0.  0.]]


In [None]:
   # RUN
    execution(0.2, Logistic_Regression_DIY)  