In [1]:
import numpy as np

# Hypothesis
$$h(x) = \frac{1}{1 + e^{-\theta^T x + b}}$$

# Cost function
$$J(\theta) = -\frac1m \sum_{i=1}^{m} \left[y^{(i)}\log(h(x^{(i)})) + (1-y^{(i)}) \log(1-h(x^{(i)}))\right] $$

# Partial derivatives of the cost function
$$\frac{\partial J}{\partial \theta_j} = \frac1m \sum_{i=1}^{m} (h(x^{(i)}) - y^{(i)}) x_j^{(i)}$$

In [2]:
class LogisticRegression:
    
    def __init__(self):
        self.weights = None
        self.bias = None
        
    # Class methods
    def fit(self, X, y, α=0.001, max_iter=10**6, tols=1e-8):
        m, n = X.shape
        old_weights = np.zeros(n)
        old_bias = 0
        
        # Gradient descent
        for _ in range(max_iter):
            new_weights = old_weights - α * (1/m) * np.dot(X.T, (self.__sigmoid(np.dot(X, old_weights) + old_bias) - y))
            new_bias = old_bias - α * (1/m) * np.sum(self.__sigmoid(np.dot(X, old_weights) + old_bias) - y)
            
            # Stopping criterion
            if (np.linalg.norm(new_weights - old_weights) > tols) and (np.abs(new_bias - old_bias) > tols):
                old_weights = new_weights
                old_bias = new_bias
                
            else:
                print("Converged after {} iterations.".format(_))
                break
                
        # Best wieghts and bias
        self.weights = new_weights
        self.bias = new_bias
    
    def predict(self, X):
        y_predicted = self.__sigmoid(np.dot(X, self.weights) + self.bias)
        return np.array([1 if i >= 0.5 else 0 for i in y_predicted])
    
    def accuracy_score(self, y_test_true, y_test_pred):
        return np.sum(y_test_true == y_test_pred) / len(y_test_true)
    
    def __sigmoid(self,z):
        return 1 / (1 + np.exp(-z))

# Import breast cancer dataset

In [3]:
from sklearn.datasets import load_breast_cancer

bc_data = load_breast_cancer()
X, y = bc_data.data, bc_data.target


# Split into train and test samples
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Train the model

In [4]:
logit = LogisticRegression()
logit.fit(X_train,y_train, α=0.0001)

Converged after 611 iterations.


# Make predictions

In [5]:
y_test_predicted = logit.predict(X_test)

# Accuracy Score

In [6]:
logit.accuracy_score(y_test, y_test_predicted)

0.8947368421052632

# Scikit-learn implementation

In [7]:
from sklearn.linear_model import LogisticRegression as sklearnLogit

clf = sklearnLogit(random_state=0).fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [8]:
from sklearn.metrics import accuracy_score

accuracy_score(clf.predict(X_test), y_test)

0.9210526315789473