In [None]:
from copy import deepcopy
import pandas as pd
import numpy as np
import math


class LogisticRegression:
    """
    Binary logistic regression trained with batch gradient descent.

    An intercept column is prepended to the feature matrix automatically. All
    weights start at 1 and are updated ``n_iter`` times using the gradient of
    the mean log-loss, scaled by ``learning_rate``.

    Attributes:
        n_iter: Number of gradient-descent iterations performed by ``fit``.
        learning_rate: Step size applied to the gradient on every iteration.
        weights: ``None`` until ``fit`` is called; afterwards a ``pd.Series``
            of fitted weights labelled by column name, intercept first.
    """

    def __init__(
        self,
        learning_rate: float = 0.1,
        n_iter: int = 10,
    ) -> None:
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.weights = None

    def __str__(self) -> str:
        return f"{type(self).__name__} class: n_iter = {self.n_iter}, learning_rate = {self.learning_rate}"

    @staticmethod
    def _sigmoid(logits: np.ndarray) -> np.ndarray:
        """Map logits to probabilities without overflowing on large magnitudes."""
        # 1 / (1 + exp(-z)) == exp(-log(1 + exp(-z))); logaddexp evaluates the
        # inner logarithm stably, so exp() is never called on a huge argument.
        return np.exp(-np.logaddexp(0.0, -logits))

    @staticmethod
    def _with_intercept(X: pd.DataFrame) -> pd.DataFrame:
        """Return a positionally indexed copy of X with a leading "intercept" column."""
        # reset_index returns a new frame, so the caller's X is never modified.
        design = X.reset_index(drop=True)
        design.insert(loc=0, column="intercept", value=1.0)
        return design

    def _require_fitted(self) -> None:
        """Raise a clear error when the model is used before ``fit``."""
        if self.weights is None:
            raise RuntimeError(
                "This LogisticRegression instance is not fitted yet; call fit() first."
            )

    def fit(self, X: pd.DataFrame, y: pd.Series, verbose=False) -> None:
        """
        Estimate the weights by gradient descent on the mean log-loss.

        Rows of ``X`` and ``y`` are matched by position, not by index label,
        so the two inputs may carry different indices.

        Args:
            X: Feature matrix, one row per observation. Must not already
                contain a column named "intercept".
            y: Binary target (0/1) with one value per row of ``X``.
            verbose: Falsy to disable logging; an integer ``k`` prints the
                loss on every ``k``-th iteration (``True`` prints every one).

        Raises:
            ValueError: If ``X`` is empty or ``X`` and ``y`` differ in length.
        """
        design = self._with_intercept(X)
        features = design.to_numpy(dtype=float)
        # Plain arrays make every operation positional; mixing Series with
        # different indices would silently align by label and yield NaN.
        target = np.asarray(y, dtype=float).ravel()

        num_observations, num_features = features.shape
        if num_observations == 0:
            raise ValueError("Cannot fit on an empty dataset.")
        if target.shape[0] != num_observations:
            raise ValueError(
                f"X has {num_observations} rows but y has {target.shape[0]} values."
            )

        # Set initial weights equal to 1
        weights = np.ones(num_features)
        eps = 1e-15  # keeps log() finite when a probability saturates at 0 or 1

        # Make iterations to update weights
        for i in range(1, self.n_iter + 1):
            probability = self._sigmoid(features @ weights)

            # The loss is only needed for logging, so skip it otherwise
            if verbose and i % verbose == 0:
                log_loss = -np.mean(
                    target * np.log(probability + eps)
                    + (1 - target) * np.log(1 - probability + eps)
                )
                print(f"{i} | loss: {log_loss}")

            # Gradient of the mean log-loss with respect to the weights
            gradient = features.T @ (probability - target) / num_observations
            weights = weights - self.learning_rate * gradient

        # Label the final weights so prediction can align them by column name
        self.weights = pd.Series(weights, index=design.columns)

    def get_coef(self) -> np.ndarray:
        """
        Return the fitted feature coefficients, excluding the intercept.

        Raises:
            RuntimeError: If the model has not been fitted.
        """
        self._require_fitted()
        return np.array(self.weights)[1:]

    def predict_proba(self, X) -> np.ndarray:
        """
        Return the predicted probability of class 1 for every row of ``X``.

        Args:
            X: DataFrame with the same feature columns used in ``fit``.

        Raises:
            RuntimeError: If the model has not been fitted.
        """
        self._require_fitted()
        design = self._with_intercept(X)

        # DataFrame.dot aligns the columns with the labels of the weights
        logits = np.asarray(design.dot(self.weights), dtype=float)
        return self._sigmoid(logits)

    def predict(self, X) -> np.ndarray:
        """
        Return the predicted class (0 or 1) for every row of ``X``.

        A row is assigned class 1 when its predicted probability is strictly
        greater than 0.5.
        """
        y_predicted_probability = self.predict_proba(X)
        return np.where(y_predicted_probability > 0.5, 1, 0)

In [25]:
# Create fictitious dataset

from sklearn.datasets import make_classification

# Synthetic binary-classification problem: 1000 rows, 14 features (10 informative)
X, y = make_classification(
    n_samples=1000,
    n_features=14,
    n_informative=10,
    random_state=42,
)

# Wrap the raw arrays in pandas objects and give the features readable names
X = pd.DataFrame(X, columns=[f"col_{col}" for col in range(X.shape[1])])
y = pd.Series(y)

In [26]:
# Create a model instance that runs 50 gradient-descent iterations
# (learning_rate is left at the constructor's default)

sample_one = LogisticRegression(n_iter=50)

In [27]:
# Train the model, printing the loss on every 10th iteration
sample_one.fit(X, y, verbose=10)

10 | loss: 1.8729518266222953
20 | loss: 1.1626945313705068
30 | loss: 0.8288847284427477
40 | loss: 0.654884590222696
50 | loss: 0.5606818660628201


In [28]:
# Display the fitted feature coefficients (the intercept weight is excluded)
sample_one.get_coef()

array([ 0.05247374,  0.63579113,  0.35707211,  0.20338172, -0.71400341,
        0.65306157, -0.30353191,  0.74286901,  0.14726892,  0.16403292,
        0.81238906,  0.0167088 ,  0.67663333,  1.21843872])