# Logistic Regression

In [1]:
import numpy as np

class LogisticRegression:
    """Binary logistic regression fitted with full-batch gradient descent.

    An intercept is learned by prepending a column of ones to the feature
    matrix, so ``beta`` has shape ``(p + 1, 1)`` where ``p`` is the number
    of features seen by :meth:`fit`.

    Parameters
    ----------
    epochs : int, default 1000
        Number of gradient-descent steps performed by :meth:`fit`.
    learning_rate : float, default 0.01
        Step size applied to the mean gradient at every step.
    """

    def __init__(self, epochs=1000, learning_rate=0.01):
        self.learning_rate = learning_rate
        self.epochs = epochs

    def fit(self, X, y):
        """Estimate ``beta`` from training data.

        Parameters
        ----------
        X : array-like of shape (N, p)
            Training features.
        y : array-like of shape (N,) or (N, 1)
            Binary targets encoded as 0 / 1.

        Notes
        -----
        The coefficients are initialised from NumPy's global random state
        (``np.random.rand``); call ``np.random.seed`` beforehand for a
        reproducible fit.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        N, self.p = X.shape
        X = self._pad_ones(X)
        if y.ndim == 1:
            # Work with a column vector so ``y_hat - y`` is element-wise
            # instead of broadcasting (N, 1) against (N,) into (N, N).
            y = np.expand_dims(y, axis=1)

        self.beta = np.random.rand(self.p + 1, 1)
        for _ in range(self.epochs):
            y_hat = self._predict_probs(X)
            # Gradient of the mean log-loss with respect to beta.
            update = (self.learning_rate / N) * np.dot(X.T, y_hat - y)
            self.beta -= update

    def predict(self, X):
        """Return hard 0 / 1 class labels for ``X``.

        Parameters
        ----------
        X : array-like of shape (N, p)

        Returns
        -------
        numpy.ndarray of int, shape (N, 1)
            Probabilities rounded with ``np.round``; because NumPy rounds
            half to even, a probability of exactly 0.5 maps to class 0.

        Raises
        ------
        ValueError
            If ``X`` does not have the number of features seen in ``fit``.
        """
        probs = self._predict_probs(self._design_matrix(X))
        return np.round(probs).astype(int)

    def predict_probs(self, X):
        """Return the predicted probability of class 1 for each row of ``X``.

        Parameters
        ----------
        X : array-like of shape (N, p)

        Returns
        -------
        numpy.ndarray of float, shape (N, 1)

        Raises
        ------
        ValueError
            If ``X`` does not have the number of features seen in ``fit``.
        """
        # The padded matrix must be the one that is multiplied by beta;
        # the unpadded X has one column fewer than beta has rows.
        return self._predict_probs(self._design_matrix(X))

    def _design_matrix(self, X):
        """Validate the feature count of ``X`` and prepend the ones column."""
        X = np.asarray(X)
        p = X.shape[1]
        if p != self.p:
            raise ValueError("Training features doesn't match test features.")
        return self._pad_ones(X)

    def _predict_probs(self, X):
        """Apply the model to an already padded design matrix."""
        return self._sigmoid(np.dot(X, self.beta))

    def _sigmoid(self, X):
        """Element-wise logistic function that avoids overflow in ``exp``.

        ``exp`` is only ever evaluated on non-positive arguments: the usual
        ``1 / (1 + exp(-x))`` form is used for ``x >= 0`` and the equivalent
        ``exp(x) / (1 + exp(x))`` form for ``x < 0``.
        """
        X = np.asarray(X, dtype=float)
        out = np.empty_like(X)
        non_negative = X >= 0
        out[non_negative] = 1 / (1 + np.exp(-X[non_negative]))
        exp_neg = np.exp(X[~non_negative])
        out[~non_negative] = exp_neg / (1 + exp_neg)
        return out

    def _pad_ones(self, X):
        """Return ``X`` with a leading column of ones (the intercept term)."""
        N = X.shape[0]
        return np.concatenate([np.ones((N, 1)), X], axis=1)

In [2]:
from sklearn.datasets import load_iris
from sklearn.metrics import f1_score, accuracy_score
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split

In [3]:
# Load Iris, scale each sample with sklearn's `normalize`, and collapse the
# three species into a binary target: class 0 versus everything else.
iris_features, iris_species = load_iris(return_X_y=True)
X = normalize(iris_features)
y = (iris_species != 0).astype(int)

# Hold out 20% of the samples for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [4]:
# Fit the classifier on the training split and predict the held-out labels.
clf = LogisticRegression(learning_rate=0.1, epochs=1000)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Score the predictions and report both metrics with six decimals.
f1, accuracy = f1_score(y_test, y_pred), accuracy_score(y_test, y_pred)
for template, score in (("f1_score: {:f}", f1), ("accuracy: {:f}", accuracy)):
    print(template.format(score))

f1_score: 1.000000
accuracy: 1.000000
