In [1]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.base import BaseEstimator, ClassifierMixin
iris = datasets.load_iris(as_frame=True)
epsilon = 1e-9

In [2]:
iris.data.shape, iris.target.shape

((150, 4), (150,))

In [167]:
# from sklearn.model_selection import train_test_split
# from sklearn.linear_model import LogisticRegression
# from sklearn.metrics import accuracy_score, roc_auc_score, f1_score, classification_report

# X = iris.data
# y = iris.target

# # Split dataset into training and testing set
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# # Initialize and train Logistic Regression model
# log_reg = LogisticRegression()
# log_reg.fit(X_train, y_train)

# # Predict on the test set
# y_pred = log_reg.predict(X_test)

# # Calculate accuracy
# accuracy = accuracy_score(y_test, y_pred)
# accuracy

In [3]:
class LogisticRegression:
    def __init__(self, penalty='l1', *, tol=1e-6, C=0.01, lr=0.001, max_iter=500):
        self.penalty = penalty
        self.tol = tol
        self.C = C
        self.learning_rate = lr
        self.max_iter = max_iter
        self.weights = None
        self.bias = None
        self.losses = []

    def _sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def fit(self, X, y):
        def loss(true_labels, predicted_labels):
            term1 = true_labels * np.log(predicted_labels + epsilon)
            term2 = (1 - true_labels) * np.log(1 - predicted_labels + epsilon)
            return -np.mean(term1 + term2)
        num_samples, num_features = X.shape
        self.weights = np.zeros(num_features)
        self.bias = 0

        for _ in range(self.max_iter):
            linear_model = np.dot(X, self.weights) + self.bias
            predicted_labels = self._sigmoid(linear_model)
            self.losses.append(loss(y, predicted_labels))
            gradient = predicted_labels - y 
            if self.penalty == 'l1':
                weight_derivative = (1 / num_samples) * np.dot(X.T, gradient) + (1 / self.C) * np.sign(self.weights)
            elif self.penalty == 'l2':
                weight_derivative = (1 / num_samples) * np.dot(X.T, gradient) + (1 / self.C) * self.weights
            else:
                raise ValueError("Invalid value for penalty. Choose 'l1' or 'l2'.")
            bias_derivative = (1 / num_samples) * np.sum(gradient)
            self.weights -= self.learning_rate * weight_derivative
            self.bias -= self.learning_rate * bias_derivative

        return self
            
    def predict(self, X):
        linear_model_output = np.dot(X, self.weights) + self.bias
        probabilities = self._sigmoid(linear_model_output)
        class_predictions = [1 if probability > 0.5 else 0 for probability in probabilities]
        return np.array(class_predictions)
    
    def score(self, X, y):
        y_pred = self.predict(X)
        return accuracy_score(y, y_pred)
    
    def get_params(self, deep=True):
        return {"penalty": self.penalty, "tol": self.tol, "C": self.C, "lr": self.learning_rate, "max_iter": self.max_iter}
    
    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self


In [169]:
def build_model(dataset):
    X, y = dataset.data, dataset.target

    # 1. split the dataset into training, validation and test dataset
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)

    # 2. use cross-validation techniques to choose model hyper-parameters penalty ('l1' or 'l2') and C
    param_grid = {
        'penalty': ['l1', 'l2'],
        'tol': [1e-7, 1e-6, 1e-5, 1e-4, 1e-3],
        'C': [0.01, 0.1, 1.0, 10.0, 100.0],
        'lr': [0.00001, 0.0001, 0.001, 0.01, 0.1],
        'max_iter': [500, 1000, 2000, 4000, 8000]
    }

    grid_search = GridSearchCV(LogisticRegression(), param_grid, cv=5, scoring='accuracy', verbose=100, n_jobs=-1)
    grid_search.fit(X_train, y_train)

    optimal_penalty = grid_search.best_params_['penalty']
    optimal_C = grid_search.best_params_['C']

    # 3. train model with optimal hyper-parameters
    model = LogisticRegression(penalty=optimal_penalty, C=optimal_C)

    # 4. train model with training + validation datasets
    model.fit(X_train, y_train)

    # 5. test model performance with test dataset
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    print(f'Best parameters: {grid_search.best_params_}')
    print(f'Accuracy: {accuracy * 100}%')

    return model

def test():
    model = build_model(iris)

In [170]:
test()

Fitting 5 folds for each of 1250 candidates, totalling 6250 fits
[CV 1/5; 1/1250] START C=0.01, lr=1e-05, max_iter=500, penalty=l1, tol=1e-07....
[CV 2/5; 1/1250] START C=0.01, lr=1e-05, max_iter=500, penalty=l1, tol=1e-07....
[CV 3/5; 1/1250] START C=0.01, lr=1e-05, max_iter=500, penalty=l1, tol=1e-07....
[CV 4/5; 1/1250] START C=0.01, lr=1e-05, max_iter=500, penalty=l1, tol=1e-07....
[CV 5/5; 1/1250] START C=0.01, lr=1e-05, max_iter=500, penalty=l1, tol=1e-07....
[CV 1/5; 2/1250] START C=0.01, lr=1e-05, max_iter=500, penalty=l1, tol=1e-06....
[CV 2/5; 2/1250] START C=0.01, lr=1e-05, max_iter=500, penalty=l1, tol=1e-06....
[CV 3/5; 2/1250] START C=0.01, lr=1e-05, max_iter=500, penalty=l1, tol=1e-06....
[CV 3/5; 1/1250] END C=0.01, lr=1e-05, max_iter=500, penalty=l1, tol=1e-07;, score=0.458 total time=   0.3s
[CV 4/5; 2/1250] START C=0.01, lr=1e-05, max_iter=500, penalty=l1, tol=1e-06....
[CV 4/5; 1/1250] END C=0.01, lr=1e-05, max_iter=500, penalty=l1, tol=1e-07;, score=0.292 total tim

Best parameters: {'C': 0.01, 'lr': 1e-05, 'max_iter': 500, 'penalty': 'l1', 'tol': 1e-07}
Accuracy: 73.33333333333333%