## **Linear Regression**

In [1]:
import numpy as np
import pandas as pd

class MyLineReg:
    """Linear regression fitted by (mini-batch) gradient descent on MSE.

    Parameters
    ----------
    n_iter : int
        Number of gradient steps performed by ``fit``.
    learning_rate : float or callable
        Step size. A callable is invoked with the 1-based iteration number
        and must return the step size for that iteration.
    weights : array-like or None
        Initial weights, intercept first. ``None`` means "start from ones".
    metric : {'mae', 'mse', 'rmse', 'mape', 'r2'} or None
        Quality metric reported in verbose output and by ``get_best_score``.
    reg : {'l1', 'l2', 'elasticnet'} or None
        Regularization type added to the loss and the gradient.
    l1_coef, l2_coef : float
        Regularization strengths.
    sgd_sample : int, float or None
        Mini-batch size: an int is a row count, a float is a fraction of the
        training rows. A falsy value (``None``/0) uses the full data set.
    random_state : int
        Seed passed to ``random.seed`` at the start of ``fit``.
    """

    def __init__(self, n_iter, learning_rate, weights=None, metric=None, reg=None, l1_coef=0, l2_coef=0, sgd_sample=None, random_state=42):
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.weights = weights
        self.metric = metric
        self.best_metric = None
        self.l1_coef = l1_coef
        self.l2_coef = l2_coef
        self.reg = reg
        self.sgd_sample = sgd_sample
        self.random_state = random_state

    def __str__(self):
        return f"MyLineReg class: \n{' '.join([f'{key} = {value}' for key, value in self.__dict__.items()])}"

    def __mse_loss(self, y, y_pred):
        """Return the mean squared error between ``y`` and ``y_pred``."""
        return ((y - y_pred) ** 2).mean()

    def __mse_gradient_helper(self, X, y, y_pred):
        """Return ``X.T @ (y_pred - y)``; the caller applies the ``2 / n`` factor."""
        return X.T @ (y_pred - y)

    def __metric(self, y_train, y_pred):
        """Evaluate ``self.metric``; returns ``None`` for an unset/unknown metric."""
        if self.metric == 'mae':
            return (np.abs(y_train - y_pred)).mean()
        if self.metric == 'mse':
            return ((y_train - y_pred) ** 2).mean()
        if self.metric == 'rmse':
            return (((y_train - y_pred) ** 2).mean()) ** 0.5
        if self.metric == 'mape':
            return (np.abs((y_train - y_pred) / y_train) * 100).mean()
        if self.metric == 'r2':
            return 1 - (((y_train - y_pred) ** 2).sum() / ((y_train - y_train.mean()) ** 2).sum())
        return None

    def __regularization(self):
        """Return ``(penalty, gradient_penalty)`` for the current weights.

        The penalty is computed over the whole weight vector, i.e. the
        intercept weight is penalized as well.
        """
        penalty = 0
        gradient_penalty = 0

        if self.reg == 'l1':
            penalty = self.l1_coef * np.sum(np.abs(self.weights))
            gradient_penalty = self.l1_coef * np.sign(self.weights)
        elif self.reg == 'l2':
            penalty = self.l2_coef * np.sum((self.weights) ** 2)
            gradient_penalty = 2 * self.l2_coef * self.weights
        elif self.reg == 'elasticnet':
            penalty = self.l1_coef * np.sum(np.abs(self.weights)) + self.l2_coef * np.sum((self.weights) ** 2)
            gradient_penalty = self.l1_coef * np.sign(self.weights) + 2 * self.l2_coef * self.weights
        return penalty, gradient_penalty

    def __sgd_helper(self, X_train, y_train, y_pred):
        """Return the MSE gradient estimated on a random mini-batch.

        All three arguments are NumPy arrays aligned row by row.

        Raises
        ------
        TypeError
            If ``sgd_sample`` is neither an int nor a float.
        ValueError
            If the resulting batch would be empty (or larger than the data,
            raised by ``random.sample``).
        """
        import random  # local import: the module is not imported at file level

        n_rows = X_train.shape[0]
        if isinstance(self.sgd_sample, (int, np.integer)):
            batch_size = int(self.sgd_sample)
        elif isinstance(self.sgd_sample, (float, np.floating)):
            batch_size = round(self.sgd_sample * n_rows)
        else:
            raise TypeError(
                f"sgd_sample must be an int, a float or None, got {type(self.sgd_sample).__name__}"
            )
        if batch_size < 1:
            raise ValueError("sgd_sample selects an empty mini-batch")

        sgd_idx = random.sample(range(n_rows), batch_size)
        X_train_sgd = X_train[sgd_idx]
        y_train_sgd = y_train[sgd_idx]
        y_pred_sgd = y_pred[sgd_idx]
        return 2 / len(sgd_idx) * self.__mse_gradient_helper(X_train_sgd, y_train_sgd, y_pred_sgd)

    def fit(self, X_train, y_train, verbose=False):
        """Run ``n_iter`` gradient steps on the training data.

        Parameters
        ----------
        X_train : pandas.DataFrame or 2-D array-like
            Feature matrix without the intercept column.
        y_train : pandas.Series or 1-D array-like
            Target values, aligned with ``X_train`` by position.
        verbose : int or bool
            When truthy, print the starting loss and then the loss every
            ``verbose`` iterations.

        Notes
        -----
        If ``self.weights`` is already set (passed to the constructor or left
        by a previous ``fit``), training continues from a float copy of it.
        """
        import random  # local import: the module is not imported at file level

        random.seed(self.random_state)

        features = np.asarray(X_train)
        targets = np.asarray(y_train)
        number_of_observations, number_of_features = features.shape

        # Prepend the column of ones that multiplies the intercept weight.
        design = np.hstack([np.ones((number_of_observations, 1)), features])

        # `not array` is ambiguous for multi-element arrays, so test for None.
        # The float copy keeps the caller's array intact and lets `-=` work
        # even when integer weights were supplied.
        if self.weights is None:
            self.weights = np.ones(number_of_features + 1)
        else:
            self.weights = np.array(self.weights, dtype=float)

        y_pred = design @ self.weights

        if verbose:
            start_loss = self.__mse_loss(targets, y_pred) + self.__regularization()[0]
            print(f"start | loss: {start_loss} | {self.metric}: {self.__metric(targets, y_pred)}")

        for i in range(1, self.n_iter + 1):
            penalty, gradient_penalty = self.__regularization()

            y_pred = design @ self.weights
            loss = self.__mse_loss(targets, y_pred) + penalty

            if self.sgd_sample:
                gradient = self.__sgd_helper(design, targets, y_pred) + gradient_penalty
            else:
                gradient = 2 / number_of_observations * self.__mse_gradient_helper(design, targets, y_pred) + gradient_penalty

            step = self.learning_rate(i) if callable(self.learning_rate) else self.learning_rate
            self.weights -= step * gradient

            # Loss and metric shown here were computed before this step's update.
            if verbose and i % verbose == 0:
                print(f" {i} | loss: {loss} | {self.metric}: {self.__metric(targets, y_pred)}")

        y_pred = design @ self.weights
        self.best_metric = self.__metric(targets, y_pred)

    def get_coef(self):
        """Return the feature weights (everything except the intercept)."""
        return self.weights[1:]

    def predict(self, X_test):
        """Return predictions for ``X_test`` (features without intercept column)."""
        features = np.asarray(X_test)
        design = np.hstack([np.ones((features.shape[0], 1)), features])
        return design @ self.weights

    def get_best_score(self):
        """Return the metric computed on the training data after the last ``fit``."""
        return self.best_metric

## **Logistic Regression**

In [None]:
import numpy as np
import pandas as pd

class MyLogReg:
    """Binary logistic regression fitted by (mini-batch) gradient descent.

    Parameters
    ----------
    n_iter : int
        Number of gradient steps performed by ``fit``.
    learning_rate : float or callable
        Step size. A callable is invoked with the 1-based iteration number
        and must return the step size for that iteration.
    weights : array-like or None
        Initial weights, intercept first. ``None`` means "start from ones".
    metric : {'accuracy', 'precision', 'recall', 'f1', 'roc_auc'} or None
        Quality metric reported in verbose output and by ``get_best_score``.
    reg : {'l1', 'l2', 'elasticnet'} or None
        Regularization type added to the loss and the gradient.
    l1_coef, l2_coef : float
        Regularization strengths.
    sgd_sample : int, float or None
        Mini-batch size: an int is a row count, a float is a fraction of the
        training rows. A falsy value (``None``/0) uses the full data set.
    random_state : int
        Seed passed to ``random.seed`` at the start of ``fit``.
    """

    def __init__(self, n_iter, learning_rate, weights=None, metric=None, reg=None, l1_coef=0, l2_coef=0, sgd_sample=None, random_state=42):
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.weights = weights
        self.metric = metric
        self.__best_metric = None
        self.reg = reg
        self.l1_coef = l1_coef
        self.l2_coef = l2_coef
        self.sgd_sample = sgd_sample
        self.random_state = random_state

    def __str__(self):
        # The label previously read "MyLineReg", copied from the linear model.
        return f"MyLogReg class: \n{' '.join([f'{key} = {value}' for key, value in self.__dict__.items()])}"

    def __sigmoid(self, value):
        """Return the logistic function ``1 / (1 + exp(-value))``."""
        return 1 / (1 + np.exp(-value))

    def __log_loss(self, y, y_pred):
        """Return the mean binary cross-entropy; ``eps`` keeps ``log`` away from 0."""
        eps = 1e-15
        return -(y * np.log(y_pred + eps) + (1 - y) * np.log(1 - y_pred + eps)).mean()

    def __log_gradient_helper(self, X, y, y_pred):
        """Return ``(y_pred - y) @ X``; the caller divides by the row count."""
        return ((y_pred - y) @ X)

    def __predict_helper(self, X):
        """Return probabilities for a design matrix that already has the intercept column."""
        return self.__sigmoid(X @ self.weights)

    def __with_intercept(self, X):
        """Return ``X`` as an array with a leading column of ones."""
        features = np.asarray(X)
        return np.hstack([np.ones((features.shape[0], 1)), features])

    def __metric(self, y_train, y_scores):
        """Evaluate ``self.metric``; returns ``None`` for an unset/unknown metric.

        Class labels are obtained by thresholding ``y_scores`` at 0.5.
        """
        y_pred = np.where(y_scores > 0.5, 1, 0)

        TP = np.sum((y_train == 1) & (y_pred == 1))
        FP = np.sum((y_train == 0) & (y_pred == 1))
        FN = np.sum((y_train == 1) & (y_pred == 0))
        TN = np.sum((y_train == 0) & (y_pred == 0))

        if self.metric == 'accuracy':
            return (TP + TN) / (TP + TN + FP + FN)
        if self.metric == 'precision':
            return TP / (TP + FP)
        if self.metric == 'recall':
            return TP / (TP + FN)
        if self.metric == 'f1':
            precision = TP / (TP + FP)
            recall = TP / (TP + FN)
            return 2 * precision * recall / (precision + recall)
        if self.metric == 'roc_auc':
            data = pd.DataFrame({'y_true': y_train, 'y_scores': y_scores})
            data = data.sort_values(by='y_scores', ascending=False)

            labels = data['y_true'].to_numpy()
            is_positive = labels == 1
            P = np.sum(is_positive)
            N = np.sum(labels == 0)

            # For every non-positive row, count the positives ranked above it.
            # Equivalent to the row-by-row loop, without iterrows. Tied scores
            # get no special (half-credit) treatment.
            positives_above = np.cumsum(is_positive)
            auc = positives_above[~is_positive].sum()

            return auc / (P * N)
        return None

    def __regularization(self):
        """Return ``(penalty, gradient_penalty)`` for the current weights.

        The penalty is computed over the whole weight vector, i.e. the
        intercept weight is penalized as well.
        """
        penalty = 0
        gradient_penalty = 0

        if self.reg == 'l1':
            penalty = self.l1_coef * np.sum(np.abs(self.weights))
            gradient_penalty = self.l1_coef * np.sign(self.weights)
        elif self.reg == 'l2':
            penalty = self.l2_coef * np.sum((self.weights) ** 2)
            gradient_penalty = 2 * self.l2_coef * self.weights
        elif self.reg == 'elasticnet':
            penalty = self.l1_coef * np.sum(np.abs(self.weights)) + self.l2_coef * np.sum((self.weights) ** 2)
            gradient_penalty = self.l1_coef * np.sign(self.weights) + 2 * self.l2_coef * self.weights
        return penalty, gradient_penalty

    def __sgd_helper(self, X_train, y_train, y_pred):
        """Return the log-loss gradient estimated on a random mini-batch.

        All three arguments are NumPy arrays aligned row by row.

        Raises
        ------
        TypeError
            If ``sgd_sample`` is neither an int nor a float.
        ValueError
            If the resulting batch would be empty (or larger than the data,
            raised by ``random.sample``).
        """
        import random  # local import: the module is not imported at file level

        n_rows = X_train.shape[0]
        if isinstance(self.sgd_sample, (int, np.integer)):
            batch_size = int(self.sgd_sample)
        elif isinstance(self.sgd_sample, (float, np.floating)):
            batch_size = round(self.sgd_sample * n_rows)
        else:
            raise TypeError(
                f"sgd_sample must be an int, a float or None, got {type(self.sgd_sample).__name__}"
            )
        if batch_size < 1:
            raise ValueError("sgd_sample selects an empty mini-batch")

        sgd_idx = random.sample(range(n_rows), batch_size)
        X_train_sgd = X_train[sgd_idx]
        y_train_sgd = y_train[sgd_idx]
        y_pred_sgd = y_pred[sgd_idx]
        return self.__log_gradient_helper(X_train_sgd, y_train_sgd, y_pred_sgd) / len(sgd_idx)

    def fit(self, X_train, y_train, verbose=False):
        """Run ``n_iter`` gradient steps on the training data.

        Parameters
        ----------
        X_train : pandas.DataFrame or 2-D array-like
            Feature matrix without the intercept column.
        y_train : pandas.Series or 1-D array-like
            Labels (0/1), aligned with ``X_train`` by position.
        verbose : int or bool
            When truthy, print the starting loss and then the loss every
            ``verbose`` iterations.

        Notes
        -----
        If ``self.weights`` is already set (passed to the constructor or left
        by a previous ``fit``), training continues from a float copy of it.
        """
        import random  # local import: the module is not imported at file level

        random.seed(self.random_state)

        features = np.asarray(X_train)
        targets = np.asarray(y_train)
        number_of_observations, number_of_features = features.shape

        design = self.__with_intercept(features)

        # `not array` is ambiguous for multi-element arrays, so test for None.
        # The float copy keeps the caller's array intact and lets `-=` work
        # even when integer weights were supplied.
        if self.weights is None:
            self.weights = np.ones(number_of_features + 1)
        else:
            self.weights = np.array(self.weights, dtype=float)

        y_pred = self.__predict_helper(design)

        if verbose:
            # The loss must be computed here: inside the loop it is not yet defined.
            loss = self.__log_loss(targets, y_pred) + self.__regularization()[0]
            print(f"start | loss: {loss} | {self.metric}: {self.__metric(targets, y_pred)}")

        for i in range(1, self.n_iter + 1):
            penalty, gradient_penalty = self.__regularization()

            y_pred = self.__predict_helper(design)
            loss = self.__log_loss(targets, y_pred) + penalty

            if self.sgd_sample:
                gradient = self.__sgd_helper(design, targets, y_pred) + gradient_penalty
            else:
                gradient = self.__log_gradient_helper(design, targets, y_pred) / number_of_observations + gradient_penalty

            step = self.learning_rate(i) if callable(self.learning_rate) else self.learning_rate
            self.weights -= step * gradient

            # Loss and metric shown here were computed before this step's update.
            if verbose and i % verbose == 0:
                print(f" {i} | loss: {loss} | {self.metric}: {self.__metric(targets, y_pred)}")

        y_pred = self.__predict_helper(design)
        self.__best_metric = self.__metric(targets, y_pred)

    def predict(self, X_test):
        """Return 0/1 class labels (probability thresholded at 0.5).

        A DataFrame is treated as raw features and gets the intercept column
        prepended. For any other array-like input, an array whose column count
        already equals the number of weights is used as is (the historical
        behaviour of this method); otherwise the intercept column is prepended.
        """
        if isinstance(X_test, pd.DataFrame):
            design = self.__with_intercept(X_test)
        else:
            design = np.asarray(X_test)
            if design.shape[1] != len(self.weights):
                design = self.__with_intercept(design)

        probability = self.__predict_helper(design)
        return np.where(probability > 0.5, 1, 0)

    def predict_proba(self, X_test):
        """Return the predicted probability of class 1 for raw features ``X_test``."""
        return self.__predict_helper(self.__with_intercept(X_test))

    def get_coef(self):
        """Return the feature weights (everything except the intercept)."""
        return self.weights[1:]

    def get_best_score(self):
        """Return the metric computed on the training data after the last ``fit``."""
        return self.__best_metric