In [None]:
%load_ext autoreload
%autoreload 2

from typing import Callable, Optional, Tuple, Union

import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

#initializing the logger
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()

import numpy as np

import numpy as np

def pred(x: np.ndarray, b0: float, b1: float) -> float:
    """
    Evaluates the logistic (sigmoid) function on the linear score b0 + b1 * x.

    Parameters:
    x (np.ndarray): Values of the single input feature.
    b0 (float): Intercept term.
    b1 (float): Slope applied to x.

    Returns:
    float: Sigmoid of the linear score; when x is an array the result is an
        array with one probability per element of x.

    """
    linear_score = b0 + b1 * x
    denominator = 1 + np.exp(-linear_score)
    return 1 / denominator

def l1(b0: float, b1: float, alpha: float) -> float:
    """
    Computes the L1 penalty: alpha times the sum of the absolute values of
    both parameters, halved.

    Parameters:
    b0 (float): Intercept term.
    b1 (float): Slope term.
    alpha (float): Regularization strength.

    Returns:
    float: alpha * (|b0| + |b1|) / 2.

    """
    magnitude_sum = abs(b0) + abs(b1)
    return alpha * magnitude_sum / 2

def l2(b0: float, b1: float, alpha: float) -> float:
    """
    Computes the L2 penalty: alpha times the sum of the squares of both
    parameters, halved.

    Parameters:
    b0 (float): Intercept term.
    b1 (float): Slope term.
    alpha (float): Regularization strength.

    Returns:
    float: alpha * (b0**2 + b1**2) / 2.

    """
    squared_norm = b0**2 + b1**2
    return alpha * squared_norm / 2

def elasticnet(b0: float, b1: float, alpha: float, l1_ratio: float) -> float:
    """
    Blends the L1 and L2 penalties into a single elastic net penalty.

    Parameters:
    b0 (float): Intercept term.
    b1 (float): Slope term.
    alpha (float): Regularization strength passed to both penalties.
    l1_ratio (float): Weight given to the L1 part; the L2 part receives
        (1 - l1_ratio).

    Returns:
    float: l1_ratio * l1(...) + (1 - l1_ratio) * l2(...).

    """
    lasso_part = l1(b0, b1, alpha)
    ridge_part = l2(b0, b1, alpha)
    return l1_ratio * lasso_part + (1 - l1_ratio) * ridge_part

def comp_class_weights(y_train: np.ndarray) -> np.ndarray:
    """
    Derives one weight per distinct label, inversely proportional to how
    often that label occurs.

    Parameters:
    y_train (np.ndarray): Array of labels (the formula divides by twice the
        per-label count, i.e. it is written for two classes).

    Returns:
    np.ndarray: len(y_train) / (2 * count) for each distinct label, in the
        sorted label order produced by np.unique.

    """
    n_samples = len(y_train)
    class_counts = np.unique(y_train, return_counts=True)[1]
    return n_samples / (2 * class_counts)

#LR function
def LR(
    x: np.ndarray,
    y: np.ndarray,
    alpha: float = 1,
    debug: bool = False,
    penalty: Callable[..., float] = l1,
    l1_ratio: float = 0.5,
    class_weights: Optional[Union[str, Tuple[float, float]]] = None,
    *,
    learning_rate: float = 0.001,
    n_iters: int = 1000,
) -> Tuple[float, float]:
    """
    Logistic Regression with Regularization (LR)

    Fits a single-feature logistic model p = sigmoid(b0 + b1 * x) by batch
    gradient descent on the (optionally class-weighted) mean squared error
    between y and p, with a regularization term added to each update.

    Parameters:
    x (np.ndarray): Values of the single input feature.
    y (np.ndarray): Binary target values (0 or 1), same length as x.
    alpha (float, optional): Regularization strength (default=1).
    debug (bool, optional): If True, logs the inputs and the coefficients at every iteration (default=False).
    penalty (function, optional): Regularization penalty function. Supported values are l1, l2, or elasticnet (default=l1).
    l1_ratio (float, optional): The mixing parameter for elastic net when penalty is set to elasticnet (default=0.5).
    class_weights (tuple or str, optional): None (default) weights every sample equally.
        "balanced" derives the weights from the label frequencies in y via comp_class_weights.
        A tuple (w0, w1) gives the weight for samples of class 0 and class 1 respectively.
    learning_rate (float, optional, keyword-only): Gradient descent step size (default=0.001).
    n_iters (int, optional, keyword-only): Number of gradient descent iterations (default=1000).

    Returns:
    b0 (float): Intercept of the fitted model.
    b1 (float): Coefficient of the fitted model.

    Raises:
    ValueError: If penalty is not one of l1, l2 or elasticnet.
    ValueError: If l1_ratio is set and penalty is not set to elasticnet.
    ValueError: If class_weights is neither None, "balanced", nor a pair of numbers.

    """

    #debug statement
    if debug:
        logger.info(f"Input: x = {x}, y = {y}, alpha = {alpha}, penalty = {penalty}, l1_ratio = {l1_ratio}")

    #rejecting unsupported penalties up front instead of failing later on an unbound gradient
    if penalty not in (l1, l2, elasticnet):
        raise ValueError("penalty must be one of l1, l2 or elasticnet")

    #checking l1_ratio usage
    if penalty is not elasticnet and l1_ratio != 0.5:
        raise ValueError("l1_ratio can only be set when penalty is set to 'elasticnet' ")

    #working on plain float arrays so pandas inputs behave like numpy ones
    features = np.asarray(x, dtype=float)
    targets = np.asarray(y, dtype=float)

    #turning class weights into one weight per sample; they weight the loss,
    #they are not starting values for the coefficients
    if class_weights is None:
        sample_weights = np.ones_like(targets)
    elif isinstance(class_weights, str):
        if class_weights != "balanced":
            raise ValueError("class_weights must be None, 'balanced' or a (w0, w1) tuple")
        #computed from the y passed in, not from a global training set;
        #inverse maps every sample to the position of its label in the weight array
        _, inverse = np.unique(targets, return_inverse=True)
        sample_weights = comp_class_weights(targets)[inverse]
    else:
        try:
            weight_0, weight_1 = class_weights
        except (TypeError, ValueError) as err:
            raise ValueError("class_weights must be None, 'balanced' or a (w0, w1) tuple") from err
        #labels are expected to be 0/1 here
        sample_weights = np.where(targets == 1, weight_1, weight_0)

    b0, b1 = 0.0, 0.0

    for i in range(n_iters):

        #debug statement
        if debug:
            logger.info(f"Iteration {i}: b0 = {b0}, b1 = {b1}")

        y_pred = pred(features, b0, b1)

        #shared factor of the squared-error gradient through the sigmoid
        grad_terms = sample_weights * (targets - y_pred) * y_pred * (1 - y_pred)

        #alpha is forwarded to every penalty (it used to be honoured for l1 only)
        if penalty is elasticnet:
            reg = elasticnet(b0, b1, alpha, l1_ratio)
        else:
            reg = penalty(b0, b1, alpha)

        # NOTE(review): reg is the *value* of the penalty, not its derivative
        # with respect to b0/b1. This is kept as in the original update rule so
        # that fitted coefficients do not change; confirm whether a true
        # (sub)gradient was intended.
        db0 = -2 * np.mean(grad_terms) + reg
        db1 = -2 * np.mean(grad_terms * features) + reg

        b0 = b0 - learning_rate * db0
        b1 = b1 - learning_rate * db1

    return b0, b1

#convert the y_pred values to binary output
def bin(x, b0, b1):
    """
    Thresholds the model's predicted probabilities into hard 0/1 labels.

    Parameters:
    x (np.ndarray): Values of the single input feature.
    b0 (float): Intercept term.
    b1 (float): Slope term.

    Returns:
    list: One label per element of x: 1 where the predicted probability is
        strictly greater than 0.5, otherwise 0.

    """
    threshold = 0.5
    probabilities = pred(x, b0, b1)
    return [1 if p > threshold else 0 for p in probabilities]



# Load the breast cancer dataset and mirror it in a DataFrame whose last
# column holds the target
data = load_breast_cancer()
X, y = data.data, data.target
df = pd.DataFrame(data=data.data, columns=data.feature_names)
df['target'] = data.target

# Hold out 25% of the rows for evaluation; the seed is fixed for repeatability
X_train, X_test, y_train, y_test = train_test_split(
    df.iloc[:, :-1],
    df.iloc[:, -1],
    test_size=0.25,
    random_state=42,
)

# Standardize the features using statistics learned from the training split only
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Fit the model on the first feature column and show the fitted coefficients
b0, b1 = LR(X_train[:, 0], y_train, alpha=0.8)
print("b0:", b0, "b1:", b1)

# Score the held-out rows: fraction of predicted labels that match the truth
y_pred = bin(X_test[:, 0], b0, b1)
print("Accuracy:", np.mean(y_pred == y_test))


: 