In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [6]:
df = pd.read_csv('car_data.csv')
df

Unnamed: 0,User ID,Gender,Age,AnnualSalary,Purchased
0,385,Male,35,20000,0
1,681,Male,40,43500,0
2,353,Male,49,74000,0
3,895,Male,40,107500,1
4,661,Male,25,79000,0
...,...,...,...,...,...
995,863,Male,38,59000,0
996,800,Female,47,23500,0
997,407,Female,28,138500,1
998,299,Female,48,134000,1


In [10]:
# map the column value in Purchase, 0 to -1
df['Purchased'] = df['Purchased'].map({0:-1, 1:1})
df['Gender'] = df['Gender'].map({'Male': -1, 'Female': 1 })
df

Unnamed: 0,User ID,Gender,Age,AnnualSalary,Purchased
0,385,-1,35,20000,-1
1,681,-1,40,43500,-1
2,353,-1,49,74000,-1
3,895,-1,40,107500,1
4,661,-1,25,79000,-1
...,...,...,...,...,...
995,863,-1,38,59000,-1
996,800,1,47,23500,-1
997,407,1,28,138500,1
998,299,1,48,134000,1


In [70]:
from scipy.optimize import minimize
from sklearn.linear_model import LogisticRegression

def loss(beta, X, y, lam, p):
    # Logistic loss with L_p regularization
    logistic_loss = np.mean(np.log(1 + np.exp(-y * np.dot(X, beta))))
    reg_loss = lam * np.linalg.norm(beta, p)
    return logistic_loss + reg_loss

def gradient(beta, X, y, lam, p):
    # Gradient of logistic loss
    logistic_grad = np.dot(X.T, -y / (1 + np.exp(y * np.dot(X, beta))))
    # Gradient of regularization term
    if p == 1:
        reg_grad = lam * np.sign(beta)
    else:
        norm_beta_p_minus_1 = np.power(np.abs(beta), p - 1)
        reg_grad = lam * np.multiply(np.sign(beta), norm_beta_p_minus_1)
        reg_grad /= np.power(np.linalg.norm(beta, p), p-1)
    
    return logistic_grad + reg_grad

def logistic_regression(X, y, lam, p):
    # Initialize beta with zeros or logistic regression coefficients as a starting point
    beta_init = LogisticRegression().fit(X, y).coef_[0]   
    # Define the objective function (loss) to minimize
    obj_func = lambda beta: loss(beta, X, y, lam, p)
    # Define the gradient of the objective function
    obj_grad = lambda beta: gradient(beta, X, y, lam, p)
    # Use scipy.optimize.minimize with the 'BFGS' method, providing the gradient
    result = minimize(fun=obj_func, x0=beta_init, jac=obj_grad, method='BFGS')
    print(result.x)
    return result.x

def predict(X, y, beta):
    # y_one is the probability of class 1
    y_one = np.exp(np.dot(X, beta)) / (1 + np.exp(np.dot(X, beta)))
    # y_pred is the predicted class
    y_pred = np.sign(y_one - 0.5)

    # check the correct accuracy 
    accuracy = np.mean(y == y_pred)
    return accuracy
    


Here, logistic regression is to return the beta after fitting

In [112]:
def generate_logistic_regression_data(n_samples, n_features, beta_true):
    """
    Generate data for logistic regression.
    
    Parameters:
    - n_samples: int, the number of samples.
    - n_features: int, the number of features.
    - beta_true: array-like, the true coefficients, including the intercept as the first element.
    
    Returns:
    - X: array, shape (n_samples, n_features), the feature matrix.
    - y: array, shape (n_samples,), the target variable with values in {-1, 1}.
    """
    # Generate feature matrix X with an intercept (bias) term
    X = np.random.randn(n_samples, n_features)
    X = np.hstack((np.ones((n_samples, 1)), X))  # Adding intercept term
    
    # Compute logits (linear combination + intercept)
    logits = np.dot(X, beta_true)
    
    # Apply logistic function to get probabilities
    probabilities = 1 / (1 + np.exp(-logits))
    
    # Generate binary outcomes based on probabilities
    y = np.where(probabilities >= 0.5, 1, -1)

    # add noise to the data
    X = X + np.random.normal(0, 10, X.shape)
    # after y from 1 to -1 or -1 to 1 by 10% of the data
    for i in range(int(n_samples * 0.1)):
        if y[i] == 1:
            y[i] = -1
        else:
            y[i] = 1
    
    return X[:, 1:], y  # Return X without intercept term and y

# Example usage
n_samples = 1000
n_features = 5
beta_true = np.array([0.5, -1, 2, 0.5, -0.25, 0.75])  # Including intercept

X, y = generate_logistic_regression_data(n_samples, n_features, beta_true)

print("Feature matrix (X) shape:", X.shape)
print("Target variable (y) shape:", y.shape)


Feature matrix (X) shape: (1000, 5)
Target variable (y) shape: (1000,)


In [120]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

p_values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
lam_values = [1, 2, 3, 4, 5, 6]

'''
beta_test = logistic_regression(X_train, y_train, lam=0.1, p=1)
print(beta_test)

accuracy = predict(X_test, y_test, beta_test)
print(accuracy)
'''

# for each p-values, and lam_values, store the accuracy in dictionary
accuracy_dict = {}

p = p_values[4]
accuracy_dict[p] = []
for lam in lam_values:
    beta = logistic_regression(X_train, y_train, lam, p)
    accuracy = predict(X_test, y_test, beta)
    accuracy_dict[p].append(accuracy)

print(accuracy_dict)


plain_beta = LogisticRegression().fit(X_train, y_train).coef_[0]
plain_accuracy = predict(X_test, y_test, plain_beta)
plain_accuracy

[-0.01271893  0.00468129  0.00280432 -0.00074565 -0.00191272]
[-0.01257002  0.00472616  0.00290424 -0.0005915  -0.00182738]
[-0.01253636  0.00469647  0.00282714 -0.00068934 -0.00189047]
[-0.01245492  0.00469751  0.00282311 -0.00068419 -0.0018944 ]
[-0.01237216  0.00469928  0.0028135  -0.00067717 -0.00189383]
[-0.01229101  0.00469989  0.00281104 -0.00067254 -0.00189899]
{5: [0.518, 0.518, 0.516, 0.516, 0.516, 0.516]}


0.516