In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import LeaveOneOut
from sklearn.linear_model import LogisticRegression, RidgeClassifier, Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

data = pd.read_csv('diabetes.csv')

X = data.drop(columns='Outcome')  
y = data['Outcome']

# Standardizing all the predictors
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [4]:
#LOOCV set up
loo = LeaveOneOut()
errors = []

# Logistic Regression with LOOCV
for train_index, test_index in loo.split(X_scaled):
    
    #determining values for each one per index 
    X_train, X_test = X_scaled[train_index], X_scaled[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    
    # here we are creating the logistic regression model
    model = LogisticRegression()
    model.fit(X_train, y_train)
    
    # Predicting data and also finding error
    y_pred = model.predict(X_test)
    #using 1 if not successufl and 0 if it is
    errors.append(1 if y_pred[0] != y_test.iloc[0] else 0)  # Comparing the outcome values

# Test error rate by averaging all the values
test_error_logistic = np.mean(errors)
print(f"Test Error Rate (Logistic Regression): {test_error_logistic:.4f}")

Test Error Rate (Logistic Regression): 0.2240


In [5]:
from sklearn.linear_model import RidgeClassifierCV

# alphas represent penaties for ridge classifiers
alphas = np.logspace(-4, 4, 50)

# setting up ridge model with alphas and fitting it with the scaled data
ridge_model = RidgeClassifierCV(alphas=alphas, store_cv_values=True) #ridge classifier CV as our LOOCV
ridge_model.fit(X_scaled, y)

#here we are finding OPTIMAL alpha for ridge classifer
optimal_pen = ridge_model.alpha_ #finds our 'best' penalty
test_error_ridge = 1 - ridge_model.score(X_scaled, y) # 1 - the accuracy for the error


print(f"Optimal Penalty : {optimal_pen:.4f}")
print(f"Test Error Rate : {test_error_ridge:.4f}")

Optimal Penalty (Ridge): 35.5648
Test Error Rate (Ridge Regression): 0.2240




In [6]:
from sklearn.model_selection import GridSearchCV

# Define penalty range for Lasso (L1)
param_grid = {'C': np.logspace(-4, 4, 50)}  

#model set up
lasso_model = LogisticRegression(penalty='l1', solver='liblinear')

# Use LOOCV to choose the best penalty parameter
grid = GridSearchCV(lasso_model, param_grid, cv=loo, scoring='accuracy') #specified LOOCV
grid.fit(X_scaled, y) #fitting the grid with values

#determining optimal pen and the error
optimal_pen_lasso = grid.best_params_['C']
test_error_lasso = 1 - grid.best_score_

print(f"Optimal Penalty: {optimal_pen_lasso:.4f}")
print(f"Test Error Rate : {test_error_lasso:.4f}")

Optimal Penalty (Lasso): 0.3907
Test Error Rate (Lasso Regression): 0.2227


In [9]:
import pandas as pd

df = pd.DataFrame({
    "Model": ["Logistic Regression", "Ridge Regression", "Lasso Regression"],
    "Optimal Penalty": [None, optimal_alpha_ridge, optimal_alpha_lasso], # no pen for Log Reg
    "Test Error Rate": [test_error_logistic, test_error_ridge, test_error_lasso]
})

df.head()

Unnamed: 0,Model,Optimal Penalty,Test Error Rate
0,Logistic Regression,,0.223958
1,Ridge Regression,35.564803,0.223958
2,Lasso Regression,0.390694,0.222656
