In [1]:
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, log_loss
import matplotlib.pyplot as plt
import time
from sklearn.exceptions import ConvergenceWarning
import warnings

warnings.filterwarnings("ignore", category=ConvergenceWarning)

def train_and_evaluate_logistic_regression(x_train, y_train, x_val, y_val,
                                            x_test, y_test, penalty='l2', C=1.0,
                                            solver='lbfgs', max_iter=100, l1_ratio=None):
    """Trains and evaluates Logistic Regression with specified parameters."""

    start_time = time.time()
    if penalty == 'elasticnet':
        model = LogisticRegression(penalty=penalty, C=C, solver=solver,
                                   max_iter=max_iter, multi_class='multinomial',
                                   l1_ratio=l1_ratio)
    else:
        model = LogisticRegression(penalty=penalty, C=C, solver=solver,
                                   max_iter=max_iter, multi_class='multinomial')  # For Fashion-MNIST
    model.fit(x_train, y_train)
    training_time = time.time() - start_time

    y_pred = model.predict(x_test)
    accuracy = accuracy_score(y_test, y_pred)
    # Validation Loss (using log_loss)
    y_val_prob = model.predict_proba(x_val)
    val_loss = log_loss(y_val, y_val_prob)

    return model, training_time, accuracy, val_loss

Load PCA Data

In [2]:
# Load PCA Data
x_train_pca = np.load('x_train_pca95.npy')
x_val_pca = np.load('x_val_pca95.npy')
x_test_pca = np.load('x_test_pca95.npy')
y_train = np.load('y_train.npy')
y_val = np.load('y_val.npy')
y_test = np.load('y_test.npy')

Scaling PCA Data Scales the pre-computed PCA-transformed data using StandardScaler. This is still crucial for Logistic Regression.

In [3]:
# Scaling (Crucial after PCA)
scaler_pca = StandardScaler()
x_train_pca_scaled = scaler_pca.fit_transform(x_train_pca)
x_val_pca_scaled = scaler_pca.transform(x_val_pca)
x_test_pca_scaled = scaler_pca.transform(x_test_pca)

Model Training and Evaluation

In [4]:
# ---------------------------------------------------------------------------------
# 1. Model Training and Evaluation
# ---------------------------------------------------------------------------------

print(" --- Model Training and Evaluation --- ")

# L2 (Ridge)
l2_model_pca, l2_time_pca, l2_acc_pca, l2_vloss_pca = train_and_evaluate_logistic_regression(
    x_train_pca_scaled, y_train, x_val_pca_scaled, y_val, x_test_pca_scaled, y_test,
    penalty='l2', C=0.01, solver='lbfgs', max_iter=100
)

# L1 (Lasso)
l1_model_pca, l1_time_pca, l1_acc_pca, l1_vloss_pca = train_and_evaluate_logistic_regression(
    x_train_pca_scaled, y_train, x_val_pca_scaled, y_val, x_test_pca_scaled, y_test,
    penalty='l1', C=0.1, solver='saga', max_iter=100
)

# Elastic Net
elasticnet_model_pca, en_time_pca, en_acc_pca, en_vloss_pca = train_and_evaluate_logistic_regression(
    x_train_pca_scaled, y_train, x_val_pca_scaled, y_val, x_test_pca_scaled, y_test,
    penalty='elasticnet', C=0.1, solver='saga', max_iter=100, l1_ratio=0.1  # alpha=0.1 is controlled by C
)

print("L2 (Ridge) + PCA Test Accuracy:", l2_acc_pca)
print("L1 (Lasso) + PCA Test Accuracy:", l1_acc_pca)
print("ElasticNet + PCA Test Accuracy:", en_acc_pca)

print("L2 (Ridge) + PCA Validation Loss:", l2_vloss_pca)
print("L1 (Lasso) + PCA Validation Loss:", l1_vloss_pca)
print("ElasticNet + PCA Validation Loss:", en_vloss_pca)

 --- Model Training and Evaluation --- 




L2 (Ridge) + PCA Test Accuracy: 0.8484761904761905
L1 (Lasso) + PCA Test Accuracy: 0.8515238095238096
ElasticNet + PCA Test Accuracy: 0.8516190476190476
L2 (Ridge) + PCA Validation Loss: 0.45821480429951034
L1 (Lasso) + PCA Validation Loss: 0.43002364203641424
ElasticNet + PCA Validation Loss: 0.43200378071635814


Hyperparameter Optimization (L2 Example)

In [5]:
# ---------------------------------------------------------------------------------
# 2. Hyperparameter Optimization
# ---------------------------------------------------------------------------------

print("\n--- Hyperparameter Optimization ---")

# L2 Regularization Strength Tuning
param_grid_l2 = {
    'C': [0.001, 0.01, 0.1, 1.0, 10.0],  # Inverse of regularization strength
    'solver': ['lbfgs', 'saga', 'liblinear']
}

grid_search_l2 = GridSearchCV(
    LogisticRegression(penalty='l2', max_iter=100, multi_class='multinomial'),
    param_grid_l2, cv=3, scoring='accuracy', verbose=1
)
grid_search_l2.fit(x_train_pca_scaled, y_train)

best_model_l2 = grid_search_l2.best_estimator_
print("Best Parameters (L2 + PCA):", grid_search_l2.best_params_)
y_pred_best_l2 = best_model_l2.predict(x_test_pca_scaled)
print("Test Accuracy (Best Model - L2 + PCA):", accuracy_score(y_test, y_pred_best_l2))


--- Hyperparameter Optimization ---
Fitting 3 folds for each of 15 candidates, totalling 45 fits


15 fits failed out of a total of 45.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
15 fits failed with the following error:
Traceback (most recent call last):
  File "d:\University Work\6th Semester\ML\ML_Semester-Project_i222369_i222325\tfenv311\Lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "d:\University Work\6th Semester\ML\ML_Semester-Project_i222369_i222325\tfenv311\Lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\University Work\6th Semester\ML\ML_Semester-Project_i222369_i222325\tfenv311\Lib\site-packages\sklearn\l

Best Parameters (L2 + PCA): {'C': 10.0, 'solver': 'lbfgs'}
Test Accuracy (Best Model - L2 + PCA): 0.8517142857142858


Overfitting Analysis

In [6]:
# ---------------------------------------------------------------------------------
# 3. Overfitting Analysis
# ---------------------------------------------------------------------------------

print("\n--- Overfitting Analysis ---")

# Training Accuracy (for overfitting analysis)
y_train_pred_l2 = l2_model_pca.predict(x_train_pca_scaled)
train_accuracy_l2 = accuracy_score(y_train, y_train_pred_l2)
print("Training Accuracy (L2 + PCA):", train_accuracy_l2)
print("Test Accuracy (L2 + PCA):", l2_acc_pca)
print("Overfitting Gap (L2 + PCA):", train_accuracy_l2 - l2_acc_pca)

# Confusion Matrix (for detailed analysis)
conf_mat_l2 = confusion_matrix(y_test, l2_model_pca.predict(x_test_pca_scaled))
print("\nConfusion Matrix (L2 + PCA):\n", conf_mat_l2)

# Classification Report
class_report_l2 = classification_report(y_test, l2_model_pca.predict(x_test_pca_scaled))
print("\nClassification Report (L2 + PCA):\n", class_report_l2)


--- Overfitting Analysis ---
Training Accuracy (L2 + PCA): 0.8553206253316462
Test Accuracy (L2 + PCA): 0.8484761904761905
Overfitting Gap (L2 + PCA): 0.006844434855455717

Confusion Matrix (L2 + PCA):
 [[ 852    1   19   56    2    6   84    2   17    1]
 [   5 1014    6   26    2    1    4    1    2    0]
 [  19    2  781   12  117    3   96    0    6    0]
 [  44    8    8  953   39    5   44    0    2    0]
 [   0    0   71   33  792    1   93    1    4    0]
 [   2    1    1    1    0  990    0   53    6   21]
 [ 169    3  118   27  120    6  617    2   21    0]
 [   0    0    0    0    0   42    0  948    2   40]
 [   2    2    2    7    2    9   16    2  947    4]
 [   0    0    0    0    0   20    0   44    3 1015]]

Classification Report (L2 + PCA):
               precision    recall  f1-score   support

           0       0.78      0.82      0.80      1040
           1       0.98      0.96      0.97      1061
           2       0.78      0.75      0.76      1036
           3

Validation Loss Approximation

In [9]:
# ---------------------------------------------------------------------------------
# Validation Loss Curves (Approximation using GridSearchCV)
# ---------------------------------------------------------------------------------

print("\n--- Validation Loss Approximation ---")

# Validation loss approximation from GridSearchCV (L2)
# GridSearchCV doesn't directly give "curves" but provides the best score for each fold.
# We can use these scores as a rough estimate of validation performance during hyperparameter tuning
print("CV Results (L2):")
print(grid_search_l2.cv_results_)

# You can extract mean validation scores for each C value to plot a rough "validation curve"
mean_val_scores_l2 = grid_search_l2.cv_results_['mean_test_score']
c_values_l2 = param_grid_l2['C']

# Check if the lengths match before plotting
if len(c_values_l2) != len(mean_val_scores_l2):
    print("Error: Lengths of C values and validation scores do not match!")
    print(f"Length of C values: {len(c_values_l2)}")
    print(f"Length of validation scores: {len(mean_val_scores_l2)}")
else:
    plt.figure(figsize=(8, 6))
    plt.plot(c_values_l2, mean_val_scores_l2, marker='o')
    plt.xscale('log')
    plt.xlabel("C (Inverse Regularization Strength)")
    plt.ylabel("Mean Validation Accuracy")
    plt.title("L2 Regularization: Validation Performance vs. C")
    plt.grid(True)
    plt.show()


--- Validation Loss Approximation ---
CV Results (L2):
{'mean_fit_time': array([6.64609591e-01, 8.45526822e+00, 2.76099046e-02, 1.17995516e+00,
       2.73832343e+01, 2.35599677e-02, 2.19994227e+00, 6.60255058e+01,
       1.68790023e-02, 4.22898181e+00, 6.63781543e+01, 2.90771325e-02,
       4.10353891e+00, 5.10178529e+01, 2.09661325e-02]), 'std_fit_time': array([0.04100763, 0.16289731, 0.00811142, 0.0309961 , 1.34995151,
       0.00639098, 0.05387687, 0.65761219, 0.00319146, 0.19742891,
       1.24264391, 0.00418011, 0.1142509 , 0.98644827, 0.01133162]), 'mean_score_time': array([0.03516499, 0.0068318 , 0.        , 0.03098838, 0.00569065,
       0.        , 0.027625  , 0.01112429, 0.        , 0.03512343,
       0.00560244, 0.        , 0.02364302, 0.00563542, 0.        ]), 'std_score_time': array([0.00180624, 0.00653722, 0.        , 0.00377298, 0.0080478 ,
       0.        , 0.00848139, 0.00326113, 0.        , 0.00944489,
       0.00792305, 0.        , 0.00189998, 0.00796969, 0.      