#Imports and CIFAR-10 Dataset Setup

In [15]:
!pip install keras



In [16]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from tensorflow.keras.datasets import cifar10
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

import warnings
warnings.filterwarnings("ignore")

# Load CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Flatten label arrays to 1D
y_train = y_train.ravel()
y_test = y_test.ravel()

# Display dataset dimensions
print("CIFAR-10 Dataset Loaded")
print(f"Training images: {x_train.shape}")
print(f"Test images:     {x_test.shape}")
print(f"Training labels: {y_train.shape}")
print(f"Test labels:     {y_test.shape}")


ImportError: Traceback (most recent call last):
  File "C:\Users\ehayfo1\AppData\Local\anaconda3\Lib\site-packages\tensorflow\python\pywrap_tensorflow.py", line 73, in <module>
    from tensorflow.python._pywrap_tensorflow_internal import *
ImportError: DLL load failed while importing _pywrap_tensorflow_internal: A dynamic link library (DLL) initialization routine failed.


Failed to load the native TensorFlow runtime.
See https://www.tensorflow.org/install/errors for some common causes and solutions.
If you need help, create an issue at https://github.com/tensorflow/tensorflow/issues and include the entire stack trace above this error message.

#Baseline Logistic Regression (Raw RGB Pixels)

In [2]:
# Flatten image data to 1D vectors
x_train_rgb = x_train.reshape((x_train.shape[0], -1))
x_test_rgb = x_test.reshape((x_test.shape[0], -1))

# Train logistic regression on a subset of training data
model_rgb = LogisticRegression(
    max_iter=300,
    solver='saga',
    multi_class='multinomial',
    n_jobs=-1
)

model_rgb.fit(x_train_rgb[:10000], y_train[:10000])

# Predict on test set
y_pred_rgb = model_rgb.predict(x_test_rgb)

# Evaluate performance
accuracy_rgb = accuracy_score(y_test, y_pred_rgb)
print(f"Baseline Accuracy (Raw RGB): {accuracy_rgb:.4f}")
print(classification_report(y_test, y_pred_rgb))


Baseline Accuracy (Raw RGB): 0.3471
              precision    recall  f1-score   support

           0       0.40      0.40      0.40      1000
           1       0.42      0.38      0.40      1000
           2       0.27      0.27      0.27      1000
           3       0.23      0.21      0.22      1000
           4       0.31      0.28      0.29      1000
           5       0.28      0.27      0.28      1000
           6       0.37      0.38      0.37      1000
           7       0.40      0.40      0.40      1000
           8       0.40      0.52      0.45      1000
           9       0.38      0.36      0.37      1000

    accuracy                           0.35     10000
   macro avg       0.35      0.35      0.35     10000
weighted avg       0.35      0.35      0.35     10000



#Hyperparameter Tuning on Raw RGB

In [3]:
# Define hyperparameter grid
param_grid = {
    'C': [0.1, 1, 10],                # Regularization strength
    'solver': ['saga'],              # Optimized solver for multinomial LR
    'multi_class': ['multinomial'],  # Multiclass mode
    'max_iter': [300]                # Max training iterations
}

# Initialize grid search
grid_rgb = GridSearchCV(
    LogisticRegression(n_jobs=-1),
    param_grid,
    cv=3,
    verbose=2
)

# Fit using subset for speed (same as Phase 1)
grid_rgb.fit(x_train_rgb[:10000], y_train[:10000])

# Evaluate best model from grid
best_rgb_model = grid_rgb.best_estimator_
y_pred_rgb_tuned = best_rgb_model.predict(x_test_rgb)

# Accuracy report
tuned_rgb_accuracy = accuracy_score(y_test, y_pred_rgb_tuned)
print(f"Tuned Accuracy (Raw RGB): {tuned_rgb_accuracy:.4f}")
print(classification_report(y_test, y_pred_rgb_tuned))


Fitting 3 folds for each of 3 candidates, totalling 9 fits
[CV] END C=0.1, max_iter=300, multi_class=multinomial, solver=saga; total time= 3.3min
[CV] END C=0.1, max_iter=300, multi_class=multinomial, solver=saga; total time= 3.3min
[CV] END C=0.1, max_iter=300, multi_class=multinomial, solver=saga; total time= 3.2min
[CV] END C=1, max_iter=300, multi_class=multinomial, solver=saga; total time=13.0min
[CV] END C=1, max_iter=300, multi_class=multinomial, solver=saga; total time= 3.2min
[CV] END C=1, max_iter=300, multi_class=multinomial, solver=saga; total time= 3.2min
[CV] END C=10, max_iter=300, multi_class=multinomial, solver=saga; total time= 3.2min
[CV] END C=10, max_iter=300, multi_class=multinomial, solver=saga; total time= 3.3min
[CV] END C=10, max_iter=300, multi_class=multinomial, solver=saga; total time= 3.2min
Tuned Accuracy (Raw RGB): 0.3467
              precision    recall  f1-score   support

           0       0.40      0.40      0.40      1000
           1       0.42  

#Advanced Preprocessing Techniques for Logistic Regression

In [7]:
pip install scikit-image

Collecting scikit-image
  Downloading scikit_image-0.24.0-cp39-cp39-win_amd64.whl.metadata (14 kB)
Collecting networkx>=2.8 (from scikit-image)
  Downloading networkx-3.2.1-py3-none-any.whl.metadata (5.2 kB)
Collecting imageio>=2.33 (from scikit-image)
  Downloading imageio-2.37.0-py3-none-any.whl.metadata (5.2 kB)
Collecting tifffile>=2022.8.12 (from scikit-image)
  Downloading tifffile-2024.8.30-py3-none-any.whl.metadata (31 kB)
Collecting lazy-loader>=0.4 (from scikit-image)
  Downloading lazy_loader-0.4-py3-none-any.whl.metadata (7.6 kB)
Downloading scikit_image-0.24.0-cp39-cp39-win_amd64.whl (12.9 MB)
   ---------------------------------------- 0.0/12.9 MB ? eta -:--:--
   ----- ---------------------------------- 1.8/12.9 MB 10.0 MB/s eta 0:00:02
   ----------- ---------------------------- 3.7/12.9 MB 10.4 MB/s eta 0:00:01
   ----------------- ---------------------- 5.8/12.9 MB 9.8 MB/s eta 0:00:01
   ------------------------- -------------- 8.1/12.9 MB 10.3 MB/s eta 0:00:01
   --

In [8]:
#HOG + Logistic Regression

import numpy as np
from tensorflow.keras.datasets import cifar10
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from skimage.color import rgb2gray
from skimage.feature import hog

# Load dataset
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
y_train = y_train.ravel()
y_test = y_test.ravel()

# Convert to grayscale
x_train_gray = rgb2gray(x_train)
x_test_gray = rgb2gray(x_test)

# Extract HOG features
def extract_hog_features(images):
    features = []
    for image in images:
        hog_feat = hog(image, pixels_per_cell=(8, 8), cells_per_block=(2, 2), feature_vector=True)
        features.append(hog_feat)
    return np.array(features)

# For runtime: use subset of training for now
x_train_hog = extract_hog_features(x_train_gray[:10000])
x_test_hog = extract_hog_features(x_test_gray)

# Train logistic regression
hog_lr = LogisticRegression(max_iter=300, solver='saga', multi_class='multinomial', n_jobs=-1)
hog_lr.fit(x_train_hog, y_train[:10000])

# Evaluate
y_pred_hog = hog_lr.predict(x_test_hog)
print("Accuracy (HOG):", accuracy_score(y_test, y_pred_hog))
print(classification_report(y_test, y_pred_hog))


Accuracy (HOG): 0.4961
              precision    recall  f1-score   support

           0       0.56      0.56      0.56      1000
           1       0.57      0.59      0.58      1000
           2       0.43      0.40      0.42      1000
           3       0.38      0.30      0.34      1000
           4       0.41      0.44      0.42      1000
           5       0.42      0.38      0.40      1000
           6       0.48      0.61      0.54      1000
           7       0.55      0.54      0.55      1000
           8       0.54      0.54      0.54      1000
           9       0.59      0.60      0.59      1000

    accuracy                           0.50     10000
   macro avg       0.49      0.50      0.49     10000
weighted avg       0.49      0.50      0.49     10000



# CLAHE + Logistic Regression

In [3]:
!pip install opencv-python-headless


Collecting opencv-python-headless
  Downloading opencv_python_headless-4.11.0.86-cp37-abi3-win_amd64.whl.metadata (20 kB)
Downloading opencv_python_headless-4.11.0.86-cp37-abi3-win_amd64.whl (39.4 MB)
   ---------------------------------------- 0.0/39.4 MB ? eta -:--:--
   - -------------------------------------- 1.0/39.4 MB 8.4 MB/s eta 0:00:05
   -- ------------------------------------- 2.9/39.4 MB 8.0 MB/s eta 0:00:05
   ----- ---------------------------------- 5.2/39.4 MB 9.1 MB/s eta 0:00:04
   ------- -------------------------------- 7.3/39.4 MB 9.4 MB/s eta 0:00:04
   --------- ------------------------------ 9.4/39.4 MB 9.3 MB/s eta 0:00:04
   ----------- ---------------------------- 11.3/39.4 MB 9.3 MB/s eta 0:00:04
   ------------- -------------------------- 13.1/39.4 MB 9.2 MB/s eta 0:00:03
   --------------- ------------------------ 14.9/39.4 MB 9.2 MB/s eta 0:00:03
   ---------------- ----------------------- 16.3/39.4 MB 9.3 MB/s eta 0:00:03
   ------------------ ----------

In [6]:
import cv2
from sklearn.preprocessing import StandardScaler

# CLAHE enhancement
def apply_clahe(images):
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = []
    for img in images:
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        eq = clahe.apply(gray)
        enhanced.append(eq.flatten())  # Flatten to 1D for LR
    return np.array(enhanced)

# Apply CLAHE to training and test images
x_train_clahe = apply_clahe(x_train[:10000])
x_test_clahe = apply_clahe(x_test)

# Normalize
scaler = StandardScaler()
x_train_clahe_scaled = scaler.fit_transform(x_train_clahe)
x_test_clahe_scaled = scaler.transform(x_test_clahe)

# Train logistic regression
clahe_lr = LogisticRegression(max_iter=300, solver='saga', multi_class='multinomial', n_jobs=-1)
clahe_lr.fit(x_train_clahe_scaled, y_train[:10000])

# Evaluate
y_pred_clahe = clahe_lr.predict(x_test_clahe_scaled)
print("Accuracy (CLAHE):", accuracy_score(y_test, y_pred_clahe))
print(classification_report(y_test, y_pred_clahe))


Accuracy (CLAHE): 0.1916
              precision    recall  f1-score   support

           0       0.22      0.21      0.22      1000
           1       0.23      0.22      0.22      1000
           2       0.16      0.18      0.17      1000
           3       0.15      0.13      0.14      1000
           4       0.13      0.13      0.13      1000
           5       0.17      0.15      0.16      1000
           6       0.17      0.17      0.17      1000
           7       0.20      0.22      0.21      1000
           8       0.25      0.26      0.25      1000
           9       0.24      0.25      0.25      1000

    accuracy                           0.19     10000
   macro avg       0.19      0.19      0.19     10000
weighted avg       0.19      0.19      0.19     10000



#HSV Preprocessing + Logistic Regression

In [7]:
import cv2
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler

# Convert RGB to HSV (OpenCV expects BGR, but CIFAR-10 is RGB, so conversion is fine)
x_train_hsv = np.array([cv2.cvtColor(img, cv2.COLOR_RGB2HSV) for img in x_train[:10000]])
x_test_hsv = np.array([cv2.cvtColor(img, cv2.COLOR_RGB2HSV) for img in x_test])

# Flatten
x_train_hsv_flat = x_train_hsv.reshape((x_train_hsv.shape[0], -1))
x_test_hsv_flat = x_test_hsv.reshape((x_test_hsv.shape[0], -1))

# Normalize
scaler = StandardScaler()
x_train_hsv_scaled = scaler.fit_transform(x_train_hsv_flat)
x_test_hsv_scaled = scaler.transform(x_test_hv_flat)

# Logistic Regression
hsv_lr = LogisticRegression(max_iter=300, solver='saga', multi_class='multinomial', n_jobs=-1)
hsv_lr.fit(x_train_hsv_scaled, y_train[:10000])
y_pred_hsv = hsv_lr.predict(x_test_hsv_scaled)

# Evaluation
hsv_acc = accuracy_score(y_test, y_pred_hsv)
print(f"Accuracy (HSV): {hsv_acc:.4f}")
print(classification_report(y_test, y_pred_hsv))


Accuracy (HSV): 0.2718
              precision    recall  f1-score   support

           0       0.32      0.36      0.34      1000
           1       0.38      0.35      0.36      1000
           2       0.18      0.19      0.18      1000
           3       0.19      0.19      0.19      1000
           4       0.20      0.21      0.21      1000
           5       0.20      0.20      0.20      1000
           6       0.29      0.29      0.29      1000
           7       0.27      0.25      0.26      1000
           8       0.34      0.38      0.36      1000
           9       0.35      0.30      0.32      1000

    accuracy                           0.27     10000
   macro avg       0.27      0.27      0.27     10000
weighted avg       0.27      0.27      0.27     10000



In [None]:
#PCA Preprocessing + Logistic Regression

In [8]:
import numpy as np
from tensorflow.keras.datasets import cifar10
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score

# Load CIFAR-10
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
y_train = y_train.ravel()
y_test = y_test.ravel()

# Subset for runtime efficiency
x_train_subset = x_train[:10000]
y_train_subset = y_train[:10000]

# Flatten RGB images
x_train_flat = x_train_subset.reshape((x_train_subset.shape[0], -1))
x_test_flat = x_test.reshape((x_test.shape[0], -1))

# Normalize
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train_flat)
x_test_scaled = scaler.transform(x_test_flat)

# Apply PCA
pca = PCA(n_components=150)  # You can tweak this value (e.g., 100–200)
x_train_pca = pca.fit_transform(x_train_scaled)
x_test_pca = pca.transform(x_test_scaled)

# Logistic Regression
pca_lr = LogisticRegression(max_iter=300, solver='saga', multi_class='multinomial', n_jobs=-1)
pca_lr.fit(x_train_pca, y_train_subset)

# Evaluate
y_pred_pca = pca_lr.predict(x_test_pca)
print(f"PCA Accuracy: {accuracy_score(y_test, y_pred_pca):.4f}")
print(classification_report(y_test, y_pred_pca))


PCA Accuracy: 0.3807
              precision    recall  f1-score   support

           0       0.43      0.46      0.45      1000
           1       0.46      0.46      0.46      1000
           2       0.28      0.28      0.28      1000
           3       0.29      0.26      0.27      1000
           4       0.32      0.25      0.28      1000
           5       0.34      0.30      0.32      1000
           6       0.39      0.47      0.43      1000
           7       0.41      0.40      0.41      1000
           8       0.44      0.51      0.47      1000
           9       0.41      0.41      0.41      1000

    accuracy                           0.38     10000
   macro avg       0.38      0.38      0.38     10000
weighted avg       0.38      0.38      0.38     10000



#Accuracy vs. Regularization Plot

In [2]:
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Prepare flattened RGB data (if not already done)
x_train_rgb = x_train.reshape((x_train.shape[0], -1))
x_test_rgb = x_test.reshape((x_test.shape[0], -1))

# Use a subset to speed up
x_train_sub = x_train_rgb[:10000]
y_train_sub = y_train[:10000]

# Regularization values
c_values = [0.01, 0.1, 1, 10, 100]
test_accuracies = []

# Run Logistic Regression for each C value
for c in c_values:
    model = LogisticRegression(C=c, solver='saga', max_iter=300, multi_class='multinomial', n_jobs=-1)
    model.fit(x_train_sub, y_train_sub)
    y_pred = model.predict(x_test_rgb)
    acc = accuracy_score(y_test, y_pred)
    test_accuracies.append(acc)
    print(f"C={c}: Test Accuracy={acc:.4f}")

# Plotting
plt.figure(figsize=(8, 5))
plt.plot(c_values, test_accuracies, marker='o', linestyle='--', color='blue')
plt.xscale('log')
plt.title("Phase 4: Accuracy vs. Regularization Strength (C)")
plt.xlabel("C (Inverse of Regularization Strength)")
plt.ylabel("Test Accuracy")
plt.grid(True)
plt.show()

NameError: name 'x_train' is not defined