In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score

# Read the preprocessed arrays from disk, one (images, labels) pair per split.
# Going by the file names, images are flattened and training labels are noisy.
train_images, train_labels = (
    np.load('train_images_flat.npy'),
    np.load('train_labels_noisy.npy'),
)
train_images_subset, train_labels_subset = (
    np.load('train_images_flat_subset.npy'),
    np.load('train_labels_noisy_subset.npy'),
)
test_images, test_labels = (
    np.load('test_images_flat.npy'),
    np.load('test_labels_binary.npy'),
)

# Hyperparameter search for a linear SVM: choose the regularisation strength C
# with the lowest mean k-fold cross-validation error on the training subset.
C_values = np.logspace(-3, 3, 10)  # 10 log-spaced candidates from 1e-3 to 1e3
k = 5  # Number of folds for cross-validation

# Search results, updated as the loop progresses.
optimal_C = None
lowest_error = np.inf  # Lowest mean validation error seen so far
best_training_error = np.inf  # Lowest training error over ALL C values
# Training error of the model fitted with optimal_C. This is tracked separately
# because best_training_error may come from a different (typically larger) C
# and therefore must not be read as the error of the selected model.
training_error_at_optimal_C = None

for C in C_values:
    model = SVC(kernel='linear', C=C)

    # Mean validation error over the k folds (accuracy converted to error).
    val_scores = cross_val_score(model, train_images_subset, train_labels_subset, cv=k, scoring='accuracy')
    mean_val_error = 1 - np.mean(val_scores)

    # Training error for this C: fit on the whole subset and score on the same data.
    model.fit(train_images_subset, train_labels_subset)
    train_predictions = model.predict(train_images_subset)
    train_error = 1 - accuracy_score(train_labels_subset, train_predictions)

    # Strict '<' keeps the first (smallest) C when validation errors tie.
    if mean_val_error < lowest_error:
        lowest_error = mean_val_error
        optimal_C = C
        training_error_at_optimal_C = train_error

    # Independent of the selection above: minimum training error across the grid.
    if train_error < best_training_error:
        best_training_error = train_error

# Output the results
print("Optimal C:", optimal_C)
print("Best validation error:", lowest_error)
print("Best training error:", best_training_error)
print("Training error at optimal C:", training_error_at_optimal_C)

# Refit a linear SVM on the complete training set using the C chosen above.
final_model = SVC(C=optimal_C, kernel='linear').fit(train_images, train_labels)

# Score the refitted model on the test arrays; error is reported as 1 - accuracy.
test_predictions = final_model.predict(test_images)
test_accuracy = accuracy_score(y_true=test_labels, y_pred=test_predictions)
test_error = 1 - test_accuracy

print("Test error with optimal C:", test_error)

Optimal C: 0.46415888336127775
Best validation error: 0.48
Best training error: 0.0
Test error with optimal C: 0.03849999999999998


In [2]:
import numpy as np

# Candidate C values: 10 points evenly spaced on a log10 scale from 1e-3 to 1e3.
C_values = np.logspace(start=-3, stop=3, num=10)

# Show the grid so the selected C can be matched to its position in it.
print(C_values)

[1.00000000e-03 4.64158883e-03 2.15443469e-02 1.00000000e-01
 4.64158883e-01 2.15443469e+00 1.00000000e+01 4.64158883e+01
 2.15443469e+02 1.00000000e+03]
