In [1]:
# -*- coding: utf-8 -*-

import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Final training/evaluation script for the classification exercise:
# loads the pre-split arrays, fits the selected SVC, and prints a short report.

# Directory holding the four .npy arrays, relative to the working directory.
DATA_DIR = '../data/classification'

# Hyperparameters of the selected model. They are defined once so that the
# model construction and the printed report cannot drift apart.
SVC_C = 10
SVC_GAMMA = 0.01

print("1. Loading Data")
try:
    X_train = np.load(f'{DATA_DIR}/X_train.npy')
    y_train = np.load(f'{DATA_DIR}/y_train.npy')
    X_test = np.load(f'{DATA_DIR}/X_test.npy')
    y_test = np.load(f'{DATA_DIR}/y_test.npy')
except FileNotFoundError:
    print(f"\nError: Data files not found. Please check the path '{DATA_DIR}/'.")
    # Raise SystemExit directly instead of calling exit(): exit() is a helper
    # installed by the `site` module for interactive use and may be missing,
    # and a non-zero status tells callers that the run actually failed.
    raise SystemExit(1) from None
else:
    print("Data loaded successfully.")

print("\n 2. Training the best-performing model")

best_model = SVC(C=SVC_C, gamma=SVC_GAMMA, kernel='rbf', random_state=42)

best_model.fit(X_train, y_train)

print("\n 3. Evaluation")

# Accuracy is measured on the held-out test split only.
y_pred = best_model.predict(X_test)
final_accuracy = accuracy_score(y_test, y_pred)

print("\n")
print("FINAL REPORT FOR EXERCISE 5")
print("\nBest Model Found: Support Vector Classifier (SVC)")
print("Optimal Preprocessing: None (raw data)")
print(f"Optimal Hyperparameters: C={SVC_C}, gamma={SVC_GAMMA}")
print(f"Final Test Accuracy: {final_accuracy:.4f}")

1. Loading Data
Data loaded successfully.

 2. Training the best-performing model

 3. Evaluation


FINAL REPORT FOR EXERCISE 5

Best Model Found: Support Vector Classifier (SVC)
Optimal Preprocessing: None (raw data)
Optimal Hyperparameters: C=10, gamma=0.01
Final Test Accuracy: 0.8075


To solve the classification task, we evaluated the five suggested classifiers. We used GridSearchCV to find the best hyperparameters for each model in two scenarios: using the raw data and using data scaled with StandardScaler.
This search showed that scaling the data made model performance worse on this dataset. The highest accuracy was achieved by an SVC trained directly on the raw data. The final test accuracy is 0.8075, which is below the 0.85 objective, but this is the best solution we found with our search process.