In [71]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, KFold, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC

from utils.preprocessing import Preprocessor

In [37]:
def load_train_test():
    df = Preprocessor.load_images(dir='data/pngimages/', resize_dims=(64, 64))
    return train_test_split(df, test_size=0.2)

train_ds, test_ds = load_train_test()

In [38]:
print(train_ds['images'][0].shape)

(12288,)


In [58]:
steps = [
    ('scaler', MinMaxScaler()),
    ('clf', SVC(kernel='poly'))
]

def check_results(cv_results):
    # Extract the hyperparameters and corresponding validation scores
    params = cv_results['params']
    mean_scores = cv_results['mean_test_score']
    std_scores = cv_results['std_test_score']

    # Combine hyperparameters, mean scores, and std scores into a list of tuples
    results = list(zip(params, mean_scores, std_scores))

    # Sort the results based on mean scores in descending order
    results.sort(key=lambda x: x[1], reverse=True)

    # Display the top 5 best hyperparameters and their mean and standard deviation of predictions
    print("Top 5 Best Hyperparameters:")
    for i in range(5):
        params, mean_score, std_score = results[i]
        print(f"Hyperparameters: {params}, Mean Score: {mean_score:.4f}, Std Score: {std_score:.4f}")

def check_grid(param_grid):
    pipeline = Pipeline(steps)
    kfold = KFold(n_splits=5)

    grid_search = GridSearchCV(pipeline, param_grid, cv=kfold, n_jobs=-1)
    grid_search.fit(train_ds['images'].to_list(), train_ds['labels'])
    check_results(grid_search.cv_results_)

In [59]:
param_grid = {
    'clf__degree': [1, 2, 3, 4],
    'clf__C': [0.001, 0.01, 0.1, 1, 2, 5],
}
check_grid(param_grid)

Top 5 Best Hyperparameters:
Hyperparameters: {'clf__C': 0.1, 'clf__degree': 4}, Mean Score: 0.9044, Std Score: 0.0096
Hyperparameters: {'clf__C': 1, 'clf__degree': 4}, Mean Score: 0.9044, Std Score: 0.0096
Hyperparameters: {'clf__C': 2, 'clf__degree': 4}, Mean Score: 0.9044, Std Score: 0.0096
Hyperparameters: {'clf__C': 5, 'clf__degree': 4}, Mean Score: 0.9044, Std Score: 0.0096
Hyperparameters: {'clf__C': 1, 'clf__degree': 2}, Mean Score: 0.9022, Std Score: 0.0130


In [64]:
final_clf = SVC(
    kernel='poly', # pilot runs
    degree=4, # from tuning
    C=1 # default / from tuning
)

final_clf.fit(train_ds['images'].to_list(), train_ds['labels'])
y_pred = final_clf.predict(test_ds['images'].to_list())
print('Test accuracy:', accuracy_score(test_ds['labels'], y_pred))

Test accuracy: 0.8844444444444445
