In [6]:
import os
import time
import numpy as np
import pandas as pd
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report, confusion_matrix

Configs

In [7]:
# Dataset identifiers: feature files are matched by this filename prefix and
# the same names are embedded in every saved model / report file name.
DATASETS = ['dataset1', 'dataset2']

# Directory scanned for the extracted ``.npz`` feature sets.
FEATURES_DIR = 'extracted_features'

# Output locations: fitted pipelines go to MODELS_DIR, plots and CSV
# summaries go to RESULTS_DIR.
MODELS_DIR, RESULTS_DIR = 'models', 'results'

# Create the output directories up front so later cells can write into them.
for _output_dir in (MODELS_DIR, RESULTS_DIR):
    os.makedirs(_output_dir, exist_ok=True)

Helper functions

In [8]:
def plot_confusion_matrix(y_true, y_pred, class_names, model_name, dataset_name, results_dir):
    """Plot, save and display the confusion matrix of one model on one dataset.

    The matrix is computed over the integer label codes
    ``0 .. len(class_names) - 1`` (these are passed explicitly as ``labels``),
    so row/column ``i`` is annotated with ``class_names[i]``.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Labels predicted by the model.
    class_names : sequence of str
        Tick labels for the matrix axes; its length fixes the matrix size.
    model_name : str
        Model identifier, used in the plot title and the output file name.
    dataset_name : str
        Dataset identifier, used in the plot title and the output file name.
    results_dir : str
        Directory the PNG is written to (created if it does not exist).

    The figure is saved as
    ``<results_dir>/<dataset_name>_<model_name>_best_confusion_matrix.png``,
    shown, and then closed.
    """
    cm = confusion_matrix(y_true, y_pred, labels=np.arange(len(class_names)))

    # Guarantee the destination exists so savefig cannot fail on a fresh directory.
    os.makedirs(results_dir, exist_ok=True)

    # Explicit figure/axes instead of the implicit pyplot state, so the exact
    # figure created here is the one that gets saved and released.
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_names,
        yticklabels=class_names,
        ax=ax,
    )
    ax.set_xlabel('Predicted Label')
    ax.set_ylabel('Actual Label')
    ax.set_title(f'Dataset: {dataset_name}\nModel: {model_name} - Confusion Matrix')
    fig.tight_layout()

    output_path = os.path.join(results_dir, f"{dataset_name}_{model_name}_best_confusion_matrix.png")
    fig.savefig(output_path)
    plt.show()

    # This helper is called once per model per dataset; close the figure so
    # figures do not pile up in memory on backends that keep them open.
    plt.close(fig)

Define hyperparameter search values and pipelines for each model (SVM, GradientBoosting, MLP)

In [9]:
def _build_pipeline(classifier):
    """Wrap ``classifier`` in the preprocessing chain shared by every model.

    Steps: standardise features -> PCA with ``n_components=0.95`` -> classifier.
    The final step is named ``'classifier'`` so the ``classifier__*`` keys in
    ``model_params`` address its hyperparameters.
    """
    steps = [
        ('scaler', StandardScaler()),
        ('pca', PCA(n_components=0.95)),
        ('classifier', classifier),
    ]
    return Pipeline(steps)


# Hyperparameter grid per model, keyed by the same names as ``pipelines``.
model_params = {
    'SVM': {
        'classifier__C': [1, 10],
        'classifier__gamma': ['scale', 0.001],
    },
    'GradientBoosting': {
        'classifier__n_estimators': [100, 200],
        'classifier__learning_rate': [0.05, 0.1],
    },
    'MLP': {
        'classifier__hidden_layer_sizes': [(50,), (100,)],
        'classifier__alpha': [0.0001, 0.001],
    },
}

# One pipeline per model; every estimator is seeded with random_state=42.
pipelines = {
    'SVM': _build_pipeline(SVC(probability=True, random_state=42)),
    'GradientBoosting': _build_pipeline(GradientBoostingClassifier(random_state=42)),
    'MLP': _build_pipeline(MLPClassifier(max_iter=500, early_stopping=True, random_state=42)),
}

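Main loop: for each of the 2 datasets, grid-search every model on every extracted feature set, then report and save the best configuration per model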

In [None]:
# Model / feature-set search.
#
# For every dataset, each model pipeline is grid-searched (3-fold CV) on every
# extracted feature set belonging to that dataset. Per model, the feature set
# with the best mean cross-validated accuracy is kept; the 20% hold-out split
# is used only to report the final metrics, never to pick the winner, so the
# reported test numbers are not biased by the selection itself.

# Sorted so the search order (and tie-breaking between equally good feature
# sets) does not depend on the arbitrary order returned by os.listdir.
all_feature_sets = sorted(f for f in os.listdir(FEATURES_DIR) if f.endswith('.npz'))

for dataset_name in DATASETS:
    print(f'dataset: {dataset_name}\n')
    print('#' * 80)

    results_for_dataset = {}
    dataset_feature_sets = [f for f in all_feature_sets if f.startswith(dataset_name)]

    if not dataset_feature_sets:
        print(f'no feature sets found for {dataset_name} in {FEATURES_DIR}, skipping')
        continue

    # try models for each dataset

    for model_name, pipeline in pipelines.items():
        print(f'searching best feature set for {model_name}\n')
        print("-"*80)
        start_time = time.time()

        # -inf (not 0) so a feature set is still recorded when every score is 0.
        curr_model_best_score = -np.inf
        curr_model_best_config = {}
        param_grid = model_params[model_name]

        for feature_file in dataset_feature_sets:
            feature_set_name = os.path.splitext(feature_file)[0]
            print(f'\n--- trying: {feature_set_name} ---')

            filepath = os.path.join(FEATURES_DIR, feature_file)
            # NOTE(security): allow_pickle=True is required to read the pickled
            # class_map object, but unpickling can execute arbitrary code --
            # only load feature files produced by this project.
            with np.load(filepath, allow_pickle=True) as data:
                X, y, class_map = data['features'], data['labels'], data['class_map'].item()

            # NOTE(review): assumes class_map maps class name -> integer label and
            # that its key order matches label order 0..n-1 -- confirm against the
            # feature-extraction step.
            class_names = list(class_map.keys())
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42, stratify=y)

            # GridSearchCV clones `pipeline`, so the shared template is never fitted.
            grid_search = GridSearchCV(pipeline, param_grid, cv=3, n_jobs=2, verbose=2)
            grid_search.fit(X_train, y_train)

            cv_score = grid_search.best_score_
            accuracy = grid_search.score(X_test, y_test)
            print(f'cv accuracy for {model_name} with {feature_set_name}: {cv_score:.4f}')
            print(f'accuracy for {model_name} with {feature_set_name}: {accuracy:.4f}')

            # Failed fits are scored as NaN; a NaN best score means no candidate
            # completed all folds, so this feature set cannot be ranked.
            if not np.isfinite(cv_score):
                print(f'skipping {feature_set_name}: no candidate produced a valid cv score')
                continue

            # Select on the cross-validated score; the test split stays untouched
            # until the final report.
            if cv_score > curr_model_best_score:
                curr_model_best_score = cv_score
                curr_model_best_config = {
                    'model_name': model_name, 'feature_set': feature_set_name,
                    'pipeline': grid_search.best_estimator_, 'best_params': grid_search.best_params_,
                    'cv_score': cv_score,
                    'X_test': X_test, 'y_test': y_test, 'class_names': class_names
                }

        # save best result after trying all feature set combinations

        if curr_model_best_config:
            results_for_dataset[model_name] = curr_model_best_config
        print(f'\nfinished tuning for {model_name} in {(time.time() - start_time) / 60:.2f} minutes')


    # save and print summary report for the dataset

    print('\n' + '-' * 80)
    print(f'REPORT: {dataset_name.upper()}\n')
    print('-' * 80)

    summary_data = []

    for model_name, best_config in results_for_dataset.items():
        print(f'\n--- best performance for {model_name}  ---')

        best_pipeline = best_config['pipeline']
        y_pred = best_pipeline.predict(best_config['X_test'])
        report_dict = classification_report(best_config['y_test'], y_pred, target_names=best_config['class_names'], output_dict=True)

        print(f"\nbest feature set: {best_config['feature_set']}")
        print(f"\nbest hyperparameters: {best_config['best_params']}")
        print('\nclassification report:\n')
        print(classification_report(best_config['y_test'], y_pred, target_names=best_config['class_names']))

        plot_confusion_matrix(y_true=best_config['y_test'], y_pred=y_pred, class_names=best_config['class_names'], model_name=model_name, dataset_name=dataset_name, results_dir=RESULTS_DIR)

        best_model_path = os.path.join(MODELS_DIR, f'{dataset_name}_{model_name}_best_model.pkl')
        joblib.dump(best_pipeline, best_model_path)
        print(f'\nsaved best {model_name} model for {dataset_name} to {best_model_path}')

        # The CV column is appended last so the existing columns keep their positions.
        summary_data.append({
            'Model': model_name, 'Best Feature Set': best_config['feature_set'], 'Best Params': str(best_config['best_params']),
            'Accuracy': report_dict['accuracy'], 'F1-Score (Macro)': report_dict['macro avg']['f1-score'],
            'Precision (Macro)': report_dict['macro avg']['precision'], 'Recall (Macro)': report_dict['macro avg']['recall'],
            'CV Accuracy (mean)': best_config['cv_score'],
        })

    if summary_data:
        print('\n' + '-' * 80)
        print(f"COMPARISON TABLE FOR {dataset_name.upper()}")
        print('-' * 80)
        df_final_summary = pd.DataFrame(summary_data).sort_values(by="Accuracy", ascending=False).set_index('Model')
        print(df_final_summary.to_string())
        df_final_summary.to_csv(os.path.join(RESULTS_DIR, f'{dataset_name}_final_model_comparison_summary.csv'))
        print(f'\ncomparison table saved to {RESULTS_DIR}/{dataset_name}_final_model_comparison_summary.csv ')

print('\n\n' + '#' * 80)
print("DONE")
print('\n\n' + '#' * 80)



dataset: dataset1

################################################################################
searching best feature set for SVM

--------------------------------------------------------------------------------

--- trying: dataset1_hog16x16_lbp16p1r ---
Fitting 3 folds for each of 4 candidates, totalling 12 fits
accuracy for SVM with dataset1_hog16x16_lbp16p1r: 0.7174

--- trying: dataset1_hog16x16_lbp16p2r ---
Fitting 3 folds for each of 4 candidates, totalling 12 fits
accuracy for SVM with dataset1_hog16x16_lbp16p2r: 0.7218

--- trying: dataset1_hog16x16_lbp16p3r ---
Fitting 3 folds for each of 4 candidates, totalling 12 fits
accuracy for SVM with dataset1_hog16x16_lbp16p3r: 0.7254

--- trying: dataset1_hog16x16_lbp8p1r ---
Fitting 3 folds for each of 4 candidates, totalling 12 fits
accuracy for SVM with dataset1_hog16x16_lbp8p1r: 0.7145

--- trying: dataset1_hog16x16_lbp8p2r ---
Fitting 3 folds for each of 4 candidates, totalling 12 fits
accuracy for SVM with dataset1_hog16x1

9 fits failed out of a total of 12.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
6 fits failed with the following error:
Traceback (most recent call last):
  File "d:\VSCode Projects\weather-image-classification\.venv\lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "d:\VSCode Projects\weather-image-classification\.venv\lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "d:\VSCode Projects\weather-image-classification\.venv\lib\site-packages\sklearn\pipeline.py", line 654, in fit
    Xt = self._fit(X, y, routed_params, raw_params=params)
  File "d:\VSCode Projects\weather-image-

accuracy for SVM with dataset1_hog8x8_lbp16p1r: 0.6686

--- trying: dataset1_hog8x8_lbp16p2r ---
Fitting 3 folds for each of 4 candidates, totalling 12 fits


9 fits failed out of a total of 12.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
2 fits failed with the following error:
Traceback (most recent call last):
  File "d:\VSCode Projects\weather-image-classification\.venv\lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "d:\VSCode Projects\weather-image-classification\.venv\lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "d:\VSCode Projects\weather-image-classification\.venv\lib\site-packages\sklearn\pipeline.py", line 654, in fit
    Xt = self._fit(X, y, routed_params, raw_params=params)
  File "d:\VSCode Projects\weather-image-

accuracy for SVM with dataset1_hog8x8_lbp16p2r: 0.6701

--- trying: dataset1_hog8x8_lbp16p3r ---
Fitting 3 folds for each of 4 candidates, totalling 12 fits


9 fits failed out of a total of 12.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
2 fits failed with the following error:
Traceback (most recent call last):
  File "d:\VSCode Projects\weather-image-classification\.venv\lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "d:\VSCode Projects\weather-image-classification\.venv\lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "d:\VSCode Projects\weather-image-classification\.venv\lib\site-packages\sklearn\pipeline.py", line 654, in fit
    Xt = self._fit(X, y, routed_params, raw_params=params)
  File "d:\VSCode Projects\weather-image-

KeyboardInterrupt: 