# Libraries

In [10]:
import pickle
import numpy as np
import time
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif
from imblearn.over_sampling import SMOTE
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
import xgboost as xgb
from sklearn.linear_model import LogisticRegression
from paths import SKIMAGE_FEATURES_PATH, IMG2VEC_FEATURES_PATH

# Data Processing (IMG2VEC)
## Import

In [11]:
# Read the pickled object stored at IMG2VEC_FEATURES_PATH and unpack it into
# X and y (used below as the model inputs and the stratification/target labels).
# NOTE(review): pickle.load can execute arbitrary code — only load files
# produced by this project's own feature-extraction step.
with open(IMG2VEC_FEATURES_PATH, "rb") as f:
    features_and_labels = pickle.load(f)
X, y = features_and_labels

## Split

In [12]:
# Hold out 20% of the samples for testing. Stratifying on y keeps the class
# proportions the same in both partitions; the fixed seed makes the split
# repeatable.
split_parts = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=28,
    stratify=y,
)
X_train, X_test, y_train, y_test = split_parts

## Scale

In [13]:
# Standardize the features. The scaler's statistics are learned from the
# training partition only and then applied unchanged to the test partition,
# so no information from the test set leaks into preprocessing.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# SVM
Best Parameters: {'pca__n_components': 230, 'svm__C': 24}


In [14]:
# RBF-kernel SVM on PCA-reduced features, built with the tuned values
# pca__n_components=230 and svm__C=24.
SVM_pipeline = Pipeline([
    # random_state pins the projection whenever PCA selects a randomized or
    # arpack SVD solver, so repeated runs give the same components.
    ('pca', PCA(n_components=230, random_state=28)),
    # max_iter=5000 is a hard cap on solver iterations, so fitting may stop
    # before the tol=0.001 criterion is reached (sklearn warns when it does).
    ('svm', SVC(kernel='rbf', C=24, max_iter=5000, tol=0.001,
                random_state=28, class_weight="balanced", gamma="scale")),
])

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() can jump if the system clock is adjusted.
start_train = time.perf_counter()
SVM_pipeline.fit(X_train_scaled, y_train)
end_train = time.perf_counter()
training_time = end_train - start_train

print("Training Time: {:.4f} seconds".format(training_time))

Training Time: 26.4602 seconds




In [15]:
# Time prediction over the held-out test set with the fitted SVM pipeline,
# then report overall accuracy and the per-class precision/recall/F1 table.
# perf_counter() is used because it is monotonic and meant for intervals.
start_infer = time.perf_counter()
y_pred = SVM_pipeline.predict(X_test_scaled)
end_infer = time.perf_counter()
inference_time = end_infer - start_infer

print("Inference Time: {:.4f} seconds".format(inference_time))
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Inference Time: 10.1463 seconds
Accuracy: 0.8540833809251857
Classification Report:
               precision    recall  f1-score   support

           0       0.74      0.69      0.71       225
           1       0.81      0.78      0.80       213
           2       0.88      0.85      0.86       206
           3       0.78      0.78      0.78       204
           4       0.83      0.82      0.82       213
           5       0.85      0.78      0.81       202
           6       0.88      0.82      0.85       215
           7       0.81      0.69      0.75       203
           8       0.88      0.93      0.90      1821

    accuracy                           0.85      3502
   macro avg       0.83      0.79      0.81      3502
weighted avg       0.85      0.85      0.85      3502



# Random Forest
Best Parameters: {'RF__class_weight': 'balanced', 'RF__n_estimators': 230, 'pca__n_components': 28}

In [16]:
# Random forest on PCA-reduced features, built with the tuned values
# pca__n_components=28, RF__n_estimators=230 and RF__class_weight='balanced'.
Forest_pipeline = Pipeline([
    # random_state pins the projection whenever PCA selects a randomized or
    # arpack SVD solver. (SelectKBest would be an alternative reduction step.)
    ('pca', PCA(n_components=28, random_state=28)),
    # n_jobs=-1 builds the trees on all available cores; random_state fixes
    # the forest's bootstrap and feature sampling.
    ('RF', RandomForestClassifier(n_estimators=230, class_weight="balanced",
                                   n_jobs=-1, random_state=28)),
])

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() can jump if the system clock is adjusted.
start_train = time.perf_counter()
Forest_pipeline.fit(X_train_scaled, y_train)
end_train = time.perf_counter()
training_time = end_train - start_train

print("Training Time: {:.4f} seconds".format(training_time))

Training Time: 4.7832 seconds


In [17]:
# Time prediction over the held-out test set with the fitted random-forest
# pipeline, then report overall accuracy and the per-class metrics table.
# perf_counter() is used because it is monotonic and meant for intervals.
start_infer = time.perf_counter()
y_pred = Forest_pipeline.predict(X_test_scaled)
end_infer = time.perf_counter()
inference_time = end_infer - start_infer

print("Inference Time: {:.4f} seconds".format(inference_time))
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Inference Time: 0.1372 seconds
Accuracy: 0.7027412906910337
Classification Report:
               precision    recall  f1-score   support

           0       0.75      0.39      0.51       225
           1       0.82      0.38      0.51       213
           2       0.86      0.51      0.64       206
           3       0.84      0.33      0.47       204
           4       0.86      0.65      0.74       213
           5       0.85      0.41      0.55       202
           6       0.82      0.30      0.44       215
           7       0.85      0.33      0.48       203
           8       0.66      0.97      0.79      1821

    accuracy                           0.70      3502
   macro avg       0.81      0.47      0.57      3502
weighted avg       0.74      0.70      0.67      3502



# KNN
Best Parameters: {'KNN__metric': 'cosine', 'KNN__n_neighbors': 8, 'KNN__weights': 'distance', 'pca__n_components': 210}

In [18]:
# k-nearest-neighbours on PCA-reduced features, built with the tuned values
# pca__n_components=210, KNN__n_neighbors=8, KNN__metric='cosine' and
# KNN__weights='distance'.
KNN_pipeline = Pipeline([
    # random_state pins the projection whenever PCA selects a randomized or
    # arpack SVD solver. (SelectKBest would be an alternative reduction step.)
    ('pca', PCA(n_components=210, random_state=28)),
    # weights="distance" lets closer neighbours count more in the vote.
    ('KNN', KNeighborsClassifier(n_neighbors=8, metric="cosine", weights="distance")),
])

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() can jump if the system clock is adjusted.
start_train = time.perf_counter()
KNN_pipeline.fit(X_train_scaled, y_train)
end_train = time.perf_counter()
training_time = end_train - start_train

print("Training Time: {:.4f} seconds".format(training_time))

Training Time: 0.1845 seconds


In [19]:
# Time prediction over the held-out test set with the fitted KNN pipeline,
# then report overall accuracy and the per-class metrics table.
# perf_counter() is used because it is monotonic and meant for intervals.
start_infer = time.perf_counter()
y_pred = KNN_pipeline.predict(X_test_scaled)
end_infer = time.perf_counter()
inference_time = end_infer - start_infer

print("Inference Time: {:.4f} seconds".format(inference_time))
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Inference Time: 0.9350 seconds
Accuracy: 0.7966876070816676
Classification Report:
               precision    recall  f1-score   support

           0       0.75      0.57      0.65       225
           1       0.83      0.56      0.67       213
           2       0.76      0.81      0.78       206
           3       0.68      0.62      0.65       204
           4       0.73      0.77      0.75       213
           5       0.78      0.71      0.74       202
           6       0.82      0.69      0.75       215
           7       0.76      0.51      0.61       203
           8       0.82      0.93      0.87      1821

    accuracy                           0.80      3502
   macro avg       0.77      0.69      0.72      3502
weighted avg       0.79      0.80      0.79      3502



# Logistic Regression
Best Parameters: {'LGR__C': 135.0, 'LGR__penalty': 'l1', 'pca__n_components': 275}

In [20]:
# L1-penalised logistic regression on PCA-reduced features, built with the
# tuned values pca__n_components=275, LGR__C=135.0 and LGR__penalty='l1'.
LGR_pipeline = Pipeline([
    # random_state pins the projection whenever PCA selects a randomized or
    # arpack SVD solver.
    ('pca', PCA(n_components=275, random_state=28)),
    # The deprecated multi_class='multinomial' argument is dropped: with the
    # saga solver and more than two classes, scikit-learn already fits a
    # multinomial model. saga is a stochastic solver, so random_state is set
    # (matching the seed used by the other models) to make the fit repeatable.
    ('LGR', LogisticRegression(
        solver='saga',
        max_iter=1000,
        class_weight='balanced',
        penalty="l1",
        C=135.0,
        random_state=28,
    )),
])

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() can jump if the system clock is adjusted.
start_train = time.perf_counter()
LGR_pipeline.fit(X_train_scaled, y_train)
end_train = time.perf_counter()
training_time = end_train - start_train

print("Training Time: {:.4f} seconds".format(training_time))



Training Time: 368.6692 seconds


In [21]:
# Time prediction over the held-out test set with the fitted logistic
# regression pipeline, then report overall accuracy and the per-class table.
# perf_counter() is used because it is monotonic and meant for intervals.
start_infer = time.perf_counter()
y_pred = LGR_pipeline.predict(X_test_scaled)
end_infer = time.perf_counter()
inference_time = end_infer - start_infer

print("Inference Time: {:.4f} seconds".format(inference_time))
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Inference Time: 0.0202 seconds
Accuracy: 0.683894917190177
Classification Report:
               precision    recall  f1-score   support

           0       0.54      0.65      0.59       225
           1       0.53      0.72      0.61       213
           2       0.65      0.86      0.74       206
           3       0.50      0.75      0.60       204
           4       0.64      0.76      0.69       213
           5       0.58      0.84      0.69       202
           6       0.54      0.83      0.65       215
           7       0.48      0.69      0.57       203
           8       0.93      0.61      0.74      1821

    accuracy                           0.68      3502
   macro avg       0.60      0.75      0.65      3502
weighted avg       0.75      0.68      0.69      3502

