## Train Base Classifiers and save them

In [1]:
import torch
import matplotlib.pyplot as plt
import os
from sklearn.metrics import accuracy_score
import joblib
import sys
import numpy as np


CLIP_FEATURES_DIR = "clip_features"
TRAIN = CLIP_FEATURES_DIR + "/train_features.pt"
VAL = CLIP_FEATURES_DIR + "/val_features.pt"
SCALER = "scaler_model.joblib"
PCA = "pca_model.joblib"
LIME = "top_k_lime_indices.joblib"

In [2]:
def load_features(file_path):
    data = torch.load(file_path)
    return data["image_features"], data["text_features"], data["filenames"], data["labels"]


# Load train and validation features
train_img_features, train_txt_features, _, train_labels = load_features(TRAIN)
val_img_features, val_txt_features, _, val_labels = load_features(VAL)

In [3]:
# Combine image and text features for training
X_train = torch.cat((train_img_features, train_txt_features), dim=1)
X_val = torch.cat((val_img_features, val_txt_features), dim=1)

# Flatten features into a 2D matrix (samples x features)
X_train = X_train.view(X_train.size(0), -1).numpy()
X_val = X_val.view(X_val.size(0), -1).numpy()

print(f"X_train shape: {X_train.shape}")
print(f"X_val shape: {X_val.shape}")

# Convert labels to NumPy arrays
y_train = train_labels.numpy()
y_val = val_labels.numpy()  

# Load scaler and PCA models
scaler = joblib.load(SCALER)
pca = joblib.load(PCA)
lime = joblib.load(LIME)

# Scale and transform the features
X_train_scaled = scaler.transform(X_train)
X_val_scaled = scaler.transform(X_val)

X_train_pca = pca.transform(X_train_scaled)
X_val_pca = pca.transform(X_val_scaled)

X_train_lime = X_train_scaled[:, lime]
X_val_lime = X_val_scaled[:, lime]

print(f"X_train_pca shape: {X_train_pca.shape}")
print(f"X_val_pca shape: {X_val_pca.shape}")
print(f"X_train_lime shape: {X_train_lime.shape}")
print(f"X_val_lime shape: {X_val_lime.shape}")

X_train shape: (7919, 1024)
X_val shape: (1985, 1024)
X_train_pca shape: (7919, 563)
X_val_pca shape: (1985, 563)
X_train_lime shape: (7919, 250)
X_val_lime shape: (1985, 250)


In [4]:
from classifiers import (
    SVMClassifier, RBFClassifier, RandomForestClassifier, NaiveBayesClassifier, 
    LogisticRegressionClassifier, LDAClassifier, KNNClassifier, DecisionTreeClassifier,
    AdaBoostClassifier, GBMClassifier, XGBoostClassifier
)


# Initialize classifiers
classifiers = {
    "SVM": SVMClassifier(),
    "RBF": RBFClassifier(),
    "Random Forest": RandomForestClassifier(),
    "Naive Bayes": NaiveBayesClassifier(),
    "Logistic Regression": LogisticRegressionClassifier(),
    "LDA": LDAClassifier(),
    "KNN": KNNClassifier(),
    "Decision Tree": DecisionTreeClassifier(),
    "AdaBoost": AdaBoostClassifier(),
    "Gradient Boosting": GBMClassifier(),
    "XGBoost": XGBoostClassifier()
}


# Train and evaluate classifiers
results = {}
for name, clf in classifiers.items():
    print(f"Training {name}...")
    
    clf.train(X_train_pca, y_train)
        
    y_pred = clf.classify(X_val_pca)
    print(f"Evaluating {name}...")
    accuracy = accuracy_score(y_val, y_pred)
    results[name] = accuracy
    print(f"{name} Accuracy: {accuracy:.4f}")
    clf.save(model_dir="models_lime")

Training SVM...
Evaluating SVM...
SVM Accuracy: 0.6247
Model saved to: models_lime/SVM.joblib
Label encoder saved to: models_lime/label_encoder.joblib
Training RBF...
Evaluating RBF...
RBF Accuracy: 0.6448
Model saved to: models_lime/RBF.joblib
Label encoder saved to: models_lime/label_encoder.joblib
Training Random Forest...
Evaluating Random Forest...
Random Forest Accuracy: 0.4408
Model saved to: models_lime/RandomForest.joblib
Label encoder saved to: models_lime/label_encoder.joblib
Training Naive Bayes...
Evaluating Naive Bayes...
Naive Bayes Accuracy: 0.5557
Model saved to: models_lime/NaiveBayes.joblib
Label encoder saved to: models_lime/label_encoder.joblib
Training Logistic Regression...
Evaluating Logistic Regression...
Logistic Regression Accuracy: 0.6176
Model saved to: models_lime/LogisticRegression.joblib
Label encoder saved to: models_lime/label_encoder.joblib
Training LDA...
Evaluating LDA...
LDA Accuracy: 0.5889
Model saved to: models_lime/LDA.joblib
Label encoder save

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Evaluating XGBoost...
XGBoost Accuracy: 0.6025
Model saved to: models_lime/XGBoost.joblib
Label encoder saved to: models_lime/label_encoder.joblib


In [5]:
print(results)

{'SVM': 0.6246851385390428, 'RBF': 0.6448362720403022, 'Random Forest': 0.44080604534005036, 'Naive Bayes': 0.5556675062972293, 'Logistic Regression': 0.617632241813602, 'LDA': 0.5889168765743074, 'KNN': 0.6151133501259446, 'Decision Tree': 0.4125944584382871, 'AdaBoost': 0.3612090680100756, 'Gradient Boosting': 0.46700251889168765, 'XGBoost': 0.6025188916876574}


In [6]:
svm = SVMClassifier()
svm.load()


Loaded model from: models_pca/SVM.joblib
Loaded label encoder from: models_pca/label_encoder.joblib


In [7]:
# Test the classify method on the first sample of X_val_pca


print(svm.classify(X_val_pca[1].reshape(1, -1)))

res = svm.classify_proba(X_val_pca[1].reshape(1, -1))

print(res)
print(len(res[0]))



[19]
[[2.32076981e-02 1.34611236e-03 1.25901714e-02 5.85654567e-04
  4.07275092e-04 4.25146018e-04 1.39466697e-03 2.53670457e-02
  2.54614091e-03 1.30538139e-03 2.01477715e-03 0.00000000e+00
  4.57641803e-04 5.06560856e-04 1.97880845e-03 1.19586955e-02
  1.19021863e-03 3.73154936e-03 8.41261570e-01 1.05893384e-02
  6.45910371e-03 7.33609861e-04 1.09882234e-03 1.56283540e-03
  2.27052902e-03 0.00000000e+00 1.08528036e-03 8.20296490e-04
  0.00000000e+00 0.00000000e+00 6.63490490e-04 3.64371662e-03
  1.14689461e-03 1.03207469e-03 8.08518500e-04 2.81255769e-04
  8.56127698e-04 6.63258062e-04 0.00000000e+00 1.29452462e-03
  2.50078180e-04 3.10780820e-04 3.46581456e-04 1.41727148e-03
  0.00000000e+00 6.36439110e-04 1.29948245e-03 5.62925294e-04
  3.89759845e-04 5.56517860e-04 1.10346011e-03 1.10908070e-03
  1.72348807e-03 2.74565925e-04 4.98832954e-04 3.25320580e-04
  1.05945529e-03 2.94146263e-04 2.10355062e-04 3.50596755e-04
  3.57241552e-04 1.51403936e-03 1.15708915e-03 3.03519275e-03
  7