## Train Base Classifiers and save them

In [1]:
import torch
import matplotlib.pyplot as plt
import os
from sklearn.metrics import accuracy_score
import joblib
import sys
import numpy as np


# File locations used by this notebook: the saved train/validation feature
# files and the scaler / PCA objects that are loaded with joblib further down.
CLIP_FEATURES_DIR = "clip_features"
TRAIN = f"{CLIP_FEATURES_DIR}/train_features.pt"
VAL = f"{CLIP_FEATURES_DIR}/val_features.pt"
SCALER, PCA = "scaler_model.joblib", "pca_model.joblib"

In [2]:
def load_features(file_path):
    """Load one saved feature file and return its four components.

    Args:
        file_path: Path to a file readable by ``torch.load`` that holds a
            mapping with the keys ``"image_features"``, ``"text_features"``,
            ``"filenames"`` and ``"labels"``.

    Returns:
        A tuple ``(image_features, text_features, filenames, labels)``,
        in that order.

    Raises:
        KeyError: If the loaded mapping lacks one of the four keys
            (assuming the file stores a plain dict).
    """
    # Force tensors onto the CPU: a file saved from a GPU run would otherwise
    # fail to load on a CPU-only machine, or come back as CUDA tensors that
    # cannot be converted with .numpy() downstream.
    # NOTE(review): torch.load unpickles the file; only load trusted files.
    data = torch.load(file_path, map_location="cpu")
    return data["image_features"], data["text_features"], data["filenames"], data["labels"]


# Read both splits from disk first, then unpack each 4-tuple.
train_split = load_features(TRAIN)
val_split = load_features(VAL)

# The third element (filenames) is not used in this notebook, hence `_`.
train_img_features, train_txt_features, _, train_labels = train_split
val_img_features, val_txt_features, _, val_labels = val_split

In [3]:
def _to_feature_matrix(image_features, text_features):
    """Concatenate image and text features into a 2-D NumPy matrix.

    Args:
        image_features: Tensor of per-sample image features.
        text_features: Tensor of per-sample text features; must be
            concatenable with ``image_features`` along dimension 1.

    Returns:
        A NumPy array of shape ``(n_samples, n_features)``.
    """
    combined = torch.cat((image_features, text_features), dim=1)
    # reshape (unlike view) also accepts non-contiguous tensors.
    flat = combined.reshape(combined.size(0), -1)
    # detach + cpu make the conversion safe for tensors that track gradients
    # or live on a GPU; a bare .numpy() raises for both.
    return flat.detach().cpu().numpy()


# Combine image and text features into (samples x features) matrices
X_train = _to_feature_matrix(train_img_features, train_txt_features)
X_val = _to_feature_matrix(val_img_features, val_txt_features)

print(f"X_train shape: {X_train.shape}")
print(f"X_val shape: {X_val.shape}")

# Convert labels to NumPy arrays
y_train = train_labels.detach().cpu().numpy()
y_val = val_labels.detach().cpu().numpy()

# Fail early if features and labels are misaligned.
assert X_train.shape[0] == y_train.shape[0], "train features/labels row count mismatch"
assert X_val.shape[0] == y_val.shape[0], "val features/labels row count mismatch"

# Load the scaler and PCA objects; they are only used via transform() here,
# so they must already be fitted.
# NOTE(review): joblib.load unpickles the file; only load trusted files.
scaler = joblib.load(SCALER)
pca = joblib.load(PCA)

# Scale, then project with PCA (same transformers for both splits)
X_train_scaled = scaler.transform(X_train)
X_val_scaled = scaler.transform(X_val)

X_train_pca = pca.transform(X_train_scaled)
X_val_pca = pca.transform(X_val_scaled)

print(f"X_train_pca shape: {X_train_pca.shape}")
print(f"X_val_pca shape: {X_val_pca.shape}")

X_train shape: (7919, 1024)
X_val shape: (1985, 1024)
X_train_pca shape: (7919, 563)
X_val_pca shape: (1985, 563)


In [4]:
from classifiers import (
    SVMClassifier, RBFClassifier, RandomForestClassifier, NaiveBayesClassifier, 
    LogisticRegressionClassifier, LDAClassifier, KNNClassifier, DecisionTreeClassifier,
    AdaBoostClassifier, GBMClassifier, XGBoostClassifier
)


# Classifiers to (re)train in this run, keyed by display name.
# Every entry is currently commented out, so the mapping is empty and the
# loop below trains nothing; uncomment the entries you want to retrain.
classifiers = {
    #"SVM": SVMClassifier(),
    #"RBF": RBFClassifier(),
    #"Random Forest": RandomForestClassifier(),
    #"Naive Bayes": NaiveBayesClassifier(),
    #"Logistic Regression": LogisticRegressionClassifier(),
    #"LDA": LDAClassifier(),
    #"KNN": KNNClassifier(),
    #"Decision Tree": DecisionTreeClassifier(),
    #"AdaBoost": AdaBoostClassifier(),
    #"Gradient Boosting": GBMClassifier(),
    #"XGBoost": XGBoostClassifier()
}


# Train, evaluate and save each selected classifier.
# `results` maps classifier name -> validation accuracy.
results = {}
if not classifiers:
    # Make the no-op explicit instead of silently leaving `results` empty.
    print("No classifiers enabled; nothing to train.")

for name, clf in classifiers.items():
    print(f"Training {name}...")
    clf.train(X_train_pca, y_train)

    # Announce evaluation before predicting, so the log matches the work.
    print(f"Evaluating {name}...")
    y_pred = clf.classify(X_val_pca)
    accuracy = accuracy_score(y_val, y_pred)
    results[name] = accuracy
    print(f"{name} Accuracy: {accuracy:.4f}")

    # Store the trained model via the wrapper's own save() (the destination
    # is decided by the classifier class, not visible here).
    clf.save()

In [5]:
# Show the validation accuracy recorded per classifier name; the dict is
# empty when the training loop had no classifiers to iterate over.
print(results)

{}


In [6]:
# Restore a previously saved XGBoost classifier instead of retraining it.
# The recorded output reports the model and its label encoder being read
# from models/XGBoost.joblib and models/XGBoost_label_encoder.joblib.
xgboost = XGBoostClassifier()
xgboost.load()


Model and label encoder loaded from models/XGBoost.joblib and models/XGBoost_label_encoder.joblib


In [None]:
# Sanity-check the loaded model on a single validation row.
# NOTE: index 1 selects the second row of X_val_pca, not the first.
sample = X_val_pca[1].reshape(1, -1)  # one row as a 2-D (1, n_features) batch

# Predicted label for the sample
print(xgboost.classify(sample))

# Per-class scores for the same sample
res = xgboost.classify_proba(sample)
print(res)



[19]
[[1.37813417e-02 1.68876039e-04 1.98507519e-03 5.45410767e-05
  1.78898626e-04 9.90929402e-05 4.10387962e-04 2.36996752e-03
  5.20726258e-04 1.11654204e-04 5.82432331e-05 0.00000000e+00
  3.76737189e-05 4.38489405e-05 3.08587478e-04 4.22183052e-03
  3.20587947e-04 1.96695351e-03 9.49150920e-01 3.73513065e-03
  2.68940534e-03 1.86889170e-04 5.26587864e-05 1.12512370e-03
  1.21912325e-03 0.00000000e+00 1.85816622e-04 3.64679545e-05
  0.00000000e+00 0.00000000e+00 3.64352054e-05 1.95080170e-03
  1.00423978e-03 3.54179574e-05 2.88212195e-05 5.33743587e-05
  9.30780152e-05 6.76573400e-05 0.00000000e+00 4.09994027e-05
  1.08820364e-04 2.69440479e-05 3.45385270e-05 5.88757743e-04
  0.00000000e+00 1.17469230e-04 6.08132919e-04 9.99079493e-05
  9.60601028e-05 6.42508749e-05 7.39157142e-04 8.90914525e-05
  3.39540769e-04 3.56997953e-05 1.77484428e-04 5.33718630e-05
  1.37861789e-04 2.97704100e-05 1.64585465e-04 1.04013438e-04
  5.13016748e-05 1.54246157e-03 3.54006370e-05 1.32902316e-03
  1

90