In [6]:
import torch
import matplotlib.pyplot as plt
import os
from sklearn.metrics import accuracy_score
import joblib
import sys
import numpy as np
import json
import time
from sklearn.metrics import (
    accuracy_score, recall_score, f1_score, precision_score, roc_auc_score,
    top_k_accuracy_score
)


# Directory that holds the saved CLIP feature bundles.
CLIP_FEATURES_DIR = "clip_features"
# Validation feature bundle, read with load_features() below.
VAL = f"{CLIP_FEATURES_DIR}/val_features.pt"
# Serialized preprocessing objects, each read with joblib.load() below.
SCALER = "scaler_model.joblib"
PCA = "pca_model.joblib"
# Serialized column selection applied to the scaled features below.
LIME = "top_k_lime_indices.joblib"

In [7]:
def load_features(file_path):
    """Load a saved feature bundle and return its four components.

    The file is read with ``torch.load`` and must contain a mapping with the
    keys ``"image_features"``, ``"text_features"``, ``"filenames"`` and
    ``"labels"``.

    Args:
        file_path: Path of the file to read.

    Returns:
        A tuple ``(image_features, text_features, filenames, labels)``, in
        that order, exactly as stored in the file.

    Raises:
        KeyError: If one of the four expected keys is missing from the file.
    """
    # map_location="cpu" lets a bundle that was saved from a GPU be opened on
    # a CPU-only machine, and keeps the tensors convertible with .numpy().
    # SECURITY NOTE: torch.load unpickles the file; only open trusted files.
    data = torch.load(file_path, map_location="cpu")
    return (
        data["image_features"],
        data["text_features"],
        data["filenames"],
        data["labels"],
    )


# Load the validation features only; the filenames entry is not used here.
val_img_features, val_txt_features, _, val_labels = load_features(file_path=VAL)

In [8]:
# Join image and text features column-wise (dim=1) to form one feature row
# per validation sample.
X_val = torch.cat((val_img_features, val_txt_features), dim=1)

# Flatten into a 2D matrix (samples x features). reshape() also works on
# non-contiguous tensors, and detach()/cpu() keep the NumPy conversion from
# failing on tensors that track gradients or live on another device.
X_val = X_val.reshape(X_val.size(0), -1).detach().cpu().numpy()

# Print the shape of the features
print(f"X_val shape: {X_val.shape}")

# Convert labels to a NumPy array
y_val = val_labels.detach().cpu().numpy()

# Every feature row needs exactly one label; otherwise any metric computed
# later would compare misaligned samples.
if X_val.shape[0] != y_val.shape[0]:
    raise ValueError(
        f"Feature/label count mismatch: {X_val.shape[0]} feature rows "
        f"vs {y_val.shape[0]} labels"
    )

# Load the persisted scaler, PCA model and LIME column selection
scaler = joblib.load(SCALER)
pca = joblib.load(PCA)
lime = joblib.load(LIME)

# Scale first; both the PCA projection and the LIME column subset are taken
# from the scaled matrix.
X_val_scaled = scaler.transform(X_val)
X_val_pca = pca.transform(X_val_scaled)
# presumably `lime` holds the column indices of the top-k features - TODO confirm
X_val_lime = X_val_scaled[:, lime]

X_val shape: (1985, 1024)


In [9]:
from classifiers import (
    SVMClassifier, RBFClassifier, RandomForestClassifier, NaiveBayesClassifier, 
    LogisticRegressionClassifier, LDAClassifier, KNNClassifier, DecisionTreeClassifier,
    AdaBoostClassifier, GBMClassifier, XGBoostClassifier
)

from ensembler import EnsemblerClassifier

# Directory the persisted classifiers are loaded from
MODEL_DIR = "models_pca_cal"

# Instantiate one wrapper per base classifier; the order matters because the
# weights below are matched to the classifiers by position.
classifiers = [
    SVMClassifier(), RBFClassifier(), RandomForestClassifier(), NaiveBayesClassifier(),
    LogisticRegressionClassifier(), LDAClassifier(), KNNClassifier(),
    DecisionTreeClassifier(), AdaBoostClassifier(), GBMClassifier(),
    XGBoostClassifier()
]

# One weight per classifier, in the same order as `classifiers`.
# NOTE(review): the second entry (RBFClassifier) has weight 0.0, so it
# presumably contributes nothing to the ensemble - confirm this is intended.
weights = [0.2, 0.0, 0.1, 0.05, 0.1, 0.05, 0.05, 0.1, 0.1, 0.2, 0.05]

# zip() silently drops the surplus items when the lists differ in length,
# which would leave classifiers out of the ensemble without any error.
if len(classifiers) != len(weights):
    raise ValueError(
        f"Expected one weight per classifier, got {len(classifiers)} "
        f"classifiers and {len(weights)} weights"
    )

# Load the persisted model for every classifier
for clf in classifiers:
    clf.load(model_dir=MODEL_DIR)

# Build ensemble input as (classifier, weight) pairs
classifier_weight_pairs = list(zip(classifiers, weights))

# Initialize ensemble
ensemble = EnsemblerClassifier(classifier_weight_pairs)





Loaded model from: models_pca_cal/SVM.joblib
Loaded label encoder from: models_pca_cal/label_encoder.joblib
Loaded model from: models_pca_cal/RBF.joblib
Loaded label encoder from: models_pca_cal/label_encoder.joblib
Loaded model from: models_pca_cal/RandomForest.joblib
Loaded label encoder from: models_pca_cal/label_encoder.joblib
Loaded model from: models_pca_cal/NaiveBayes.joblib
Loaded label encoder from: models_pca_cal/label_encoder.joblib
Loaded model from: models_pca_cal/LogisticRegression.joblib
Loaded label encoder from: models_pca_cal/label_encoder.joblib
Loaded model from: models_pca_cal/LDA.joblib
Loaded label encoder from: models_pca_cal/label_encoder.joblib
Loaded model from: models_pca_cal/KNN.joblib
Loaded label encoder from: models_pca_cal/label_encoder.joblib
Loaded model from: models_pca_cal/DecisionTree.joblib
Loaded label encoder from: models_pca_cal/label_encoder.joblib
Loaded model from: models_pca_cal/AdaBoost.joblib
Loaded label encoder from: models_pca_cal/labe

In [10]:
# Spot check on one validation sample (row 8), reshaped to a single-row
# 2D array before it is passed to the ensemble.
y_pred = ensemble.classify_proba(
    X_val_pca[8].reshape(1, -1)
)

# NOTE: the label says "Predicted class", but this prints the whole array
# returned by classify_proba; the index of its largest entry per row is
# printed on the following line.
print(f"Predicted class: {y_pred}")
print(np.argmax(y_pred, axis=1))

Predicted class: [[7.40883565e-02 2.23999697e-03 8.79527075e-03 1.57509605e-03
  1.22521716e-03 9.62014890e-04 1.67405965e-03 3.11589716e-03
  2.42711439e-03 2.16091279e-03 1.54018904e-03 0.00000000e+00
  1.49867319e-03 7.61401268e-04 2.29724041e-03 6.00492523e-03
  2.23230355e-03 3.74858889e-03 1.76457198e-03 8.23260530e-04
  8.17486921e-04 3.88824037e-04 1.07736010e-03 7.25898047e-04
  5.70689127e-04 0.00000000e+00 1.53197416e-03 1.25823152e-03
  0.00000000e+00 0.00000000e+00 1.10902794e-03 5.91485170e-03
  2.98047198e-03 6.24343286e-04 2.86970665e-04 5.42149989e-04
  1.10656359e-03 1.08614381e-03 0.00000000e+00 2.08343532e-03
  1.13289140e-03 1.06404846e-03 0.00000000e+00 1.95576777e-02
  0.00000000e+00 3.49573848e-03 3.62422506e-02 3.40174458e-01
  3.04951085e-02 1.24138270e-02 3.21770823e-02 7.58584752e-02
  8.56648637e-03 2.22706322e-02 1.06267579e-02 1.34516520e-02
  1.04672922e-02 1.07385722e-02 5.56438489e-02 1.54866086e-02
  1.93315299e-02 1.81417291e-02 1.52714009e-03 1.0878

In [11]:
# Classify the whole PCA-projected validation set with the ensemble.
y_pred = ensemble.classify(X_val_pca)

# Report accuracy against the validation labels.
# NOTE(review): the recorded result (0.0055) is extremely low. Possibly y_val
# and the ensemble output use different label encodings (a label encoder is
# loaded together with each model) - verify before trusting this number.
accuracy = accuracy_score(y_true=y_val, y_pred=y_pred)
print(f"Accuracy: {accuracy:.4f}")


Accuracy: 0.0055
