In [1]:
import torch
import matplotlib.pyplot as plt
import os
from sklearn.metrics import accuracy_score
import joblib
import sys
import numpy as np
import json
import time
from sklearn.metrics import (
    accuracy_score, recall_score, f1_score, precision_score, roc_auc_score,
    top_k_accuracy_score
)


# --- Reproducibility -------------------------------------------------------
# Seed both NumPy's and PyTorch's global RNGs with the same value.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# --- Compute device --------------------------------------------------------
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# --- Epoch settings --------------------------------------------------------
# NOTE(review): names suggest training-loop limits; they are not referenced
# in the cells visible here.
EARLY_STOPPING = 500
MAX_EPOCHS = 1000

# --- Input artifacts -------------------------------------------------------
# Saved validation features plus the serialized scaler / PCA / LIME-index
# objects that later cells read from disk.
CLIP_FEATURES_DIR = "clip_features"
VAL = f"{CLIP_FEATURES_DIR}/val_features.pt"
SCALER = "scaler_model.joblib"
PCA = "pca_model.joblib"
LIME = "top_k_lime_indices.joblib"

In [2]:
def load_features(file_path, map_location="cpu"):
    """Load a saved feature bundle and return its four components.

    The file is read with ``torch.load`` and must contain a mapping with the
    keys ``"image_features"``, ``"text_features"``, ``"filenames"`` and
    ``"labels"``.

    Args:
        file_path: Path of the saved bundle.
        map_location: Forwarded to ``torch.load``. Defaults to ``"cpu"`` so a
            bundle written from a GPU session still loads on a CPU-only
            machine and the returned tensors can be converted with
            ``.numpy()`` directly.

    Returns:
        Tuple ``(image_features, text_features, filenames, labels)``.

    Raises:
        KeyError: If any of the expected keys is missing from the file.
    """
    # NOTE(review): torch.load unpickles the file; only load trusted bundles.
    data = torch.load(file_path, map_location=map_location)

    expected_keys = ("image_features", "text_features", "filenames", "labels")
    missing = [key for key in expected_keys if key not in data]
    if missing:
        raise KeyError(f"{file_path} is missing expected keys: {missing}")

    return tuple(data[key] for key in expected_keys)


# Load the validation features only (no training split is read in this notebook cell).
# The filenames are not used afterwards; they are bound to a named throwaway
# rather than `_`, because assigning `_` in IPython/Jupyter overrides the
# built-in "last output" variable for the rest of the session.
val_img_features, val_txt_features, _val_filenames, val_labels = load_features(VAL)

In [3]:
# Concatenate image and text features per sample for the validation split.
X_val = torch.cat((val_img_features, val_txt_features), dim=1)

# Flatten into a 2D matrix (samples x features) and convert to NumPy.
# detach()/cpu() make the conversion work even if the loaded tensors live on
# the GPU or track gradients; for plain CPU tensors they are no-ops.
X_val = X_val.detach().cpu().reshape(X_val.size(0), -1).numpy()

# Print the shape of the features
print(f"X_val shape: {X_val.shape}")

# Convert labels to a NumPy array (same device/grad safety as above).
y_val = val_labels.detach().cpu().numpy()

# Every sample needs exactly one label, otherwise the metrics below are meaningless.
assert X_val.shape[0] == y_val.shape[0], (
    f"feature/label count mismatch: {X_val.shape[0]} vs {y_val.shape[0]}"
)

# Load the serialized scaler, PCA model and LIME index selection.
# NOTE(review): joblib.load unpickles its input; only load trusted artifacts.
scaler = joblib.load(SCALER)
pca = joblib.load(PCA)
lime = joblib.load(LIME)

# Scale first, then derive the two reduced views from the scaled matrix:
# the PCA projection and the column subset given by the LIME indices.
X_val_scaled = scaler.transform(X_val)
X_val_pca = pca.transform(X_val_scaled)
X_val_lime = X_val_scaled[:, lime]

X_val shape: (1985, 1024)


In [4]:
from classifiers import (
    SVMClassifier, RBFClassifier, RandomForestClassifier, NaiveBayesClassifier, 
    LogisticRegressionClassifier, LDAClassifier, KNNClassifier, DecisionTreeClassifier,
    AdaBoostClassifier, GBMClassifier, XGBoostClassifier
)

from ensembler import EnsemblerClassifier

# Wrapper classes, listed in a fixed order. The order matters: a later cell
# picks entries from `classifiers` by position (index i of the weight
# generator's activation output selects classifiers[i]).
classifier_types = (
    SVMClassifier,
    RBFClassifier,
    RandomForestClassifier,
    NaiveBayesClassifier,
    LogisticRegressionClassifier,
    LDAClassifier,
    KNNClassifier,
    DecisionTreeClassifier,
    AdaBoostClassifier,
    GBMClassifier,
    XGBoostClassifier,
)

# Build every wrapper with its default constructor arguments first ...
classifiers = [make_classifier() for make_classifier in classifier_types]

# ... then have each one load its saved model (load() reports the paths it reads).
for model in classifiers:
    model.load()

Loaded model from: models_pca/SVM.joblib
Loaded label encoder from: models_pca/label_encoder.joblib
Loaded model from: models_pca/RBF.joblib
Loaded label encoder from: models_pca/label_encoder.joblib
Loaded model from: models_pca/RandomForest.joblib
Loaded label encoder from: models_pca/label_encoder.joblib
Loaded model from: models_pca/NaiveBayes.joblib
Loaded label encoder from: models_pca/label_encoder.joblib
Loaded model from: models_pca/LogisticRegression.joblib
Loaded label encoder from: models_pca/label_encoder.joblib
Loaded model from: models_pca/LDA.joblib
Loaded label encoder from: models_pca/label_encoder.joblib
Loaded model from: models_pca/KNN.joblib
Loaded label encoder from: models_pca/label_encoder.joblib
Loaded model from: models_pca/DecisionTree.joblib
Loaded label encoder from: models_pca/label_encoder.joblib
Loaded model from: models_pca/AdaBoost.joblib
Loaded label encoder from: models_pca/label_encoder.joblib
Loaded model from: models_pca/GBM.joblib
Loaded label e

In [129]:
import torch
import numpy as np
from ensembler import WeightGeneratorNN, EnsemblerClassifier  # Update if import path is different

# Load the trained weight generator and put it in inference mode.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
weight_gen = WeightGeneratorNN()
# NOTE(review): torch.load unpickles the checkpoint; only load trusted files.
weight_gen.load_state_dict(torch.load("final_weight_generator_hm.pt", map_location=DEVICE))
weight_gen.to(DEVICE)
weight_gen.eval()

# Create input_params (example: random preference vector with 7 entries).
# NOTE(review): the draw depends on the global RNG state at execution time,
# so re-running this cell produces a different vector each time.
input_params = torch.rand(1, 7, device=DEVICE)
# Alternative: use a fixed preference vector instead of a random one.
#input_params = torch.tensor([[1, 1, 1, 0, 0, 0, 1]], dtype=torch.float32, device=DEVICE)

# Pure inference: no autograd graph is needed for the forward pass.
with torch.no_grad():
    activation, weights = weight_gen(input_params)

activation_bits = activation.detach().cpu().numpy().flatten()
weight_values = weights.detach().cpu().numpy().flatten()

# Select classifiers with activation > 0.5; if none qualifies, keep the single
# classifier with the highest activation so the ensemble is never empty.
# NOTE(review): the recorded output shows activations well outside [0, 1]
# (e.g. -12.8, 7.8), i.e. they look like raw logits — confirm that 0.5 (and
# not 0.0) is the intended cut-off.
activated_indices = np.where(activation_bits > 0.5)[0]
if len(activated_indices) == 0:
    activated_indices = [np.argmax(activation_bits)]

selected_classifiers = [classifiers[i] for i in activated_indices]

# Keep only the weights of the selected classifiers and rescale them to sum to 1.
selected_weights = np.array([weight_values[i] for i in activated_indices])
selected_weights = selected_weights / selected_weights.sum()

print("Input parameters:", input_params)
print(f"Activation bits: {activation_bits}")
print(f"Selected classifiers: {[type(clf).__name__ for clf in selected_classifiers]}")
print(f"Selected weights: {selected_weights}")
print(len(selected_classifiers), len(selected_weights))

# Create ensemble. The (classifier, weight) pairs are materialized into a list:
# a bare zip object is a one-shot iterator and would be empty on any second
# pass (e.g. classify() followed by classify_proba()).
ensemble = EnsemblerClassifier(list(zip(selected_classifiers, selected_weights)))



Input parameters: tensor([[0.8026, 0.3005, 0.1742, 0.0322, 0.8275, 0.3683, 0.6743]],
       device='cuda:0')
Activation bits: [-12.801508   -5.8216333   7.8420677  -5.237605    4.2890744  -2.611047
  -4.771107   -8.548449   -5.7745557  -3.5090382   4.6315093]
Selected classifiers: ['RandomForestClassifierWrapper', 'LogisticRegressionClassifier', 'XGBoostClassifier']
Selected weights: [0.43837523 0.46289378 0.09873103]
3 3


In [130]:
# Label set handed to the probability-based metrics, and the k for top-k accuracy.
# NOTE(review): assumes the columns of y_proba correspond to class ids 1..90
# in ascending order — confirm against the label encoder.
CLASS_LABELS = np.arange(1, 91)
TOP_K = 5

# Time only the hard-label prediction pass. perf_counter() is a monotonic,
# high-resolution clock meant for measuring short intervals.
start = time.perf_counter()
y_pred = ensemble.classify(X_val_pca)
end = time.perf_counter()
print(f"Classification time: {end - start:.4f} seconds")

# Class probabilities are computed separately (outside the timed section)
# for the ROC AUC and top-k metrics below.
y_proba = ensemble.classify_proba(X_val_pca)

#print accuracy
accuracy = accuracy_score(y_val, y_pred)
print(f"Accuracy: {accuracy:.4f}")

# Support-weighted recall / F1 / precision. zero_division=0 makes the handling
# of classes without predicted (or true) samples explicit: they score 0, which
# is the same value the default produces, but without the undefined-metric
# warning that the default emits.
recall = recall_score(y_val, y_pred, average='weighted', zero_division=0)
print(f"Recall: {recall:.4f}")

f1 = f1_score(y_val, y_pred, average='weighted', zero_division=0)
print(f"F1 Score: {f1:.4f}")
precision = precision_score(y_val, y_pred, average='weighted', zero_division=0)
print(f"Precision: {precision:.4f}")

# One-vs-rest ROC AUC, weighted by class support.
roc_auc = roc_auc_score(y_val, y_proba, multi_class='ovr', average='weighted', labels=CLASS_LABELS)
print(f"ROC AUC: {roc_auc:.4f}")

# Calculate top-k accuracy (true label among the TOP_K highest-probability classes)
top_k_accuracy = top_k_accuracy_score(y_val, y_proba, k=TOP_K, labels=CLASS_LABELS)
print(f"Top-k Accuracy: {top_k_accuracy:.4f}")


Classification time: 0.1277 seconds
Accuracy: 0.6302
Recall: 0.6302
F1 Score: 0.6090
Precision: 0.6062
ROC AUC: 0.9369
Top-k Accuracy: 0.8856


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
