In [1]:
import torch
import matplotlib.pyplot as plt
import os
from sklearn.metrics import accuracy_score
import joblib
import sys
import numpy as np
import json
import time
from sklearn.metrics import (
    accuracy_score, recall_score, f1_score, precision_score, roc_auc_score,
    top_k_accuracy_score
)


# --- Artefact locations (all relative to the notebook's working directory) ---
CLIP_FEATURES_DIR = "clip_features"
VAL = f"{CLIP_FEATURES_DIR}/val_features.pt"
SCALER = "scaler_model.joblib"
PCA = "pca_model.joblib"
LIME = "top_k_lime_indices.joblib"


# --- Training-loop limits (not referenced by the cells visible in this notebook) ---
MAX_EPOCHS = 1000
EARLY_STOPPING = 500

# Use the GPU when PyTorch reports one, otherwise stay on the CPU
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Seed both PyTorch and NumPy so random draws are repeatable across runs
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

In [2]:
def load_features(file_path, map_location="cpu"):
    """Load a saved feature bundle and return its four components.

    Args:
        file_path: Path handed to ``torch.load``. The stored object must be a
            mapping with the keys ``"image_features"``, ``"text_features"``,
            ``"filenames"`` and ``"labels"``.
        map_location: Device onto which stored tensors are mapped while
            loading. Defaults to ``"cpu"`` so that bundles written from a GPU
            session can be loaded on CPU-only machines and converted with
            ``.numpy()`` afterwards.

    Returns:
        A tuple ``(image_features, text_features, filenames, labels)`` taken
        from the loaded mapping, in that order.

    Raises:
        KeyError: If one of the four expected keys is missing.
    """
    data = torch.load(file_path, map_location=map_location)
    return (
        data["image_features"],
        data["text_features"],
        data["filenames"],
        data["labels"],
    )


# Load the validation features only; the filenames (third return value) are discarded
val_img_features, val_txt_features, _, val_labels = load_features(VAL)

In [3]:
# Join the image and text features of every sample along the feature axis
X_val = torch.cat([val_img_features, val_txt_features], dim=1)

# Keep the sample axis and collapse the rest -> 2D (samples x features) NumPy matrix
X_val = X_val.reshape(X_val.shape[0], -1).numpy()

# Report the resulting matrix shape
print(f"X_val shape: {X_val.shape}")

# Labels as a NumPy array as well
y_val = val_labels.numpy()

# Restore the persisted scaler, PCA model and LIME index selection (in that order)
scaler, pca, lime = (joblib.load(artefact) for artefact in (SCALER, PCA, LIME))

# Scale first; the PCA projection and the LIME column subset both start from the scaled matrix
X_val_scaled = scaler.transform(X_val)
X_val_pca = pca.transform(X_val_scaled)
X_val_lime = X_val_scaled[:, lime]

X_val shape: (1985, 1024)


In [4]:
from classifiers import (
    SVMClassifier, RBFClassifier, RandomForestClassifier, NaiveBayesClassifier, 
    LogisticRegressionClassifier, LDAClassifier, KNNClassifier, DecisionTreeClassifier,
    AdaBoostClassifier, GBMClassifier, XGBoostClassifier
)

from ensembler import EnsemblerClassifier

# Classifier wrappers in the order their positions are used for indexing later on
classifier_types = (
    SVMClassifier,
    RBFClassifier,
    RandomForestClassifier,
    NaiveBayesClassifier,
    LogisticRegressionClassifier,
    LDAClassifier,
    KNNClassifier,
    DecisionTreeClassifier,
    AdaBoostClassifier,
    GBMClassifier,
    XGBoostClassifier,
)

# Build one instance of each wrapper with its default arguments
classifiers = [make_classifier() for make_classifier in classifier_types]

# Restore every wrapper's persisted state through its own load() method
for clf in classifiers:
    clf.load()

Loaded model from: models_pca/SVM.joblib
Loaded label encoder from: models_pca/label_encoder.joblib
Loaded model from: models_pca/RBF.joblib
Loaded label encoder from: models_pca/label_encoder.joblib
Loaded model from: models_pca/RandomForest.joblib
Loaded label encoder from: models_pca/label_encoder.joblib
Loaded model from: models_pca/NaiveBayes.joblib
Loaded label encoder from: models_pca/label_encoder.joblib
Loaded model from: models_pca/LogisticRegression.joblib
Loaded label encoder from: models_pca/label_encoder.joblib
Loaded model from: models_pca/LDA.joblib
Loaded label encoder from: models_pca/label_encoder.joblib
Loaded model from: models_pca/KNN.joblib
Loaded label encoder from: models_pca/label_encoder.joblib
Loaded model from: models_pca/DecisionTree.joblib
Loaded label encoder from: models_pca/label_encoder.joblib
Loaded model from: models_pca/AdaBoost.joblib
Loaded label encoder from: models_pca/label_encoder.joblib
Loaded model from: models_pca/GBM.joblib
Loaded label e

In [5]:
# Load JSON file.
# Decode explicitly as UTF-8 (the JSON interchange encoding) instead of relying on
# the platform's locale default, which differs between operating systems.
with open('ensemble_results.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Function to process each entry
def process_entry(entry, threshold=0.5):
    """Extract the metrics of one result entry and expand its weights.

    ``entry["selected_weights"]`` holds one weight per *selected* classifier,
    while ``entry["activation_bits"]`` holds one value per classifier. The
    weights are spread back over the full classifier axis: the k-th weight is
    placed at the k-th position whose activation exceeds ``threshold``; every
    other position receives ``0.0``.

    If no activation exceeds ``threshold`` but a weight is present, the first
    weight is placed at the position of the largest activation. This mirrors
    the selection logic used elsewhere in this notebook, which falls back to
    the arg-max classifier when nothing passes the threshold; without it the
    weight would be silently dropped and the result would be all zeros.

    Args:
        entry: Mapping with the metric keys ``accuracy``, ``recall``,
            ``f1_score``, ``precision``, ``roc_auc``, ``top5_accuracy``,
            ``inference_time`` plus ``selected_weights`` and
            ``activation_bits`` (both sequences).
        threshold: Activation value a position must exceed to count as
            selected. Defaults to ``0.5``.

    Returns:
        ``{"metrics": {...}, "updated_weights": [...]}`` where
        ``updated_weights`` has the same length as ``activation_bits``.

    Raises:
        KeyError: If any of the expected keys is missing from ``entry``.
    """
    metric_names = (
        "accuracy", "recall", "f1_score", "precision",
        "roc_auc", "top5_accuracy", "inference_time",
    )
    metrics = {name: entry[name] for name in metric_names}

    weights = entry["selected_weights"]
    activations = entry["activation_bits"]

    # Positions of the classifiers that were switched on
    active_positions = [
        position for position, bit in enumerate(activations) if bit > threshold
    ]

    # Nothing passed the threshold: attribute the weight to the strongest
    # activation (first one on ties, like np.argmax)
    if not active_positions and weights and activations:
        active_positions = [
            max(range(len(activations)), key=activations.__getitem__)
        ]

    # zip() stops at the shorter sequence, so surplus active positions stay 0.0
    # and surplus weights are ignored
    updated_weights = [0.0] * len(activations)
    for position, weight in zip(active_positions, weights):
        updated_weights[position] = weight

    return {
        "metrics": metrics,
        "updated_weights": updated_weights
    }

# Run process_entry over every entry of the loaded file, keeping the original keys
results = {name: process_entry(record) for name, record in data.items()}

# Print one section per entry: its metrics first, then the expanded weight list
for name, summary in results.items():
    print(f"\n--- {name.upper()} ---")
    print("Metrics:")
    for metric_name, metric_value in summary["metrics"].items():
        print(f"  {metric_name}: {metric_value}")
    print("Updated Weights:")
    print(summary["updated_weights"])


--- ACC ---
Metrics:
  accuracy: 0.6493702770780856
  recall: 0.6493702770780856
  f1_score: 0.6189334693848085
  precision: 0.6173809469340443
  roc_auc: 0.9416404505495262
  top5_accuracy: 0.8997481108312343
  inference_time: 9.356147527694702
Updated Weights:
[0.07719512283802032, 0.16135337948799133, 0.06362076848745346, 0.16351158916950226, 0.17402200400829315, 0.0, 0.0, 0.0, 0.08711770921945572, 0.13290242850780487, 0.14027704298496246]

--- RECALL ---
Metrics:
  accuracy: 0.6337531486146095
  recall: 0.6337531486146095
  f1_score: 0.6142130942594919
  precision: 0.6203917490051634
  roc_auc: 0.9405445894466619
  top5_accuracy: 0.9032745591939546
  inference_time: 8.8707594871521
Updated Weights:
[0.303228497505188, 0.13721773028373718, 0.09513887763023376, 0.23675239086151123, 0.0, 0.08771230280399323, 0.0, 0.03424537926912308, 0.10570481419563293, 0.0, 0.0]

--- F1 ---
Metrics:
  accuracy: 0.6463476070528967
  recall: 0.6463476070528967
  f1_score: 0.6223835033139069
  precisi

In [6]:
# Metric fields to collect, in the order they are emitted for each entry
metric_keys = [
    "accuracy", "recall", "f1_score",
    "precision", "roc_auc", "top5_accuracy",
    "inference_time"
]

# One flat list: all metric values of the first entry, then of the second, and so on
metrics = [entry[key] for entry in data.values() for key in metric_keys]

print(metrics)

[0.6493702770780856, 0.6493702770780856, 0.6189334693848085, 0.6173809469340443, 0.9416404505495262, 0.8997481108312343, 9.356147527694702, 0.6337531486146095, 0.6337531486146095, 0.6142130942594919, 0.6203917490051634, 0.9405445894466619, 0.9032745591939546, 8.8707594871521, 0.6463476070528967, 0.6463476070528967, 0.6223835033139069, 0.616577005825025, 0.9401041911798013, 0.9007556675062972, 3.3466830253601074, 0.6418136020151134, 0.6418136020151134, 0.6168829099108196, 0.6205134816104734, 0.9378252997330851, 0.9042821158690176, 8.779212713241577, 0.5622166246851386, 0.5622166246851386, 0.5641753237466651, 0.5998869952655739, 0.93533977324051, 0.9133501259445844, 3.2631583213806152, 0.5118387909319899, 0.5118387909319899, 0.4773661410895331, 0.47109952159362783, 0.9290893661009583, 0.890176322418136, 5.258897304534912, 0.46700251889168765, 0.46700251889168765, 0.4384078701932947, 0.43136658709544345, 0.7925719522263934, 0.745088161209068, 0.049428701400756836]


In [14]:
from ensembler import FinalWeightGeneratorNN

# Load the trained weight-generator network from its checkpoint
model_path = "final_weight_generator.pth"
if not os.path.exists(model_path):
    # Fail here with a clear message: the following cells call `model`, so silently
    # skipping the load would surface later as a NameError (or reuse a stale model
    # left in the kernel).
    raise FileNotFoundError(f"Weight generator checkpoint not found: {model_path}")

model = FinalWeightGeneratorNN()
model.load_state_dict(torch.load(model_path, map_location=DEVICE))
model.to(DEVICE)
# The model is only used for inference below; eval() puts layers such as dropout
# and batch normalisation (if the network has any) into their inference behaviour.
model.eval()
print("Model loaded successfully.")

Model loaded successfully.


In [17]:
# Draw a random 1x7 parameter row on the target device
input_params = torch.rand(1, 7, device=DEVICE)
print("Input parameters:", input_params)
# Fixed alternative to the random draw: input_params = 1 0 0 0 0 0 0
#input_params = torch.tensor([[1, 0, 0, 0, 0, 0, 0]], dtype=torch.float32, device=DEVICE)


# Wrapping the flat metric list in an outer list creates the tensor directly as one row
metrics_tensor = torch.tensor([metrics], dtype=torch.float32, device=DEVICE)

# Append the metric row to the parameter row along the feature dimension
input_tensor = torch.cat([input_params, metrics_tensor], dim=1)
print("Input tensor shape:", input_tensor.shape)


Input parameters: tensor([[0.3264, 0.9967, 0.0706, 0.2893, 0.5037, 0.1414, 0.5756]],
       device='cuda:0')
Input tensor shape: torch.Size([1, 56])


In [18]:
# Forward pass for inference only: no autograd graph is needed
with torch.no_grad():
    activation, weights = model(input_tensor)

# Flatten both outputs into 1D NumPy arrays on the host
activation_bits = activation.detach().cpu().numpy().flatten()
weight_values = weights.detach().cpu().numpy().flatten()

# Select classifiers with activation > 0.5; if none qualifies, keep the single
# classifier with the largest activation so the ensemble is never empty
activated_indices = np.flatnonzero(activation_bits > 0.5)
if activated_indices.size == 0:
    activated_indices = np.array([np.argmax(activation_bits)])

selected_classifiers = [classifiers[i] for i in activated_indices]
selected_weights = weight_values[activated_indices]

# Normalize weights so they sum to 1; a zero or non-finite sum would silently
# turn every weight into NaN/inf, so stop with an explicit error instead
weight_total = selected_weights.sum()
if weight_total == 0 or not np.isfinite(weight_total):
    raise ValueError(
        f"Cannot normalize selected weights: their sum is {weight_total}"
    )
selected_weights = selected_weights / weight_total

print("Input parameters:", input_params)
print(f"Activation bits: {activation_bits}")
print(f"Selected classifiers: {[type(clf).__name__ for clf in selected_classifiers]}")
print(f"Selected weights: {selected_weights}")

# Create ensemble. The pairs are materialised into a list because a bare zip() is
# a one-shot iterator and would be empty if the ensemble walks over it more than once.
ensemble = EnsemblerClassifier(list(zip(selected_classifiers, selected_weights)))

Input parameters: tensor([[0.3264, 0.9967, 0.0706, 0.2893, 0.5037, 0.1414, 0.5756]],
       device='cuda:0')
Activation bits: [ 1.5121626e+00  1.4294205e+00  7.5284433e-01  1.6619697e+00
 -9.1480551e+00  1.0439637e-01 -9.8547602e+00  3.7888288e-03
  8.4746683e-01 -8.9760895e+00 -8.0085325e+00]
Selected classifiers: ['SVMClassifier', 'RBFClassifier', 'RandomForestClassifierWrapper', 'NaiveBayesClassifier', 'AdaBoostClassifierWrapper']
Selected weights: [0.32425222 0.16633856 0.11918914 0.2666491  0.12357098]
