In [None]:
import torch
import matplotlib.pyplot as plt
import os
from sklearn.metrics import accuracy_score
import joblib
import sys
import numpy as np
import json
import time
from sklearn.metrics import (
    accuracy_score, recall_score, f1_score, precision_score, roc_auc_score,
    top_k_accuracy_score
)


# --- Artifact locations ------------------------------------------------------
CLIP_FEATURES_DIR = "clip_features"
VAL = f"{CLIP_FEATURES_DIR}/val_features.pt"  # feature file for the validation split
SCALER = "scaler_model.joblib"                # object exposing .transform(), loaded via joblib
PCA = "pca_model.joblib"                      # object exposing .transform(), loaded via joblib
LIME = "top_k_lime_indices.joblib"            # column selector applied to the scaled features

# --- Training budget ---------------------------------------------------------
MAX_EPOCHS = 100
EARLY_STOPPING = 10  # consecutive non-improving epochs tolerated before stopping

# --- Reproducibility ---------------------------------------------------------
# Seed both NumPy and PyTorch so repeated runs draw the same random numbers.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use the GPU when one is visible, otherwise stay on the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


In [2]:
def load_features(file_path):
    """Read a saved feature bundle and return its four components.

    The file is expected to hold a dict with the keys ``"image_features"``,
    ``"text_features"``, ``"filenames"`` and ``"labels"``.

    Args:
        file_path: Path of the file to read with ``torch.load``.

    Returns:
        A 4-tuple ``(image_features, text_features, filenames, labels)``
        taken from the loaded dict, in that order.

    Raises:
        KeyError: If one of the four expected keys is missing.
    """
    # Force everything onto the CPU: a bundle saved from a GPU session would
    # otherwise fail to load on a CPU-only machine, and CUDA tensors cannot be
    # converted with `.numpy()` by the cells that consume these features.
    data = torch.load(file_path, map_location="cpu")
    return data["image_features"], data["text_features"], data["filenames"], data["labels"]


# Load the validation features only; the filenames entry (third value) is discarded
val_img_features, val_txt_features, _, val_labels = load_features(VAL)

In [3]:
# Join the validation image and text features side by side (along dim 1)
X_val = torch.cat([val_img_features, val_txt_features], dim=1)

# Keep the sample axis, collapse the rest, and hand the result to NumPy
X_val = X_val.view(X_val.shape[0], -1).numpy()

# Report the resulting matrix shape
print(f"X_val shape: {X_val.shape}")

# Labels as a NumPy array as well
y_val = val_labels.numpy()

# Restore the persisted scaler, PCA model and LIME column selector (in that order)
scaler, pca, lime = (joblib.load(artifact) for artifact in (SCALER, PCA, LIME))

# Scale once, then derive the two reduced views from the scaled matrix:
# a PCA projection and a LIME-selected column subset.
X_val_scaled = scaler.transform(X_val)
X_val_pca = pca.transform(X_val_scaled)
X_val_lime = X_val_scaled[:, lime]

X_val shape: (1985, 1024)


In [4]:
from classifiers import (
    SVMClassifier, RBFClassifier, RandomForestClassifier, NaiveBayesClassifier, 
    LogisticRegressionClassifier, LDAClassifier, KNNClassifier, DecisionTreeClassifier,
    AdaBoostClassifier, GBMClassifier, XGBoostClassifier
)

from ensembler import EnsemblerClassifier

# Build one default-constructed instance of every base classifier
classifiers = [
    build()
    for build in (
        SVMClassifier, RBFClassifier, RandomForestClassifier, NaiveBayesClassifier,
        LogisticRegressionClassifier, LDAClassifier, KNNClassifier,
        DecisionTreeClassifier, AdaBoostClassifier, GBMClassifier,
        XGBoostClassifier,
    )
]

# Restore each classifier's saved state from disk
for clf in classifiers:
    clf.load()

Loaded model from: models_pca/SVM.joblib
Loaded label encoder from: models_pca/label_encoder.joblib
Loaded model from: models_pca/RBF.joblib
Loaded label encoder from: models_pca/label_encoder.joblib
Loaded model from: models_pca/RandomForest.joblib
Loaded label encoder from: models_pca/label_encoder.joblib
Loaded model from: models_pca/NaiveBayes.joblib
Loaded label encoder from: models_pca/label_encoder.joblib
Loaded model from: models_pca/LogisticRegression.joblib
Loaded label encoder from: models_pca/label_encoder.joblib
Loaded model from: models_pca/LDA.joblib
Loaded label encoder from: models_pca/label_encoder.joblib
Loaded model from: models_pca/KNN.joblib
Loaded label encoder from: models_pca/label_encoder.joblib
Loaded model from: models_pca/DecisionTree.joblib
Loaded label encoder from: models_pca/label_encoder.joblib
Loaded model from: models_pca/AdaBoost.joblib
Loaded label encoder from: models_pca/label_encoder.joblib
Loaded model from: models_pca/GBM.joblib
Loaded label e

In [None]:
from ensembler import WeightGeneratorNN, train_step

# Create the weight-generator network and place it on the selected device
model = WeightGeneratorNN()
model = model.to(DEVICE)

# Adam over all of the network's parameters, learning rate 0.01
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

In [None]:
# Train the weight generator for at most MAX_EPOCHS epochs. The best-scoring
# weights are checkpointed, and training stops early once the score has failed
# to improve for EARLY_STOPPING consecutive epochs.
train_history = []  # one (score, loss) pair per completed epoch
best_score = -float("inf")
# Initialise the stall counter before the loop. Without this, a first-epoch
# score that does not compare greater than -inf (e.g. NaN) reaches the
# `+= 1` branch with the name undefined on a fresh kernel, or silently
# continues from a stale value left over from an earlier run of this cell.
epochs_without_improvement = 0

for epoch in range(MAX_EPOCHS):
    # Draw a fresh random input vector each epoch
    input_params = torch.rand(1, 7, device=DEVICE)  # Shape: (1, 7)

    score, loss = train_step(model, optimizer, input_params, X_val_pca, y_val, classifiers, device=DEVICE)
    train_history.append((score, loss))

    print(f"Epoch {epoch + 1}: Score={score:.4f}, Loss={loss:.4f}")

    # Track the best score seen so far and count epochs since it last improved
    if score > best_score:
        best_score = score
        epochs_without_improvement = 0
        torch.save(model.state_dict(), "best_weight_generator.pt")  # Save best model
    else:
        epochs_without_improvement += 1

    if epochs_without_improvement >= EARLY_STOPPING:
        print(f"Early stopping at epoch {epoch + 1}")
        break

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 1: Score=0.2667, Loss=-0.2667


NameError: name 'best_score' is not defined

In [None]:
# Split the per-epoch (score, loss) pairs into two parallel series
scores, losses = zip(*train_history)

# Draw both curves on a single axis, indexed by epoch
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(scores, label="Validation Accuracy")
ax.plot(losses, label="Loss")
ax.set_xlabel("Epoch")
ax.legend()
ax.set_title("Weight Generator Training Progress")
ax.grid()
plt.show()