In [None]:
from globals import *
import pandas as pd
from multiviewstacking import MultiViewStacking
import numpy as np
import os
import os.path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import MinMaxScaler

import numpy as np
import random
from pathlib import Path
from sklearn.metrics import confusion_matrix
from datetime import datetime
from tqdm import trange
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the dataset, separate labels from features, and work out which feature
# columns belong to each of the four views.

# Read file.
df = pd.read_csv(FILE_PATH)

# Encode labels into integers. The fitted encoder `le` is kept around because
# the experiment cell uses it to map predictions back to the original labels.
le = LabelEncoder()
df["class"] = le.fit_transform(df["class"])

# Target vector (integer-encoded labels).
y = df["class"]

# Feature matrix: every column except the label.
X = df.drop(["class"], axis = 1)

# Get column names.
colnames = list(X.columns)


def view_column_indices(columns, tag):
    """Return the positions in `columns` of every name that contains `tag`.

    The test is a plain substring match (not a prefix match). Positions are
    taken from `enumerate`, so the scan is a single linear pass and each
    column reports its own position even if a name were repeated.
    """
    return [pos for pos, name in enumerate(columns) if tag in name]


# Get column indices for each view.
ind_v1 = view_column_indices(colnames, "v1_")
ind_v2 = view_column_indices(colnames, "v2_")
ind_v3 = view_column_indices(colnames, "v3_")
ind_v4 = view_column_indices(colnames, "v4_")

In [None]:
# Echo the dataset path as the cell output so the reader can see which file is
# in use. NOTE(review): FILE_PATH is not defined in this notebook; presumably it
# comes from `from globals import *` - confirm.
FILE_PATH

In [None]:
# --- Initialize ---
# `results` accumulates the metrics table returned by classification_metrics_row;
# `activated` records whether the first row has been created yet.
results = None
activated = False

# Folder for results: one timestamped sub-folder per run, so earlier runs are
# never overwritten.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
# Join with pathlib rather than string concatenation so the location is correct
# whether or not DATASET_PATH ends with a path separator.
results_dir = Path(DATASET_PATH) / "results" / timestamp
results_dir.mkdir(parents=True, exist_ok=True)

# Initialize confusion matrix accumulators: one square float matrix per model,
# in the order the models are reported (stacked model, four views, aggregate).
labels = le.classes_
n_classes = len(labels)
cm_sum = {
    model_key: np.zeros((n_classes, n_classes), dtype=float)
    for model_key in ("MVS", "V1", "V2", "V3", "V4", "AGG")
}

# --- Run experiments ---
# Each iteration draws a fresh stratified train/test split, fits the multi-view
# stacking model, and scores six predictors on the same test set: the stacked
# model ("MVS"), its four fitted per-view learners ("V1".."V4"), and a single
# forest trained on all features ("AGG").
view_names = ["V1", "V2", "V3", "V4"]
view_columns = [ind_v1, ind_v2, ind_v3, ind_v4]

for i in trange(ITERATIONS, desc="Running experiments"):

    curr_it = 1 + i
    print(f"\nIteration {curr_it}")

    # Set random seeds: the global RNGs and the split use an iteration-specific
    # seed so each iteration sees a different partition.
    random_seed = 100 + curr_it
    np.random.seed(random_seed)
    random.seed(random_seed)

    # Split train/test
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=PCT_TRAIN, stratify=y, random_state=random_seed
    )

    # RandomForest parameters. random_state is the fixed literal 123, so every
    # forest in every iteration is seeded identically.
    rf_params = dict(n_estimators=NTREES, random_state=123, n_jobs=NUMCORES)

    # One fresh first-level forest per view plus a fresh meta-learner.
    model = MultiViewStacking(
        views_indices=view_columns,
        first_level_learners=[RandomForestClassifier(**rf_params) for _ in view_columns],
        meta_learner=RandomForestClassifier(**rf_params)
    )

    # Train Multi-View Stacking model
    model.fit(X_train, y_train)

    # Ground truth, decoded back to the original class labels.
    gt = le.inverse_transform(y_test)

    # Encoded predictions, collected in reporting order.
    encoded_predictions = [("MVS", model.predict(X_test))]

    # --- Individual view models ---
    # The fitted first-level learners are given the raw array restricted to
    # their own view's columns.
    X_test_values = X_test.values
    for name, cols, learner in zip(view_names, view_columns,
                                   model.fitted_first_level_learners_):
        encoded_predictions.append((name, learner.predict(X_test_values[:, cols])))

    # --- Aggregated model (single view all features) ---
    m_agg = RandomForestClassifier(**rf_params)
    m_agg.fit(X_train, y_train)
    encoded_predictions.append(("AGG", m_agg.predict(X_test)))

    # --- Record metrics and accumulate confusion matrices ---
    for name, encoded in encoded_predictions:
        preds = le.inverse_transform(encoded)

        # The very first call creates the results table; every later call
        # passes the table back in through `df`.
        if not activated:
            results = classification_metrics_row(curr_it, name, preds, gt)
            activated = True
        else:
            results = classification_metrics_row(curr_it, name, preds, gt, df=results)

        cm_sum[name] += confusion_matrix(gt, preds, labels=labels)


# --- Compute and save normalized average confusion matrices ---
# For every model: average the accumulated counts, row-normalize, write the
# matrix as CSV, and save a heatmap PNG next to it.
for model_name, cm in cm_sum.items():
    # Average over iterations. The divisor is floored at 1 so an empty run
    # (ITERATIONS == 0) yields a zero matrix without dividing by zero.
    cm_avg = cm / max(ITERATIONS, 1)

    # Row-normalize (each row sums to 1). Rows whose total is zero are left as
    # zeros via the masked divide, instead of producing NaNs (and
    # RuntimeWarnings) that then need cleaning up.
    row_totals = cm_avg.sum(axis=1, keepdims=True)
    cm_norm = np.divide(
        cm_avg,
        row_totals,
        out=np.zeros_like(cm_avg, dtype=float),
        where=row_totals != 0,
    )

    # Save normalized matrix as CSV
    df_cm = pd.DataFrame(cm_norm, index=labels, columns=labels)
    csv_path = results_dir / f"confusion_matrix_{model_name}_AVG_norm.csv"
    df_cm.to_csv(csv_path)

    # Plot heatmap on an explicit figure/axes pair so nothing depends on
    # pyplot's implicit "current figure" state.
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(df_cm, annot=True, fmt=".2f", cmap="Blues",
                xticklabels=labels, yticklabels=labels, cbar=True,
                vmin=0, vmax=1, ax=ax)
    ax.set_title(f"Row-Normalized Average Confusion Matrix - {model_name}", fontsize=14)
    ax.set_xlabel("Predicted Labels")
    ax.set_ylabel("True Labels")
    fig.tight_layout()

    # Save figure, then close it so figures do not accumulate across models.
    fig.savefig(results_dir / f"confusion_matrix_{model_name}_AVG_norm.png", dpi=300)
    plt.close(fig)

# Save overall results summary
results_path = results_dir / "results_summary.csv"
if results is None:
    # `results` is only populated inside the experiment loop, so it is still
    # None when no iteration ran; skip the write instead of failing with an
    # AttributeError on None.
    print("No experiment iterations were run; results_summary.csv was not written.")
else:
    results.to_csv(results_path, index=False)

print(f"Done! Results saved in: {results_dir}")
