In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Plot the mean accuracy of the normal model followed by the mean accuracy of the
# LTN model on one continuous "extended epoch" axis.

# Load the per-epoch results of both training stages.
cf1_results = pd.read_csv('/content/cf1_results.csv')
cf1_results_ltn = pd.read_csv('/content/cf1_results_ltn.csv')

# Last epoch reached by the normal model; LTN epochs are shifted by this amount.
last_normal_epoch = cf1_results['Epoch'].max()

# Shift the LTN epochs so they continue after the normal model's epochs.
# Derived from the untouched 'Epoch' column, so re-running this cell is idempotent.
cf1_results_ltn['Adjusted Epoch'] = cf1_results_ltn['Epoch'] + last_normal_epoch

# Normal model: mean validation accuracy per epoch.
avg_cf1_results = (
    cf1_results
    .groupby('Epoch')
    .agg({'Validation Accuracy': 'mean'})
    .reset_index()
)

# LTN model: best test accuracy for every (fold, speed, adjusted epoch) combination ...
max_accuracy_per_fold_speed = (
    cf1_results_ltn
    .groupby(['Fold', 'Speed', 'Adjusted Epoch'])
    .agg({'test_accuracy': 'max'})
    .reset_index()
)

# ... then averaged over folds and speeds for each adjusted epoch.
avg_max_accuracy_per_epoch = (
    max_accuracy_per_fold_speed
    .groupby('Adjusted Epoch')
    .agg({'test_accuracy': 'mean'})
    .reset_index()
)

# Stack both stages under common column names. ignore_index=True gives the result a
# fresh 0..n-1 index; without it both frames keep their own 0-based index and the
# combined frame ends up with duplicate index labels.
combined_avg_results = pd.concat(
    [
        avg_cf1_results.rename(columns={'Validation Accuracy': 'Accuracy', 'Epoch': 'Extended Epoch'}),
        avg_max_accuracy_per_epoch.rename(columns={'test_accuracy': 'Accuracy', 'Adjusted Epoch': 'Extended Epoch'}),
    ],
    ignore_index=True,
)

# Plotting
# NOTE: the normal-model segment is 'Validation Accuracy' while the LTN segment is
# 'test_accuracy'; both are drawn on the same y-axis.
fig, ax = plt.subplots(figsize=(14, 8))
ax.plot(
    combined_avg_results['Extended Epoch'],
    combined_avg_results['Accuracy'],
    label='Average Model Accuracy',
    marker='o',
)
ax.set_title('Average Model Accuracy Across Extended Epochs')
ax.set_xlabel('Extended Epoch (Normal + LTN)')
ax.set_ylabel('Average Test Accuracy')
# Vertical marker at the last normal-model epoch, i.e. the hand-over point to the LTN model.
ax.axvline(x=last_normal_epoch, color='r', linestyle='--', label='Start of LTN Model')
ax.legend()
ax.grid(True)
plt.show()


In [7]:

# Standard library imports
import argparse
import csv
import math
import os
import pickle
import random
import re
import sys
import csv


# Append config directory to sys.path
# The hard-coded path points at the notebook *file*, so its parent directory has to be
# taken first: joining '..' directly onto a file path produces an entry that passes
# through a regular file and therefore cannot be resolved when importing.
# NOTE(review): assumes the config package lives next to the 'plots' directory
# (i.e. DDS_Paper/config) -- confirm.
script_dir = os.path.dirname(
    os.path.abspath("/home/ubuntu/dds_paper/DDS_Paper/plots/plots.ipynb")
)  # Absolute dir the notebook is in
config_dir = os.path.normpath(os.path.join(script_dir, '..', 'config'))
# Guard against piling up duplicate entries when the cell is re-run.
if config_dir not in sys.path:
    sys.path.append(config_dir)

# Third-party library imports
import joblib
import ltn
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from rich.console import Console
from rich.table import Table
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from tensorflow.keras import layers, models, optimizers, callbacks, regularizers
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler, ModelCheckpoint, TensorBoard
from tensorflow.keras.layers import Dense, Dropout, LSTM, Input
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.losses import CategoricalCrossentropy, SparseCategoricalCrossentropy
from tqdm import tqdm
from numpy import mean

# Local module imports
import config as config
from model_creation import LSTMModel, lr_schedule
from sequence_generation import load_sequences, save_sequences
from model_evaluation import kfold_cross_validation, normalize_importances, permutation_importance_per_class
from pgb_data_processing import overview_csv_files, process_pgb_data
from data_scaling import load_and_scale_data
from util import concatenate_and_delete_ltn_csv_files
import commons as commons
from tensorflow.keras.callbacks import Callback
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize
from itertools import cycle

# Plot one-vs-rest ROC curves for every cross-validation fold of each sequence file
# that has not been processed yet.
#
# Names this cell expects to exist already (they are not defined in this cell):
# num_classes, n_splits, S, sequences_directory, processed_file_tracker, results_path,
# sequence_length, num_features and MetricsLogger.

# Colour names used for the per-class curves. The cycle is restarted for every figure
# (see the fold loop) so class i always gets the same colour.
color_names = ['blue', 'green', 'red', 'cyan', 'magenta', 'yellow', 'black', 'pink', 'lightblue']
colors = cycle(color_names)

# Class ids handed to label_binarize; assumes labels are the integers 0..num_classes-1.
class_ids = list(range(num_classes))

# The checkpoint does not depend on the file or the fold, so it is loaded once
# instead of once per fold.
# NOTE(review): the file name suggests this is the fold-1 model for PGB_20_0, yet it is
# evaluated on every fold of every base name -- confirm this is intended.
model = tf.keras.models.load_model('/home/ubuntu/dds_paper/DDS_Paper/model_weights/ltn_tf_model_PGB_20_0_fold_1.tf')

kf = KFold(n_splits=n_splits, shuffle=False)
counter = 0
console = Console()
processed_bases = set()

# Base names recorded by an earlier run are skipped below.
if os.path.exists(processed_file_tracker):
    with open(processed_file_tracker, "r") as file:
        processed_bases = set(file.read().splitlines())

metrics_summary = []

for file in sorted(os.listdir(sequences_directory)):
    if "_train_scaled_sequences.npy" in file:

        # Stop once S files have been handled in this run.
        if counter >= S:
            break

        base_name = file.replace("_train_scaled_sequences.npy", "")
        if base_name in processed_bases:
            continue
        processed_bases.add(base_name)
        counter += 1

        # Load sequences and labels
        train_sequence_file_path = os.path.join(sequences_directory, f"{base_name}_train_scaled_sequences.npy")
        train_label_file_path = os.path.join(sequences_directory, f"{base_name}_train_scaled_labels.npy")
        X_train, y_train = load_sequences(train_sequence_file_path, train_label_file_path)

        test_sequence_file_path = os.path.join(sequences_directory, f"{base_name}_test_scaled_sequences.npy")
        test_label_file_path = os.path.join(sequences_directory, f"{base_name}_test_scaled_labels.npy")
        X_test, y_test = load_sequences(test_sequence_file_path, test_label_file_path)

        # Shuffle the sequences and corresponding labels. Before this they were kept ordered.
        # NOTE: no seed is set in this cell, so the fold composition differs between runs
        # unless the global NumPy seed is fixed elsewhere.
        train_indices = np.arange(len(X_train))
        np.random.shuffle(train_indices)
        X_train = X_train[train_indices]
        y_train = y_train[train_indices]

        test_indices = np.arange(len(X_test))
        np.random.shuffle(test_indices)
        X_test = X_test[test_indices]
        y_test = y_test[test_indices]

        # Merge for cross-validation
        X = np.concatenate((X_train, X_test), axis=0)
        y = np.concatenate((y_train, y_test), axis=0)

        input_shape = (sequence_length, num_features)
        fold_metrics = []

        for fold, (train_idx, val_idx) in enumerate(kf.split(X, y)):
            metrics_logger = MetricsLogger(results_path, fold_number=fold+1, base_name=base_name)
            console.print(f"[bold green]Training fold {fold + 1}/{n_splits} for {base_name}[/]")
            X_train_fold, X_val_fold = X[train_idx], X[val_idx]
            y_train_fold, y_val_fold = y[train_idx], y[val_idx]

            # Binarize the labels of THIS fold. Doing it before the loop would either fail
            # (y_val_fold not defined yet) or silently reuse the labels of an earlier fold.
            y_val_bin = label_binarize(y_val_fold, classes=class_ids)
            if num_classes == 2 and y_val_bin.shape[1] == 1:
                # For two classes label_binarize returns a single column (the positive
                # class); expand it to one column per class so y_val_bin[:, i] works.
                y_val_bin = np.hstack((1 - y_val_bin, y_val_bin))

            # Predict probabilities once per fold; every class reads its own column.
            probs = model.predict(X_val_fold)

            # Plotting
            plt.figure(figsize=(10, 8))
            colors = cycle(color_names)  # restart so colours are identical in every figure
            for i, color in zip(range(num_classes), colors):
                # Compute ROC curve and area under the curve for class i (one-vs-rest)
                fpr, tpr, thresholds = roc_curve(y_val_bin[:, i], probs[:, i])
                roc_auc = auc(fpr, tpr)
                plt.plot(fpr, tpr, color=color, lw=1.5,
                         label='ROC curve of class {0} (area = {1:0.2f})'.format(i, roc_auc))

            # Diagonal reference line (equal true- and false-positive rate).
            plt.plot([0, 1], [0, 1], 'k--', lw=2.5)
            plt.xlim([-0.05, 1.0])
            plt.ylim([0.0, 1.05])
            plt.xlabel('False Positive Rate', fontsize=16)
            plt.ylabel('True Positive Rate', fontsize=16)
            plt.title('Receiver Operating Characteristic for Each Class', fontsize=16)
            plt.legend(loc="lower right", fontsize=14)
            plt.show()

ModuleNotFoundError: No module named 'joblib'

In [5]:
!pip install joblib

Collecting joblib
  Obtaining dependency information for joblib from https://files.pythonhosted.org/packages/ae/e2/4dea6313ef2b38442fccbbaf4017e50a6c3c8a50e8ee9b512783e5c90409/joblib-1.4.0-py3-none-any.whl.metadata
  Downloading joblib-1.4.0-py3-none-any.whl.metadata (5.4 kB)
Downloading joblib-1.4.0-py3-none-any.whl (301 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m301.2/301.2 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: joblib
Successfully installed joblib-1.4.0
