# We ran this file to aggregate training metrics and then plot them.

In [None]:
import os
import glob
import torch

# Hard-coded root folder: each immediate subdirectory is searched recursively
# for training_metrics.ckpt files by the reporting loop further down.
root_directory = '/home/ray/nfs/autolang_storage/projects/divyam/primary_ft_training'

def find_best_val_acc(folder_path):
    """Return the highest validation accuracy recorded under ``folder_path``.

    Every ``training_metrics.ckpt`` found recursively below ``folder_path``
    is read with ``load_training_metrics`` and the largest entry across all
    validation-accuracy lists is returned. The running maximum starts at
    0.0, so that is the result when no checkpoint is found, when every list
    is empty, or when no recorded accuracy exceeds it.
    """
    pattern = os.path.join(folder_path, '**', 'training_metrics.ckpt')
    top_score = 0.0
    for metrics_path in glob.glob(pattern, recursive=True):
        # The validation accuracies are the last element of the loader's tuple.
        accuracies = load_training_metrics(metrics_path)[-1]
        if not accuracies:
            continue
        top_score = max(top_score, max(accuracies))
    return top_score

# Function to load training metrics from a checkpoint file
def load_training_metrics(checkpoint_path):
    """Load per-epoch metrics from a checkpoint written with ``torch.save``.

    Args:
        checkpoint_path: Path to a file whose payload is a dict with the keys
            'epoch_list', 'train_loss_list', 'val_loss_list',
            'train_acc_list' and 'val_acc_list'.

    Returns:
        A 5-tuple ``(epoch_list, train_loss_list, train_accs, val_loss_list,
        val_accs)``. The epoch and loss lists are returned exactly as stored;
        the two accuracy lists are rebuilt with each entry converted to a
        plain Python number where possible.

    Raises:
        KeyError: If one of the expected keys is missing.
        IndexError: If an epoch value is not a valid index into an accuracy
            list.
    """
    # NOTE: torch.load unpickles the file, so only point this at checkpoints
    # you produced yourself.
    # map_location='cpu' lets checkpoints saved on a GPU be read on a
    # CPU-only machine; only scalar values are extracted from them here.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')

    def as_scalar(value):
        # Accuracies may be stored as 0-d tensors or as plain numbers; fall
        # back to the raw value when it has no usable .item().
        try:
            return value.item()
        except (AttributeError, ValueError, RuntimeError):
            return value

    # The accuracy lists are indexed by the epoch *value*, so this assumes
    # epoch_list holds valid 0-based indices -- TODO confirm against the
    # training script.
    epochs = checkpoint['epoch_list']
    train_accs = [as_scalar(checkpoint['train_acc_list'][epoch]) for epoch in epochs]
    val_accs = [as_scalar(checkpoint['val_acc_list'][epoch]) for epoch in epochs]

    return epochs, checkpoint['train_loss_list'], train_accs, checkpoint['val_loss_list'], val_accs

# Report the best validation accuracy for every model folder directly under
# root_directory; plain files at that level are ignored.
for folder_name in os.listdir(root_directory):
    folder_path = os.path.join(root_directory, folder_name)
    if not os.path.isdir(folder_path):
        continue
    best_acc = find_best_val_acc(folder_path)
    print(f'Model: {folder_name}, Best Validation Accuracy: {best_acc}')


In [None]:
import os
import json
import torch


def load_training_metrics(checkpoint_path):
    """Load per-epoch metrics from a checkpoint written with ``torch.save``.

    Args:
        checkpoint_path: Path to a file whose payload is a dict with the keys
            'epoch_list', 'train_loss_list', 'val_loss_list',
            'train_acc_list' and 'val_acc_list'.

    Returns:
        A 5-tuple ``(epoch_list, train_loss_list, train_accs, val_loss_list,
        val_accs)``. The epoch and loss lists are returned as stored. Each
        accuracy entry is converted with ``.item()`` when it supports that
        (e.g. a 0-d tensor) and passed through unchanged otherwise.

    Raises:
        KeyError: If one of the expected keys is missing.
        IndexError: If an epoch value is not a valid index into an accuracy
            list.
    """
    checkpoint = torch.load(checkpoint_path)

    def as_scalar(value):
        # Convert each value on its own. Previously one try/except wrapped
        # both appends, so a failure on the validation entry re-appended the
        # training entry and left the two lists with different lengths.
        try:
            return value.item()
        except (AttributeError, ValueError, RuntimeError):
            return value

    train_accs = []
    val_accs = []
    # The accuracy lists are indexed by the epoch *value*; this assumes
    # epoch_list holds valid 0-based indices -- TODO confirm.
    for epoch in checkpoint['epoch_list']:
        train_accs.append(as_scalar(checkpoint['train_acc_list'][epoch]))
        val_accs.append(as_scalar(checkpoint['val_acc_list'][epoch]))

    return checkpoint['epoch_list'], checkpoint['train_loss_list'], train_accs, checkpoint['val_loss_list'], val_accs


def find_training_metrics_files(root_dir):
    """Collect the metrics of every ``training_metrics.ckpt`` below ``root_dir``.

    The tree is walked with ``os.walk``. Each directory that contains the
    checkpoint file contributes one entry, keyed by that directory's base
    name, holding the checkpoint path plus the five series returned by
    ``load_training_metrics``.

    NOTE(review): two directories with the same base name in different
    subtrees map to the same key, so the later one replaces the earlier one.
    """
    target_name = "training_metrics.ckpt"
    series_keys = ('epoch_list', 'train_loss_list', 'train_accs', 'val_loss_list', 'val_accs')

    collected = {}
    for current_dir, _, filenames in os.walk(root_dir):
        if target_name not in filenames:
            continue
        ckpt_location = os.path.join(current_dir, target_name)
        entry = {'checkpoint_path': ckpt_location}
        # The loader returns its series in exactly the order of series_keys.
        entry.update(zip(series_keys, load_training_metrics(ckpt_location)))
        collected[os.path.basename(current_dir)] = entry
    return collected

def create_jsonl_file(data, output_file):
    """Write ``data`` to ``output_file`` as JSON Lines.

    Each ``key: value`` pair of ``data`` becomes its own line holding the
    single-key JSON object ``{key: value}``. An existing file is overwritten.
    """
    with open(output_file, 'w') as sink:
        for name, metrics in data.items():
            record = json.dumps({name: metrics})
            sink.write(record + '\n')

# Script entry point: gather the metrics of every training_metrics.ckpt found
# under root_directory and write them to a single JSONL file.
if __name__ == "__main__":
    # Both locations are hard-coded absolute paths; edit them to aggregate a
    # different set of runs.
    root_directory = "/home/ray/nfs/autolang_storage/projects/divyam/distillation_training"
    output_jsonl_file = "/home/ray/nfs/autolang_storage/projects/divyam/distillation_metrics.jsonl"

    # One dict entry per checkpoint directory, then one JSONL line per entry.
    metrics_data = find_training_metrics_files(root_directory)
    create_jsonl_file(metrics_data, output_jsonl_file)
    print(f"JSONL file '{output_jsonl_file}' created with metrics data.")


In [1]:
import json
import os
import matplotlib.pyplot as plt

# Make sure the output directory for the figures exists. exist_ok=True avoids
# the check-then-create race of testing os.path.exists() first.
os.makedirs("plots", exist_ok=True)

def create_and_save_plot(model_data, model_name, max_epochs=None):
    """Save side-by-side loss and accuracy curves for one model as a PNG.

    Args:
        model_data: Mapping with the keys 'epoch_list', 'train_loss_list',
            'val_loss_list', 'train_accs' and 'val_accs'. It is read only;
            the caller's object is never modified.
        model_name: Used in both subplot titles and as the output file name,
            ``plots/<model_name>.png``.
        max_epochs: When not None, only the first ``max_epochs`` entries of
            every series are plotted.

    The figure is written into the "plots" directory, which this function
    does not create itself.
    """
    series_keys = ('epoch_list', 'train_loss_list', 'val_loss_list', 'train_accs', 'val_accs')
    # Work on local references so truncation never writes back into the
    # caller's dict (the previous version overwrote model_data in place).
    series = {key: model_data[key] for key in series_keys}
    if max_epochs is not None:
        series = {key: values[:max_epochs] for key, values in series.items()}
    epochs = series['epoch_list']

    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(10, 5))
    try:
        loss_ax.plot(epochs, series['train_loss_list'], label='Training Loss')
        loss_ax.plot(epochs, series['val_loss_list'], label='Validation Loss')
        loss_ax.set_xlabel('Epochs')
        loss_ax.set_ylabel('Loss')
        loss_ax.set_title(f'{model_name} - Loss')
        loss_ax.legend()

        acc_ax.plot(epochs, series['train_accs'], label='Training Accuracy')
        acc_ax.plot(epochs, series['val_accs'], label='Validation Accuracy')
        acc_ax.set_xlabel('Epochs')
        acc_ax.set_ylabel('Accuracy')
        acc_ax.set_title(f'{model_name} - Accuracy')
        acc_ax.legend()

        # Save the figure inside the "plots" directory
        fig.savefig(os.path.join("plots", f'{model_name}.png'))
    finally:
        # Always release the figure, even when plotting or saving fails, so
        # a long batch of models does not accumulate open figures.
        plt.close(fig)

# Render one figure per record in metrics.jsonl. Each non-blank line is parsed
# as a JSON object whose first key is taken as the model name and whose value
# is that model's metrics.
max_epochs = 30  # Specify the maximum number of epochs to plot (adjust as needed)
with open("metrics.jsonl", 'r') as jsonl_file:
    for line in jsonl_file:
        # Skip blank lines (e.g. a stray trailing newline) instead of letting
        # json.loads fail on them.
        if not line.strip():
            continue
        model_data = json.loads(line)
        model_name = next(iter(model_data))
        create_and_save_plot(model_data[model_name], model_name, max_epochs)
