# Load all the log files

In [2]:
import json
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

# Training-log files for the four runs compared in this notebook, keyed by a
# short identifier. The paths are relative, so they resolve against the
# kernel's current working directory.
log_paths = {
    'fine_tuning_full': '../../down-stream/sentence_classification/fine-tuning/fullset/logs/training_log_20250420_212657.json',
    'fine_tuning_subset': '../../down-stream/sentence_classification/fine-tuning/subset/logs/training_log_20250420_201226.json',
    'from_scratch_full': '../../down-stream/sentence_classification/from-scratch/fullset/logs/training_log_20250420_204009.json',
    'from_scratch_subset': '../../down-stream/sentence_classification/from-scratch/subset/logs/training_log_20250421_015254.json'
}

# Parse every log into memory, using the same keys as log_paths.
# The encoding is passed explicitly: JSON is UTF-8, whereas open() would
# otherwise fall back to the platform's locale encoding.
logs = {}
for name, path in log_paths.items():
    with open(path, 'r', encoding='utf-8') as f:
        logs[name] = json.load(f)

# Split each log into its evaluation series and its training-loss series.
# An entry containing 'eval_loss' is treated as an evaluation record; any
# other entry containing 'loss' is treated as a training record.
metrics = {}
for name, log in logs.items():
    eval_entries = [record for record in log if 'eval_loss' in record]

    epochs = [record['epoch'] for record in eval_entries]
    eval_losses = [record['eval_loss'] for record in eval_entries]
    eval_accuracies = [record['eval_accuracy'] for record in eval_entries]
    train_losses = [
        record['loss']
        for record in log
        if 'eval_loss' not in record and 'loss' in record
    ]

    metrics[name] = dict(
        epochs=epochs,
        eval_losses=eval_losses,
        eval_accuracies=eval_accuracies,
        train_losses=train_losses,
    )

# Training Metrics Comparison

Let's compare the evaluation loss and evaluation accuracy recorded during training across all four approaches:

In [3]:
# Two stacked panels: evaluation loss on top, evaluation accuracy below
fig = make_subplots(
    rows=2,
    cols=1,
    vertical_spacing=0.12,
    subplot_titles=('Evaluation Loss', 'Evaluation Accuracy'),
)

# One fixed colour per approach so both panels use the same colour coding
colors = dict(
    fine_tuning_full='#1f77b4',
    fine_tuning_subset='#ff7f0e',
    from_scratch_full='#2ca02c',
    from_scratch_subset='#d62728',
)

# Human-readable labels used in legends and tables
names = dict(
    fine_tuning_full='Fine-tuning (Full)',
    fine_tuning_subset='Fine-tuning (50%)',
    from_scratch_full='From Scratch (Full)',
    from_scratch_subset='From Scratch (50%)',
)

# Each approach contributes one curve to the loss panel and one to the accuracy panel
for name, metric in metrics.items():
    loss_trace = go.Scatter(
        x=metric['epochs'],
        y=metric['eval_losses'],
        name=f"{names[name]} (Loss)",
        line={'color': colors[name]},
    )
    fig.add_trace(loss_trace, row=1, col=1)

    accuracy_trace = go.Scatter(
        x=metric['epochs'],
        y=metric['eval_accuracies'],
        name=f"{names[name]} (Acc)",
        line={'color': colors[name]},
    )
    fig.add_trace(accuracy_trace, row=2, col=1)

# Figure-wide settings
fig.update_layout(
    title_text="Training Metrics Comparison",
    height=800,
    showlegend=True,
    hovermode='x unified',
)

# Axis titles: both panels share the "Epoch" x label; y labels differ per panel
for row, y_label in ((1, "Loss"), (2, "Accuracy")):
    fig.update_xaxes(title_text="Epoch", row=row, col=1)
    fig.update_yaxes(title_text=y_label, row=row, col=1)

fig.show()

# Performance Analysis

Let's analyze the final and best evaluation metrics (loss and accuracy) for each approach:

In [4]:
# One summary row per approach: last-recorded and best evaluation values,
# plus the number of evaluation points found in the log
final_metrics = [
    {
        'Approach': names[name],
        'Final Loss': metric['eval_losses'][-1],
        'Final Accuracy': metric['eval_accuracies'][-1],
        'Best Accuracy': max(metric['eval_accuracies']),
        'Best Loss': min(metric['eval_losses']),
        'Num Epochs': len(metric['epochs']),
    }
    for name, metric in metrics.items()
]

df_metrics = pd.DataFrame(final_metrics).round(4)

# Colour the table so green is the better end of each column: high values
# for accuracy, low values for loss (hence the reversed colormap).
accuracy_columns = ['Final Accuracy', 'Best Accuracy']
loss_columns = ['Final Loss', 'Best Loss']
(
    df_metrics.style
    .background_gradient(cmap='RdYlGn', subset=accuracy_columns)
    .background_gradient(cmap='RdYlGn_r', subset=loss_columns)
)

Unnamed: 0,Approach,Final Loss,Final Accuracy,Best Accuracy,Best Loss,Num Epochs
0,Fine-tuning (Full),0.5305,0.901,0.901,0.2732,10
1,Fine-tuning (50%),0.5393,0.889,0.8912,0.2919,10
2,From Scratch (Full),1.0261,0.8545,0.8716,0.3074,10
3,From Scratch (50%),0.9772,0.8542,0.8589,0.3664,10
