# Lookback Length Analysis

This notebook parses per-model result files (`*.txt`) from the working directory, tabulates MSE and MAE for each dataset and lookback length, and plots how MSE changes as the lookback length increases.

**Author**: MODE Team

**Date**: 2025-12-24

## 1. Import Libraries

In [None]:
import os
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from glob import glob
from pathlib import Path

## 2. Define Data Parsing Functions

In [None]:
def parse_result_file(filepath):
    """
    Parse a model result file and extract MSE and MAE values per lookback.

    Expected layout (blank lines are ignored):
        (Dataset ModelName)
        Dataset_lookback_pred_model...
        mse:X, mae:Y
        ...

    The model name is taken from the file stem, not from the header line.
    Lines that appear before the first dataset header are ignored. When the
    same lookback occurs more than once within a dataset, the first metrics
    line wins. A metrics line that is not preceded by an experiment line
    (within the current dataset) is skipped instead of being attributed to
    an unrelated lookback.

    Args:
        filepath: Path of the result text file.

    Returns:
        dict mapping dataset name to
        ``{'model_name': str, 'mse': {lookback: float}, 'mae': {lookback: float}}``.

    Raises:
        ValueError: If a metrics line contains a malformed ``key:value`` pair.
        KeyError: If a metrics line lacks the ``mae`` entry.
    """
    header_pattern = re.compile(r'\((\w+)\s+(\w+)\)')
    results = {}
    model_name = Path(filepath).stem

    current_dataset = None
    # Lookback announced by the most recent experiment line and not yet
    # consumed by a metrics line; None means "no experiment pending".
    lookback = None

    with open(filepath, 'r') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue

            # Check for dataset header: (Dataset ModelName)
            match = header_pattern.match(line)
            if match:
                current_dataset = match.group(1)
                results.setdefault(
                    current_dataset,
                    {'model_name': model_name, 'mse': {}, 'mae': {}},
                )
                # A lookback from the previous section must not leak into
                # this dataset.
                lookback = None
                continue

            # Nothing can be attributed until a dataset header has been seen.
            if current_dataset is None:
                continue

            # Parse experiment line to get lookback length
            # Format: Dataset_lookback_pred_model...
            if '_' in line:
                try:
                    lookback = int(line.split('_')[1])
                    # Next line should contain metrics
                    continue
                except ValueError:
                    # Not an experiment line; fall through to the metrics check.
                    pass

            # Parse metrics line
            if line.startswith('mse:'):
                if lookback is None:
                    # No experiment line announced a lookback for these metrics.
                    continue

                metrics = {}
                for metric in line.split(','):
                    key, value = metric.strip().split(':')
                    metrics[key] = float(value)

                # Resolve both values before storing so a missing 'mae'
                # cannot leave a half-written entry behind.
                mse_value, mae_value = metrics['mse'], metrics['mae']
                dataset_results = results[current_dataset]
                if lookback not in dataset_results['mse']:
                    dataset_results['mse'][lookback] = mse_value
                    dataset_results['mae'][lookback] = mae_value

                # The pending experiment has been consumed.
                lookback = None

    return results

In [None]:
def load_all_results(directory):
    """Collect the parsed results of every ``*.txt`` file in *directory*.

    Each file is parsed with ``parse_result_file``; the per-file output is
    regrouped as ``{dataset: {model_name: {'mse': {...}, 'mae': {...}}}}``.
    """
    all_results = {}
    pattern = os.path.join(directory, '*.txt')

    for filepath in glob(pattern):
        for dataset, data in parse_result_file(filepath).items():
            # Create the per-dataset bucket on first sight, then file the
            # metrics under the model name reported by the parser.
            per_model = all_results.setdefault(dataset, {})
            per_model[data['model_name']] = {
                'mse': data['mse'],
                'mae': data['mae'],
            }

    return all_results

## 3. Load Data

In [None]:
# Result files are read from the current working directory. Path.cwd() is the
# process working directory, which is not necessarily the notebook's own folder.
notebook_dir = Path.cwd()

print("Loading model results...")
all_results = load_all_results(notebook_dir)

print(f"Found results for datasets: {list(all_results.keys())}")
# Model names are listed for the 'ECL' dataset only; an empty list is printed
# when 'ECL' is not among the loaded datasets.
print(f"Models: {list(all_results.get('ECL', {}).keys())}")

## 4. Create Summary Table

In [None]:
def create_summary_table(all_results, datasets=None):
    """Create a long-format summary table for the selected datasets and models.

    Args:
        all_results: Nested mapping
            ``{dataset: {model: {'mse': {lookback: value}, 'mae': {lookback: value}}}}``.
        datasets: Dataset names to include, in output order. Defaults to
            ``['ECL', 'ETTm1', 'Weather']``. Names missing from
            ``all_results`` are skipped.

    Returns:
        pandas.DataFrame with the columns ``Dataset``, ``Model``,
        ``Lookback``, ``MSE`` and ``MAE``; one row per (dataset, model,
        lookback), with models and lookbacks sorted within each dataset.
        The columns are present even when there are no rows, so filtering
        on ``df['Dataset']`` is always safe.
    """
    columns = ['Dataset', 'Model', 'Lookback', 'MSE', 'MAE']
    if datasets is None:
        datasets = ['ECL', 'ETTm1', 'Weather']

    summary_data = []
    for dataset in datasets:
        if dataset not in all_results:
            continue

        for model_name in sorted(all_results[dataset]):
            model_data = all_results[dataset][model_name]

            for lookback in sorted(model_data['mse']):
                summary_data.append({
                    'Dataset': dataset,
                    'Model': model_name,
                    'Lookback': lookback,
                    'MSE': model_data['mse'][lookback],
                    'MAE': model_data['mae'][lookback],
                })

    # Passing the column list explicitly keeps the schema stable for an
    # empty result set (a DataFrame built from [] alone has no columns).
    return pd.DataFrame(summary_data, columns=columns)

# Build the long-format summary once from the loaded results; the display
# cell filters this frame per dataset.
summary_df = create_summary_table(all_results)

## 5. Display Summary Tables

In [None]:
# Show one MSE table and one MAE table per dataset
# (rows: lookback length, columns: model); datasets without results are skipped.
for dataset in ['ECL', 'ETTm1', 'Weather']:
    if dataset in all_results:
        print(f"\n{'='*80}")
        print(f"{dataset} DATASET SUMMARY")
        print(f"{'='*80}")

        # Keep only the summary rows that belong to this dataset
        dataset_df = summary_df.loc[summary_df['Dataset'] == dataset]

        # Reshape the long-format rows into wide Lookback x Model tables
        pivot_mse = dataset_df.pivot(values='MSE', index='Lookback', columns='Model')
        pivot_mae = dataset_df.pivot(values='MAE', index='Lookback', columns='Model')

        print("\nMSE Results:")
        display(pivot_mse.round(4))

        print("\nMAE Results:")
        display(pivot_mae.round(4))

## 6. Create Visualization

In [None]:
def plot_lookback_analysis(all_results, output_dir):
    """Plot MSE against lookback length, one panel per dataset.

    Draws a 1x3 figure with the panels ECL, ETTm1 and Weather. Every model
    found for a dataset gets one line over the lookback lengths
    48, 96, 192, 336 and 720; lookbacks without a result are plotted as NaN.
    A dataset missing from ``all_results`` gets a "No Data" placeholder
    panel. A single shared legend is placed below the panels when at least
    one line was drawn.

    Args:
        all_results: Nested mapping
            ``{dataset: {model: {'mse': {lookback: value}, ...}}}``.
        output_dir: Not read by this function; the figure is returned and
            nothing is written to disk. Kept so existing callers keep working.

    Returns:
        The matplotlib Figure holding the three panels.
    """
    import matplotlib.ticker as ticker

    datasets = ['ECL', 'ETTm1', 'Weather']
    lookback_values = [48, 96, 192, 336, 720]
    # Lines are plotted against positional indices so the unevenly spaced
    # lookback lengths appear equidistant; the real values become tick labels.
    lookback_indices = np.arange(len(lookback_values))

    # A base model and its "_M" variant share one colour; the line style
    # below is what tells them apart.
    model_colors = {
        'MODE': "#3E34FF",            # indigo blue
        'Transformer_M': "#A571EB",   # light purple
        'Transformer': '#A571EB',     # light purple
        'S_Mamba': "#00A6FF",         # sky blue
        'Reformer_M': "#EFBF00",      # golden yellow
        'Reformer': '#EFBF00',        # golden yellow
        'iTransformer': "#48A21B",    # green
        'Informer_M': "#DD6B6B",      # soft red
        'Informer': '#DD6B6B'         # soft red
    }

    # "_M" variants are dashed, everything else is solid.
    model_styles = {
        'MODE': '-',
        'Transformer_M': '--',
        'Transformer': '-',
        'S_Mamba': '-',
        'Reformer_M': '--',
        'Reformer': '-',
        'iTransformer': '-',
        'Informer_M': '--',
        'Informer': '-'
    }

    fig, axes = plt.subplots(1, 3, figsize=(8, 2.6), dpi=600)

    # First line drawn per model name, reused as the shared legend handle.
    legend_items = {}

    for idx, dataset in enumerate(datasets):
        ax = axes[idx]

        if dataset not in all_results:
            ax.text(0.5, 0.5, 'No Data', ha='center', va='center',
                    transform=ax.transAxes, fontsize=5, color='#000000')
            ax.set_title(dataset, fontsize=6, color='#000000')
            continue

        # Linear scale with evenly spaced ticks
        ax.set_yscale('linear')
        ax.yaxis.set_major_locator(ticker.MaxNLocator(nbins=5, prune=None))
        ax.yaxis.set_major_formatter(ticker.FormatStrFormatter('%.2f'))

        for model_name in sorted(all_results[dataset]):
            # Models without a configured colour/style fall back to a
            # solid black line.
            color = model_colors.get(model_name, '#000000')
            style = model_styles.get(model_name, '-')

            model_data = all_results[dataset][model_name]
            mse_values = [model_data['mse'].get(lb, np.nan) for lb in lookback_values]

            # Skip models that have no result at any of the plotted lookbacks.
            if np.all(np.isnan(mse_values)):
                continue

            line, = ax.plot(
                lookback_indices,
                mse_values,
                marker='o',
                linewidth=0.8,
                markersize=2.5,
                color=color,
                linestyle=style,
                label=model_name,
                alpha=0.95
            )
            legend_items.setdefault(model_name, line)

        ax.set_title(f'{dataset}', fontsize=7, fontweight='bold', color='#000000')
        ax.set_xlabel('Lookback Length', fontsize=6, color='#000000')
        if idx == 0:
            # Only the left-most panel carries the y-axis label.
            ax.set_ylabel('MSE', fontsize=6, color='#000000')

        for spine in ax.spines.values():
            spine.set_color('#000000')
            spine.set_linewidth(0.6)

        # Harmonize tick label sizes/colors for both axes
        ax.tick_params(axis='both', which='both', colors='#000000', labelsize=5, width=0.6)

        # Vertical grid lines only.
        ax.grid(False)
        ax.xaxis.grid(True, color='#d0d0d0', linestyle='-', alpha=0.35)

        ax.set_xticks(lookback_indices)
        ax.set_xticklabels(lookback_values, fontsize=5, color='#000000')

    # Legend order: priority models first, remaining models alphabetically.
    priority_models = ['MODE', 'S_Mamba', 'iTransformer']
    sorted_models = [m for m in priority_models if m in legend_items]
    sorted_models.extend(sorted(m for m in legend_items if m not in sorted_models))

    # With nothing plotted there is nothing to label, and a zero-column
    # legend is not a valid layout, so the legend is only built when needed.
    if sorted_models:
        handles = [legend_items[m] for m in sorted_models]

        # One column per entry forces a single-row legend just below the plots.
        leg = fig.legend(handles, sorted_models, loc='lower center', bbox_to_anchor=(0.5, 0.02),
                         ncol=len(sorted_models), fontsize=5, frameon=False,
                         columnspacing=1.0, handlelength=2.0)

        # Bold 'MODE' in legend
        for text in leg.get_texts():
            if text.get_text() == 'MODE':
                text.set_weight('bold')

    # Operate on this figure explicitly rather than on pyplot's current figure.
    fig.tight_layout()
    # Reserve room at the bottom for the shared legend.
    fig.subplots_adjust(bottom=0.22, wspace=0.15)

    return fig

# Generate the plot and keep the returned figure. notebook_dir is passed as
# output_dir, which plot_lookback_analysis accepts but does not read, so
# nothing is written to disk by this call.
fig = plot_lookback_analysis(all_results, notebook_dir)