# 🚀 Comprehensive Efficiency Analysis for LLM-TIME Models

This notebook analyzes efficiency data from comprehensive experiments including:
- **Time-LLM**: BERT, GPT2, LLAMA (train & inference efficiency)
- **Chronos**: T5-base, T5-tiny (train & inference efficiency) 
- **Distillation**: BERT→TinyBERT (training & inference efficiency)

The analysis covers:
- 📊 **Memory Usage**: GPU VRAM, RAM consumption
- ⏱️ **Latency**: Training time, inference time
- 🔋 **Power Consumption**: Energy efficiency metrics
- 📈 **Performance vs Efficiency**: Accuracy/efficiency trade-offs

In [None]:
# Import Required Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import glob
import os
from pathlib import Path
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')

# Set up plotting style: select matplotlib's 'default' style sheet and make
# seaborn's "husl" palette the active colour palette for later plots.
plt.style.use('default')
sns.set_palette("husl")

In [None]:
# Configuration
# Root directory every other path in this notebook is derived from. The
# original hard-coded location remains the default; set the
# LLM_TIME_BASE_DIR environment variable to point the notebook at a different
# checkout or machine without editing this cell.
BASE_DIR = Path(os.environ.get('LLM_TIME_BASE_DIR', '/home/amma/LLM-TIME'))
EXPERIMENTS_DIR = BASE_DIR / 'experiments'
EFFICIENCY_DIR = BASE_DIR / 'efficiency_experiments_20251020_141409'

# Echo the resolved locations so a wrong root is noticed before any globbing.
print(f"üìÅ Base directory: {BASE_DIR}")
print(f"üß™ Experiments directory: {EXPERIMENTS_DIR}")
print(f"‚ö° Efficiency directory: {EFFICIENCY_DIR}")
print(f"üìä Directory exists: {EFFICIENCY_DIR.exists()}")

In [None]:
# Discover all efficiency report files
# Recursive glob patterns, evaluated relative to BASE_DIR.
efficiency_patterns = [
    '**/efficiency_report_*.json',
    '**/real_performance_report_*.json',
    '**/comprehensive_performance_report_*.json',
    '**/time_llm_efficiency_*.json',
    '**/chronos_efficiency_*.json',
    '**/distillation_efficiency_*.json',
]

# Collect matches pattern by pattern. A path matched by more than one pattern
# is kept only once, and the final list is sorted so later cells see the files
# in the same order on every run, whatever order the filesystem returns them.
unique_files = {}
for pattern in efficiency_patterns:
    files = list(BASE_DIR.glob(pattern))
    unique_files.update(dict.fromkeys(files))
    print(f"üìã Pattern '{pattern}': Found {len(files)} files")

all_efficiency_files = sorted(unique_files)

print(f"\nüéØ Total efficiency files found: {len(all_efficiency_files)}")

# Show sample files (at most the first five, as paths relative to BASE_DIR)
if all_efficiency_files:
    print(f"\nüìÑ Sample efficiency files:")
    for position, sample_file in enumerate(all_efficiency_files[:5], start=1):
        rel_path = sample_file.relative_to(BASE_DIR)
        print(f"  {position}. {rel_path}")
    if len(all_efficiency_files) > 5:
        print(f"  ... and {len(all_efficiency_files) - 5} more files")

In [None]:
def load_efficiency_data(file_path, base_dir=None):
    """Load one efficiency report and derive metadata from its path.

    Args:
        file_path: Location of the JSON report (``str`` or ``Path``).
        base_dir: Directory the stored relative path is computed against.
            Defaults to the notebook-level ``BASE_DIR``.

    Returns:
        ``{'data': <parsed JSON>, 'metadata': {...}}`` on success. ``metadata``
        always holds ``file_path``, ``filename``, ``directory`` and ``mode``;
        ``model_family`` / ``model_name`` are present only when the relative
        path contains one of the markers tested below. ``None`` is returned
        (after printing the reason) when the file cannot be read, is not valid
        JSON, or does not live under ``base_dir``.
    """
    try:
        report_path = Path(file_path)
        root = BASE_DIR if base_dir is None else Path(base_dir)
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(report_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        rel_path = report_path.relative_to(root)
    except (OSError, TypeError, ValueError) as e:
        # ValueError covers json.JSONDecodeError, undecodable bytes and a
        # path that is not located below the base directory.
        print(f"‚ùå Error loading {file_path}: {e}")
        return None

    # Always use forward slashes so the stored path and the derived directory
    # have the same form on every operating system.
    rel_posix = rel_path.as_posix()
    metadata = {
        'file_path': rel_posix,
        'filename': report_path.name,
        'directory': '/'.join(rel_path.parts[:-1]),
    }

    # Model family/name come from case-insensitive substring markers in the
    # relative path.
    # NOTE(review): families are tested in this order, so a path containing
    # both 'time_llm' and 'distill' is labelled Time-LLM - confirm intended.
    path_str = rel_posix.lower()
    if 'time_llm' in path_str or 'timellm' in path_str:
        metadata['model_family'] = 'Time-LLM'
        for marker, model_name in (('bert', 'BERT'), ('gpt2', 'GPT2'), ('llama', 'LLAMA')):
            if marker in path_str:
                metadata['model_name'] = model_name
                break
    elif 'chronos' in path_str:
        metadata['model_family'] = 'Chronos'
        if 't5-base' in path_str or 't5_base' in path_str:
            metadata['model_name'] = 'T5-base'
        elif 't5-tiny' in path_str or 't5_tiny' in path_str:
            metadata['model_name'] = 'T5-tiny'
    elif 'distill' in path_str:
        metadata['model_family'] = 'Distillation'
        metadata['model_name'] = 'BERT‚ÜíTinyBERT'

    # Mode: 'inference' wins whenever it appears; otherwise 'train' marks a
    # training run; anything else is unknown.
    if 'inference' in path_str:
        metadata['mode'] = 'inference'
    elif 'train' in path_str:
        metadata['mode'] = 'training'
    else:
        metadata['mode'] = 'unknown'

    return {'data': data, 'metadata': metadata}

# Load all efficiency data
print("üîÑ Loading efficiency data...")
# Parse every discovered report lazily; a file that failed to load comes back
# falsy and is left out of the final list.
parsed_reports = (load_efficiency_data(report_path) for report_path in all_efficiency_files)
efficiency_data = [report for report in parsed_reports if report]

print(f"‚úÖ Successfully loaded {len(efficiency_data)} efficiency files")

In [None]:
def extract_efficiency_metrics(data_entry):
    """Flatten one loaded report into a single row of efficiency metrics.

    Args:
        data_entry: Mapping with a ``'data'`` entry (the parsed report) and a
            ``'metadata'`` entry that must contain ``'file_path'`` and may
            contain ``'model_family'``, ``'model_name'`` and ``'mode'``.

    Returns:
        A flat ``dict`` with the identifying fields, the nine canonical
        metrics (``None`` when absent or not numeric) and, when computable,
        ``process_peak_memory_mb``, ``gpu_memory_time_ratio`` and
        ``total_energy_wh``. ``None`` is returned (after printing the reason)
        when ``data_entry`` does not have the expected shape.
    """

    def as_number(value):
        """Return ``value`` as a real number, or ``None`` if it is not one."""
        if isinstance(value, bool):
            return None
        if isinstance(value, (int, float)):
            number = value
        elif isinstance(value, str):
            try:
                number = float(value)
            except ValueError:
                return None
        else:
            return None
        # NaN never equals itself; treat it as a missing reading.
        return None if number != number else number

    try:
        data = data_entry['data']
        metadata = data_entry['metadata']
        metrics = {
            'file_path': metadata['file_path'],
            'model_family': metadata.get('model_family', 'Unknown'),
            'model_name': metadata.get('model_name', 'Unknown'),
            'mode': metadata.get('mode', 'unknown'),
        }
    except (KeyError, TypeError, AttributeError) as e:
        # Build the label defensively so reporting the problem cannot raise.
        source = '<unknown>'
        if isinstance(data_entry, dict) and isinstance(data_entry.get('metadata'), dict):
            source = data_entry['metadata'].get('file_path', source)
        print(f"‚ùå Error extracting metrics from {source}: {e}")
        return None

    # A report that is not a JSON object carries no named metrics.
    report = data if isinstance(data, dict) else {}

    # Metrics may be nested under one of several section names; the first
    # section present wins, otherwise the top level of the report is searched.
    sys_metrics = report
    for section in ('system_metrics', 'metrics', 'performance_data'):
        if section in report:
            if isinstance(report[section], dict):
                sys_metrics = report[section]
            break

    # Canonical metric name -> accepted spellings, in priority order.
    metric_keys = {
        'max_gpu_memory_mb': ['max_gpu_memory_mb', 'gpu_memory_peak', 'max_gpu_memory'],
        'avg_gpu_memory_mb': ['avg_gpu_memory_mb', 'gpu_memory_avg', 'avg_gpu_memory'],
        'max_cpu_percent': ['max_cpu_percent', 'cpu_peak', 'max_cpu'],
        'avg_cpu_percent': ['avg_cpu_percent', 'cpu_avg', 'avg_cpu'],
        'max_memory_mb': ['max_memory_mb', 'memory_peak', 'max_memory'],
        'avg_memory_mb': ['avg_memory_mb', 'memory_avg', 'avg_memory'],
        'total_time_seconds': ['total_time_seconds', 'execution_time', 'duration', 'total_time'],
        'avg_power_watts': ['avg_power_watts', 'power_avg', 'avg_power'],
        'max_power_watts': ['max_power_watts', 'power_peak', 'max_power'],
    }

    for metric_name, possible_keys in metric_keys.items():
        value = None
        for key in possible_keys:
            # Skip spellings that are missing, null or not numeric so that a
            # later alias can still supply the reading.
            value = as_number(sys_metrics.get(key))
            if value is not None:
                break
        metrics[metric_name] = value

    # Optional process-level peak memory, copied through unchanged.
    proc_info = report.get('process_info')
    if isinstance(proc_info, dict) and 'peak_memory_mb' in proc_info:
        metrics['process_peak_memory_mb'] = proc_info['peak_memory_mb']

    # Derived ratios are only added when both inputs are present and non-zero.
    total_time = metrics['total_time_seconds']
    if total_time and metrics['max_gpu_memory_mb']:
        metrics['gpu_memory_time_ratio'] = metrics['max_gpu_memory_mb'] / total_time

    if metrics['avg_power_watts'] and total_time:
        # watts * seconds = joules; 3600 joules = 1 watt-hour.
        metrics['total_energy_wh'] = (metrics['avg_power_watts'] * total_time) / 3600

    return metrics

# Extract metrics from all data
print("üìä Extracting efficiency metrics...")
# One row per loaded report; a report whose extraction failed comes back
# falsy and is skipped.
extracted_rows = (extract_efficiency_metrics(entry) for entry in efficiency_data)
all_metrics = [row for row in extracted_rows if row]

# Create DataFrame
df_efficiency = pd.DataFrame(all_metrics)
print(f"‚úÖ Created efficiency DataFrame with {len(df_efficiency)} entries")

# Display summary: distinct values of the three identifying columns, then
# the overall shape and the column list.
if not df_efficiency.empty:
    print(f"\nüìã Efficiency Data Summary:")
    for heading, column in (('Model families', 'model_family'),
                            ('Models', 'model_name'),
                            ('Modes', 'mode')):
        print(f"{heading}: {df_efficiency[column].unique()}")
    print(f"\nDataFrame shape: {df_efficiency.shape}")
    print(f"\nColumns: {list(df_efficiency.columns)}")

In [None]:
# Display sample of the efficiency data
# Handle the "nothing loaded" case first, then show the head of the table,
# the column dtypes and summary statistics for the numeric columns.
if df_efficiency.empty:
    print("‚ùå No efficiency data available for analysis")
else:
    print("üìä Sample of Efficiency Data:")
    display(df_efficiency.head())

    print("\nüîç Data Types:")
    display(df_efficiency.dtypes)

    print("\nüìà Basic Statistics for Numeric Columns:")
    numeric_cols = df_efficiency.select_dtypes(include=[np.number]).columns
    if len(numeric_cols) == 0:
        print("No numeric columns found in efficiency data")
    else:
        display(df_efficiency[numeric_cols].describe())

In [None]:
# Memory Usage Analysis
# Draws a 2x2 plotly figure from the rows of df_efficiency that have a peak
# GPU memory reading, then prints per-family summary statistics. The cell only
# prints a message when the table is empty, the column is missing, or no row
# has a GPU reading.
if not df_efficiency.empty and 'max_gpu_memory_mb' in df_efficiency.columns:
    
    # Filter out rows with missing GPU memory data
    gpu_data = df_efficiency.dropna(subset=['max_gpu_memory_mb'])
    
    if not gpu_data.empty:
        print(f"üéØ GPU Memory Usage Analysis ({len(gpu_data)} entries with GPU data)")
        
        # Create subplots for memory analysis
        # (row 1: two bar charts; row 2: a scatter plot and a histogram)
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=('GPU Memory by Model Family', 'GPU Memory by Model & Mode',
                          'CPU vs GPU Memory', 'Memory Usage Distribution'),
            specs=[[{"type": "bar"}, {"type": "bar"}],
                   [{"type": "scatter"}, {"type": "histogram"}]]
        )
        
        # GPU Memory by Model Family: mean of the per-run peak within each family
        family_gpu = gpu_data.groupby('model_family')['max_gpu_memory_mb'].mean().reset_index()
        fig.add_trace(
            go.Bar(x=family_gpu['model_family'], y=family_gpu['max_gpu_memory_mb'],
                   name='Avg GPU Memory', marker_color='skyblue'),
            row=1, col=1
        )
        
        # GPU Memory by Model & Mode: one bar per (model, mode) pair, labelled "Model (mode)"
        model_mode_gpu = gpu_data.groupby(['model_name', 'mode'])['max_gpu_memory_mb'].mean().reset_index()
        model_mode_gpu['label'] = model_mode_gpu['model_name'] + ' (' + model_mode_gpu['mode'] + ')'
        fig.add_trace(
            go.Bar(x=model_mode_gpu['label'], y=model_mode_gpu['max_gpu_memory_mb'],
                   name='GPU Memory by Model+Mode', marker_color='lightcoral'),
            row=1, col=2
        )
        
        # CPU vs GPU Memory (if both available): x is max_memory_mb, y is max_gpu_memory_mb;
        # the panel is left empty when no row has both values
        if 'max_memory_mb' in gpu_data.columns:
            memory_data = gpu_data.dropna(subset=['max_memory_mb'])
            if not memory_data.empty:
                fig.add_trace(
                    go.Scatter(x=memory_data['max_memory_mb'], y=memory_data['max_gpu_memory_mb'],
                             mode='markers', name='CPU vs GPU Memory',
                             text=memory_data['model_name'], marker_color='green'),
                    row=2, col=1
                )
        
        # Memory Distribution: histogram of the per-run peak GPU memory
        fig.add_trace(
            go.Histogram(x=gpu_data['max_gpu_memory_mb'], name='GPU Memory Distribution',
                        marker_color='orange'),
            row=2, col=2
        )
        
        # Figure-level layout, then axis titles for each of the four panels
        fig.update_layout(height=800, title_text="üß† Memory Usage Analysis", showlegend=False)
        fig.update_xaxes(title_text="Model Family", row=1, col=1)
        fig.update_xaxes(title_text="Model (Mode)", row=1, col=2)
        fig.update_xaxes(title_text="CPU Memory (MB)", row=2, col=1)
        fig.update_xaxes(title_text="GPU Memory (MB)", row=2, col=2)
        fig.update_yaxes(title_text="GPU Memory (MB)", row=1, col=1)
        fig.update_yaxes(title_text="GPU Memory (MB)", row=1, col=2)
        fig.update_yaxes(title_text="GPU Memory (MB)", row=2, col=1)
        fig.update_yaxes(title_text="Count", row=2, col=2)
        
        fig.show()
        
        # Summary statistics of the per-run peak, grouped by model family
        print("\nüìä GPU Memory Statistics by Model Family:")
        gpu_stats = gpu_data.groupby('model_family')['max_gpu_memory_mb'].agg(['mean', 'median', 'std', 'min', 'max'])
        display(gpu_stats.round(2))
        
    else:
        print("‚ùå No GPU memory data available for analysis")
else:
    print("‚ùå No efficiency data available or missing GPU memory column")

In [None]:
# Timing and Performance Analysis
# Draws a 2x2 plotly figure from the rows of df_efficiency that have a
# total_time_seconds reading, then prints summary statistics by model family
# and by mode. The cell only prints a message when the table is empty, the
# column is missing, or no row has a timing value.
if not df_efficiency.empty and 'total_time_seconds' in df_efficiency.columns:
    
    # Filter out rows with missing timing data
    timing_data = df_efficiency.dropna(subset=['total_time_seconds'])
    
    if not timing_data.empty:
        print(f"‚è±Ô∏è Timing Analysis ({len(timing_data)} entries with timing data)")
        
        # Convert seconds to minutes for better readability
        # (work on an explicit copy so the new columns are not added to a
        # frame derived from df_efficiency)
        timing_data = timing_data.copy()
        timing_data['total_time_minutes'] = timing_data['total_time_seconds'] / 60
        # The hours column is computed here but not used by any plot in this cell.
        timing_data['total_time_hours'] = timing_data['total_time_seconds'] / 3600
        
        # Create timing analysis plots
        # (row 1: two bar charts; row 2: a scatter plot and a histogram)
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=('Execution Time by Model Family', 'Training vs Inference Time',
                          'Time vs GPU Memory', 'Execution Time Distribution'),
            specs=[[{"type": "bar"}, {"type": "bar"}],
                   [{"type": "scatter"}, {"type": "histogram"}]]
        )
        
        # Execution time by model family: mean run time in minutes per family
        family_time = timing_data.groupby('model_family')['total_time_minutes'].mean().reset_index()
        fig.add_trace(
            go.Bar(x=family_time['model_family'], y=family_time['total_time_minutes'],
                   name='Avg Execution Time', marker_color='lightblue'),
            row=1, col=1
        )
        
        # Training vs Inference time comparison: one bar per distinct value of
        # the mode column
        mode_time = timing_data.groupby('mode')['total_time_minutes'].mean().reset_index()
        fig.add_trace(
            go.Bar(x=mode_time['mode'], y=mode_time['total_time_minutes'],
                   name='Time by Mode', marker_color='lightgreen'),
            row=1, col=2
        )
        
        # Time vs GPU Memory correlation: only rows that also have a peak GPU
        # memory reading; the panel is left empty when there are none
        if 'max_gpu_memory_mb' in timing_data.columns:
            memory_time_data = timing_data.dropna(subset=['max_gpu_memory_mb'])
            if not memory_time_data.empty:
                fig.add_trace(
                    go.Scatter(x=memory_time_data['max_gpu_memory_mb'], y=memory_time_data['total_time_minutes'],
                             mode='markers', name='Time vs GPU Memory',
                             text=memory_time_data['model_name'], marker_color='red'),
                    row=2, col=1
                )
        
        # Execution time distribution: histogram of run time in minutes
        fig.add_trace(
            go.Histogram(x=timing_data['total_time_minutes'], name='Time Distribution',
                        marker_color='purple'),
            row=2, col=2
        )
        
        # Figure-level layout, then axis titles for each of the four panels
        fig.update_layout(height=800, title_text="‚è±Ô∏è Timing Analysis", showlegend=False)
        fig.update_xaxes(title_text="Model Family", row=1, col=1)
        fig.update_xaxes(title_text="Mode", row=1, col=2)
        fig.update_xaxes(title_text="GPU Memory (MB)", row=2, col=1)
        fig.update_xaxes(title_text="Execution Time (minutes)", row=2, col=2)
        fig.update_yaxes(title_text="Time (minutes)", row=1, col=1)
        fig.update_yaxes(title_text="Time (minutes)", row=1, col=2)
        fig.update_yaxes(title_text="Time (minutes)", row=2, col=1)
        fig.update_yaxes(title_text="Count", row=2, col=2)
        
        fig.show()
        
        # Summary statistics (minutes), first per model family, then per mode
        print("\nüìä Timing Statistics by Model Family:")
        timing_stats = timing_data.groupby('model_family')['total_time_minutes'].agg(['mean', 'median', 'std', 'min', 'max'])
        display(timing_stats.round(2))
        
        print("\nüìä Timing Statistics by Mode:")
        mode_stats = timing_data.groupby('mode')['total_time_minutes'].agg(['mean', 'median', 'std', 'min', 'max'])
        display(mode_stats.round(2))
        
    else:
        print("‚ùå No timing data available for analysis")
else:
    print("‚ùå No efficiency data available or missing timing column")

In [None]:
# Power Consumption and Energy Analysis
# Draws a 2x2 plotly figure (mean power per model, mean energy per model,
# power vs. peak GPU memory, and a reciprocal-energy "efficiency" scatter) and
# prints per-model power statistics. Each panel is skipped when its columns or
# rows are missing.
if not df_efficiency.empty:

    # Check for power-related columns
    power_cols = ['avg_power_watts', 'max_power_watts', 'total_energy_wh']
    available_power_cols = [col for col in power_cols if col in df_efficiency.columns]

    if available_power_cols:
        # Keep rows that carry at least one power/energy reading.
        power_data = df_efficiency.dropna(subset=available_power_cols, how='all')

        if not power_data.empty:
            print(f"üîã Power Consumption Analysis ({len(power_data)} entries with power data)")

            # Create power analysis plots
            fig = make_subplots(
                rows=2, cols=2,
                subplot_titles=('Power Consumption by Model', 'Energy Consumption',
                              'Power vs Memory Usage', 'Power Efficiency'),
                specs=[[{"type": "bar"}, {"type": "bar"}],
                       [{"type": "scatter"}, {"type": "scatter"}]]
            )

            # Power consumption by model: mean of avg_power_watts per model
            if 'avg_power_watts' in power_data.columns:
                power_by_model = power_data.groupby('model_name')['avg_power_watts'].mean().reset_index()
                power_by_model = power_by_model.dropna()
                if not power_by_model.empty:
                    fig.add_trace(
                        go.Bar(x=power_by_model['model_name'], y=power_by_model['avg_power_watts'],
                               name='Avg Power', marker_color='orange'),
                        row=1, col=1
                    )

            # Energy consumption: mean of total_energy_wh per model
            if 'total_energy_wh' in power_data.columns:
                energy_by_model = power_data.groupby('model_name')['total_energy_wh'].mean().reset_index()
                energy_by_model = energy_by_model.dropna()
                if not energy_by_model.empty:
                    fig.add_trace(
                        go.Bar(x=energy_by_model['model_name'], y=energy_by_model['total_energy_wh'],
                               name='Total Energy', marker_color='red'),
                        row=1, col=2
                    )

            # Power vs Memory usage: only rows that have both readings
            if 'avg_power_watts' in power_data.columns and 'max_gpu_memory_mb' in power_data.columns:
                power_memory_data = power_data.dropna(subset=['avg_power_watts', 'max_gpu_memory_mb'])
                if not power_memory_data.empty:
                    fig.add_trace(
                        go.Scatter(x=power_memory_data['max_gpu_memory_mb'], y=power_memory_data['avg_power_watts'],
                                 mode='markers', name='Power vs Memory',
                                 text=power_memory_data['model_name'], marker_color='blue'),
                        row=2, col=1
                    )

            # Power efficiency (performance per watt)
            if 'avg_power_watts' in power_data.columns and 'total_time_seconds' in power_data.columns:
                # Deliberately NOT named `efficiency_data`: that name holds the
                # list of loaded reports built earlier in the notebook, and
                # rebinding it to a DataFrame here would break re-running the
                # metric-extraction cell.
                power_eff_data = power_data.dropna(subset=['avg_power_watts', 'total_time_seconds'])
                # Energy in watt-seconds; only a strictly positive value has a
                # finite reciprocal, so zero readings are left out of the plot.
                consumed_ws = power_eff_data['avg_power_watts'] * power_eff_data['total_time_seconds']
                power_eff_data = power_eff_data[consumed_ws > 0]
                if not power_eff_data.empty:
                    power_eff_data = power_eff_data.copy()
                    power_eff_data['power_efficiency'] = 1 / (
                        power_eff_data['avg_power_watts'] * power_eff_data['total_time_seconds']
                    )
                    fig.add_trace(
                        go.Scatter(x=power_eff_data['model_name'], y=power_eff_data['power_efficiency'],
                                 mode='markers', name='Power Efficiency',
                                 text=power_eff_data['mode'], marker_color='green'),
                        row=2, col=2
                    )

            # Figure-level layout, then axis titles for each of the four panels
            fig.update_layout(height=800, title_text="üîã Power and Energy Analysis", showlegend=False)
            fig.update_xaxes(title_text="Model", row=1, col=1)
            fig.update_xaxes(title_text="Model", row=1, col=2)
            fig.update_xaxes(title_text="GPU Memory (MB)", row=2, col=1)
            fig.update_xaxes(title_text="Model", row=2, col=2)
            fig.update_yaxes(title_text="Power (Watts)", row=1, col=1)
            fig.update_yaxes(title_text="Energy (Wh)", row=1, col=2)
            fig.update_yaxes(title_text="Power (Watts)", row=2, col=1)
            fig.update_yaxes(title_text="Efficiency (1/W‚ãÖs)", row=2, col=2)

            fig.show()

            # Summary statistics
            if 'avg_power_watts' in power_data.columns:
                print("\nüìä Power Statistics by Model:")
                power_stats = power_data.groupby('model_name')['avg_power_watts'].agg(['mean', 'median', 'std', 'min', 'max'])
                display(power_stats.round(2))

        else:
            print("‚ùå No power data available for analysis")
    else:
        print("‚ùå No power-related columns found in efficiency data")
else:
    print("‚ùå No efficiency data available")

In [None]:
# Comprehensive Model Comparison
# Builds one summary row per (model family, model, mode) combination, shows it
# as a table, draws a radar chart of min-max normalised resource usage and
# prints a ranking by a combined time/GPU-memory score.
if not df_efficiency.empty:
    print("üèÜ Comprehensive Model Comparison")

    # Metrics summarised (mean and std) for every combination.
    metrics_to_summarize = [
        'max_gpu_memory_mb', 'avg_gpu_memory_mb', 'max_cpu_percent',
        'max_memory_mb', 'total_time_seconds', 'avg_power_watts', 'total_energy_wh'
    ]

    # Create a summary comparison table
    comparison_metrics = []

    for model_family in df_efficiency['model_family'].unique():
        family_data = df_efficiency[df_efficiency['model_family'] == model_family]

        for model_name in family_data['model_name'].unique():
            model_data = family_data[family_data['model_name'] == model_name]

            for mode in model_data['mode'].unique():
                mode_data = model_data[model_data['mode'] == mode]

                if mode_data.empty:
                    continue

                summary = {
                    'Model_Family': model_family,
                    'Model': model_name,
                    'Mode': mode,
                    'Count': len(mode_data)
                }

                # Add efficiency metrics; a metric with no readings in this
                # group contributes no keys and ends up NaN in the table.
                for metric in metrics_to_summarize:
                    if metric in mode_data.columns:
                        values = mode_data[metric].dropna()
                        if not values.empty:
                            summary[f'{metric}_mean'] = values.mean()
                            summary[f'{metric}_std'] = values.std()

                comparison_metrics.append(summary)

    if comparison_metrics:
        df_comparison = pd.DataFrame(comparison_metrics)

        print("\nüìä Model Efficiency Comparison Table:")
        display(df_comparison)

        # Create radar chart for model comparison
        # Select key metrics for radar chart (only those actually present)
        radar_metrics = ['max_gpu_memory_mb_mean', 'total_time_seconds_mean', 'max_cpu_percent_mean']
        radar_metrics = [m for m in radar_metrics if m in df_comparison.columns]

        if len(radar_metrics) >= 2:
            # Normalize metrics for radar chart (0-100 scale)
            df_radar = df_comparison.copy()
            for metric in radar_metrics:
                if df_radar[metric].notna().any():
                    max_val = df_radar[metric].max()
                    min_val = df_radar[metric].min()
                    if max_val > min_val:
                        df_radar[f'{metric}_norm'] = 100 * (df_radar[metric] - min_val) / (max_val - min_val)
                    else:
                        df_radar[f'{metric}_norm'] = 50  # Default if all values are the same

            # Axis labels are the same for every trace, so build them once.
            labels = [metric.replace('_mean', '').replace('_', ' ').title() for metric in radar_metrics]

            # Create radar chart: one filled polygon per table row
            fig = go.Figure()

            for _, row in df_radar.iterrows():
                model_label = f"{row['Model']} ({row['Mode']})"
                values = [row.get(f'{metric}_norm', 0) for metric in radar_metrics]

                fig.add_trace(go.Scatterpolar(
                    r=values,
                    theta=labels,
                    fill='toself',
                    name=model_label
                ))

            fig.update_layout(
                polar=dict(
                    radialaxis=dict(
                        visible=True,
                        range=[0, 100]
                    )),
                showlegend=True,
                title="üéØ Model Efficiency Radar Chart (Higher = More Resource Usage)"
            )

            fig.show()

        # Efficiency ranking
        if 'total_time_seconds_mean' in df_comparison.columns and 'max_gpu_memory_mb_mean' in df_comparison.columns:
            df_ranking = df_comparison.dropna(subset=['total_time_seconds_mean', 'max_gpu_memory_mb_mean']).copy()

            if not df_ranking.empty:
                # Each metric is scaled by its column maximum. A maximum of 0
                # would turn the division into 0/0 = NaN, so scale by 1 in that
                # case and let the term contribute 0 to the score.
                time_scale = df_ranking['total_time_seconds_mean'].max()
                if time_scale == 0:
                    time_scale = 1
                memory_scale = df_ranking['max_gpu_memory_mb_mean'].max()
                if memory_scale == 0:
                    memory_scale = 1

                # Calculate efficiency score (lower is better): the mean of
                # the two scaled metrics
                df_ranking['efficiency_score'] = (
                    df_ranking['total_time_seconds_mean'] / time_scale +
                    df_ranking['max_gpu_memory_mb_mean'] / memory_scale
                ) / 2

                df_ranking = df_ranking.sort_values('efficiency_score')

                print("\nüèÜ Efficiency Ranking (Lower Score = More Efficient):")
                ranking_display = df_ranking[['Model_Family', 'Model', 'Mode', 'efficiency_score',
                                            'total_time_seconds_mean', 'max_gpu_memory_mb_mean']].round(3)
                display(ranking_display)

    else:
        print("‚ùå No data available for model comparison")
else:
    print("‚ùå No efficiency data available")

## 🎯 Key Findings and Recommendations

Based on the efficiency analysis above, here are the key insights:

### 📊 Memory Efficiency
- **GPU Memory Usage**: Compare VRAM consumption across models
- **CPU Memory**: System RAM requirements  
- **Memory Growth**: Training vs inference memory patterns

### ⏱️ Time Efficiency
- **Training Time**: Time required for 10-epoch training
- **Inference Speed**: Prediction latency comparison
- **Scalability**: Performance with different model sizes

### 🔋 Energy Efficiency
- **Power Consumption**: Watts used during execution
- **Energy Cost**: Total energy (Wh) per experiment
- **Efficiency Ratio**: Performance per watt metrics

### 🏆 Model Rankings
1. **Most Memory Efficient**: Lowest GPU VRAM usage
2. **Fastest Training**: Shortest training time  
3. **Fastest Inference**: Lowest prediction latency
4. **Most Energy Efficient**: Best performance/energy ratio

### 💡 Recommendations
- **For Production**: Choose models with best inference efficiency
- **For Development**: Balance training time vs accuracy
- **For Resource-Constrained**: Prioritize memory efficiency
- **For Large Scale**: Consider energy consumption costs

In [None]:
# Export results for further analysis
# Writes the per-run table, the model comparison table (when an earlier cell
# produced one) and a JSON summary into BASE_DIR.
if not df_efficiency.empty:

    # Save comprehensive efficiency data
    output_file = BASE_DIR / 'comprehensive_efficiency_analysis.csv'
    df_efficiency.to_csv(output_file, index=False)
    print(f"üíæ Saved comprehensive efficiency data to: {output_file}")

    # Save model comparison (only exists if the comparison cell ran and found data)
    comparison_saved = 'df_comparison' in locals()
    if comparison_saved:
        comparison_file = BASE_DIR / 'model_efficiency_comparison.csv'
        df_comparison.to_csv(comparison_file, index=False)
        print(f"üíæ Saved model comparison to: {comparison_file}")

    # Create efficiency summary
    summary = {
        'analysis_date': pd.Timestamp.now().isoformat(),
        'total_experiments': len(df_efficiency),
        'model_families': df_efficiency['model_family'].nunique(),
        'unique_models': df_efficiency['model_name'].nunique(),
        'modes_tested': df_efficiency['mode'].unique().tolist(),
    }

    # Add aggregate statistics. A statistic that is NaN is stored as None so
    # the file contains `null` rather than the bare token `NaN`, which strict
    # JSON parsers reject.
    numeric_cols = df_efficiency.select_dtypes(include=[np.number]).columns
    for col in numeric_cols:
        for stat_name, stat_value in (('mean', df_efficiency[col].mean()),
                                      ('median', df_efficiency[col].median())):
            summary[f'{col}_{stat_name}'] = None if pd.isna(stat_value) else float(stat_value)

    summary_file = BASE_DIR / 'efficiency_analysis_summary.json'
    with open(summary_file, 'w', encoding='utf-8') as f:
        json.dump(summary, f, indent=2, default=str)
    print(f"üíæ Saved analysis summary to: {summary_file}")

    # List only the files that were actually written in this run.
    print(f"\n‚úÖ Analysis complete! Key files created:")
    print(f"  üìä Efficiency data: comprehensive_efficiency_analysis.csv")
    if comparison_saved:
        print(f"  üèÜ Model comparison: model_efficiency_comparison.csv")
    print(f"  üìã Summary: efficiency_analysis_summary.json")

else:
    print("‚ùå No efficiency data to export")