# Phase 3: Transfer Learning Across Cities

This notebook demonstrates transfer learning for EV charging prediction:
1. Pre-train on data-rich cities (Shenzhen, Amsterdam, Los Angeles)
2. Fine-tune on data-sparse cities (Melbourne, Johannesburg, São Paulo)
3. Compare with training from scratch

**Goal**: Demonstrate a 15–30% reduction in MAE on the target cities when fine-tuning a pre-trained model, relative to training the same model from scratch

In [None]:
import os
import sys
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns

sys.path.insert(0, os.path.abspath('..'))

from api.dataset.common import EVDataset
from api.dataset.distributed import DistributedEVDataset
from api.model.foundation import load_foundation_model
from api.utils import calculate_regression_metrics
from experiment.utils.experiment_tracking import ExperimentTracker

# Environment report: confirms the import cell completed and shows which
# PyTorch build is active and whether it can see a CUDA device.
print(
    "✓ Imports successful",
    f"PyTorch version: {torch.__version__}",
    f"CUDA available: {torch.cuda.is_available()}",
    sep="\n",
)

## Configuration

In [None]:
# Central experiment settings read by the cells below. Insertion order matters
# only for the summary printed at the end of this cell.
CONFIG = dict(
    # Data-rich cities for pre-training
    pretrain_cities=['SZH', 'AMS', 'LOA'],

    # Data-sparse cities for transfer learning
    target_cities=['MEL', 'JHB', 'SPO'],

    # Model configuration
    model_name='moment',
    model_size='small',
    feature='volume',
    auxiliary='all',

    # Pre-training
    pretrain_epochs=50,
    pretrain_batch_size=64,
    pretrain_lr=1e-4,

    # Fine-tuning
    # NOTE(review): freeze_encoder / unfreeze_layers are not consumed by any
    # cell visible in this notebook yet; presumably they drive the
    # "freeze encoder, unfreeze last N layers" step from the roadmap.
    finetune_epochs=20,
    finetune_batch_size=32,
    finetune_lr=1e-5,
    freeze_encoder=True,
    unfreeze_layers=2,

    # Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
    device='cuda' if torch.cuda.is_available() else 'cpu',
    seed=42,
)

# Seed PyTorch and NumPy's global RNG from the same configured value.
for seed_rng in (torch.manual_seed, np.random.seed):
    seed_rng(CONFIG['seed'])

# Echo the effective configuration, one "key: value" pair per line.
print(
    "Transfer Learning Configuration:",
    *(f"  {key}: {value}" for key, value in CONFIG.items()),
    sep="\n",
)

## Step 1: Pre-train on Data-Rich Cities

In [None]:
# Section banner for Step 1.
print("\n" + "=" * 60, "STEP 1: MULTI-CITY PRE-TRAINING", "=" * 60, sep="\n")

print(f"\nPre-training on: {', '.join(CONFIG['pretrain_cities'])}")

# Build one EVDataset per pre-training city, keyed by city code.
print("\nLoading datasets...")
city_datasets = {}

for city in CONFIG['pretrain_cities']:
    # Each city's data lives in its own '<CITY>_remove_zero' folder.
    data_path = f'../data/{city}_remove_zero/'
    print(f"  Loading {city}...")

    dataset = EVDataset(
        data_path=data_path,
        pred_type='site',
        feature=CONFIG['feature'],
        auxiliary=CONFIG['auxiliary'],
    )
    city_datasets[city] = dataset

    # The summary treats feat.shape[0] as timesteps and feat.shape[1] as sites.
    print(f"    {city}: {dataset.feat.shape[1]} sites, {dataset.feat.shape[0]} timesteps")

# Combined site count across every loaded city.
sites_per_city = [city_dataset.feat.shape[1] for city_dataset in city_datasets.values()]
total_sites = sum(sites_per_city)
print(f"\n✓ Total sites for pre-training: {total_sites}")

# TODO: Implement multi-city data combination and training
print("\n⚠️  Multi-city training implementation needed")
print("You can use api/dataset/distributed.py as reference")

## Step 2: Fine-tune on Target Cities

In [None]:
# Section banner for Step 2.
print("\n" + "=" * 60, "STEP 2: FINE-TUNING ON TARGET CITIES", "=" * 60, sep="\n")

# One placeholder record per target city; real metrics are not produced yet.
transfer_results = []

for target_city in CONFIG['target_cities']:
    print(f"\n{'='*60}")
    print(f"Target City: {target_city}")
    print(f"{'='*60}")

    # Load target city data
    data_path = f'../data/{target_city}_remove_zero/'
    print(f"Loading {target_city} dataset...")

    target_dataset = EVDataset(
        data_path=data_path,
        pred_type='site',
        feature=CONFIG['feature'],
        auxiliary=CONFIG['auxiliary'],
    )

    # Split before reading train_feat below.
    # NOTE(review): fold 1 of 6 with an 80/10 train/valid ratio is hard-coded
    # here; confirm the exact split semantics against EVDataset.
    target_dataset.split_cross_validation(
        train_ratio=0.8,
        valid_ratio=0.1,
        fold=1,
        total_fold=6,
    )

    print(f"  Sites: {target_dataset.feat.shape[1]}")
    print(f"  Training samples: {len(target_dataset.train_feat)}")

    # TODO: Load pre-trained model and fine-tune
    print("\n  ⚠️  Fine-tuning implementation needed")

    # Placeholder result: only the city code and its site count are recorded.
    transfer_results.append(
        dict(
            city=target_city,
            sites=target_dataset.feat.shape[1],
            status='template',
        )
    )

print("\n✓ Template for all target cities created")

## Step 3: Baseline - Train from Scratch

In [None]:
# Section banner for Step 3.
print("\n" + "=" * 60, "STEP 3: TRAIN FROM SCRATCH (BASELINE)", "=" * 60, sep="\n")

# One placeholder record per target city; no training happens in this cell yet.
scratch_results = []

for target_city in CONFIG['target_cities']:
    print(f"\nTraining from scratch on {target_city}...")

    # TODO: Train model from random initialization
    print("  ⚠️  From-scratch training implementation needed")

    scratch_results.append(dict(city=target_city, status='template'))

print("\n✓ Baseline templates created")

## Compare Transfer vs From-Scratch

In [None]:
# Section banner for the comparison step.
print("\n" + "=" * 60, "TRANSFER LEARNING COMPARISON", "=" * 60, sep="\n")

# Nothing is computed here yet; this cell only lists the planned comparisons.
print(
    "\nWhen implemented, this section will show:",
    "  1. MAE comparison: Transfer vs From-Scratch",
    "  2. Convergence speed: Epochs to reach target performance",
    "  3. Data efficiency: Performance with limited training data",
    "  4. Per-city breakdown",
    sep="\n",
)

print("\nExpected result: 15-30% MAE improvement with transfer learning")

## Implementation Roadmap

To complete this notebook:

### 1. Multi-City Data Handling
Reference: `api/dataset/distributed.py`
- Combine datasets from multiple cities
- Handle city-specific normalization
- Create unified data loaders

### 2. Pre-Training Loop
- Load foundation model
- Train on combined SZH+AMS+LOA data
- Save checkpoint: `results/transfer_learning/pretrained/multi_city.pth`

### 3. Fine-Tuning Loop
- Load pre-trained checkpoint
- Freeze encoder, unfreeze last N layers
- Train on target city (MEL/JHB/SPO)
- Evaluate and save results

### 4. Baseline Comparison
- Train the same architecture from random initialization
- Use the same training data as in fine-tuning
- Compare metrics against the fine-tuned model

### Alternative Approach
You can also leverage the existing federated learning infrastructure:
- `example/knowledge_transfer.py` shows multi-city training
- `api/federated/` has client-server architecture
- Adapt for transfer learning instead of federated averaging

In [None]:
# Save template status
results_df = pd.DataFrame({
    'pretrain_cities': [', '.join(CONFIG['pretrain_cities'])] * len(CONFIG['target_cities']),
    'target_city': CONFIG['target_cities'],
    'status': ['template'] * len(CONFIG['target_cities'])
})

results_df.to_csv('../results/transfer_learning/template_status.csv', index=False)

print("\n" + "="*60)
print("PHASE 3 TEMPLATE COMPLETE")
print("="*60)
print("\nTransfer learning framework structure created.")
print("See implementation roadmap above for next steps.")
print("\nRecommended: Start with single-city fine-tuning first,")
print("then scale to multi-city pre-training.")