In [1]:
import sys
import json
from pathlib import Path

# Project paths, relative to the notebook's working directory.
src_path = Path("../src")
data_dir = Path("../data")

# Put the project source directory first on the import path so the notebook can
# import the project's modules. The membership check keeps the cell idempotent:
# re-running it no longer stacks duplicate entries onto sys.path.
if str(src_path) not in sys.path:
    sys.path.insert(0, str(src_path))

print("Environment ready")

Environment ready


In [2]:
def _estimate_parameters_millions(model_name, generator, discriminator):
    """
    Return the notebook's rough parameter estimate for one model, in millions.

    NOTE(review): this is a coarse heuristic (feature width squared times a
    layer count) with no kernel-size or channel-multiplier terms. The recorded
    notebook output shows "~0.0M" for lightweight-baseline, while the model
    validation cell reports 4,128,164 generator parameters for the same model,
    so treat the number as a relative indicator only.
    """
    ngf = generator.get('ngf', 64)
    ndf = discriminator.get('ndf', 64)
    n_blocks = generator.get('n_blocks', 9)
    d_layers = discriminator.get('n_layers', 3)

    if 'transformer' in model_name:
        # Transformer variants add a term for the attention stack.
        transformer_layers = generator.get('transformer_layers', 4)
        attention_heads = generator.get('attention_heads', 8)
        transformer_params = transformer_layers * attention_heads * ngf * ngf * 4
        base_params = ngf * ngf * (n_blocks * 2 + 8) + ndf * ndf * (d_layers + 2)
        return (base_params + transformer_params) / 1000000

    # Standard pix2pix estimate
    gen_params = ngf * ngf * (n_blocks * 2 + 10)
    disc_params = ndf * ndf * (d_layers + 2)
    return (gen_params + disc_params) / 1000000


def _print_model_summary(model_name, model_config, params, generator, discriminator):
    """Print the configuration summary (architecture, generator, discriminator, training) for one model."""
    print(f"\n{model_name.upper().replace('-', ' ')}:")
    print(f"  Purpose: {model_config.get('description', 'No description')}")

    total_params = _estimate_parameters_millions(model_name, generator, discriminator)

    print(f"  Architecture: {model_config.get('architecture', 'pix2pix')}")
    print("  Generator:")
    print(f"    Channels: {generator.get('input_channels', 3)} -> {generator.get('output_channels', 4)}")
    print(f"    Base features: {generator.get('ngf', 64)}")
    print(f"    Residual blocks: {generator.get('n_blocks', 9)}")
    print(f"    Normalization: {generator.get('norm_layer', 'instance')}")
    print(f"    Dropout: {generator.get('dropout', 0.3)}")

    # Special features
    if generator.get('use_attention', False):
        print("    Features: Self-attention mechanism")
    if generator.get('transformer_layers', 0) > 0:
        print(f"    Features: {generator.get('transformer_layers')} transformer layers")
        print(f"    Features: {generator.get('attention_heads')} attention heads")

    print("  Discriminator:")
    print(f"    Input channels: {discriminator.get('input_channels', 7)} (artwork+sprite)")
    print(f"    Base features: {discriminator.get('ndf', 64)}")
    print(f"    Layers: {discriminator.get('n_layers', 3)}")
    if discriminator.get('use_spectral_norm', False):
        print("    Features: Spectral normalization")

    print("  Training:")
    print(f"    Image size: {params.get('image_size', 256)}px")
    print(f"    L1 loss weight: {params.get('lambda_l1', 150)}")
    if params.get('lambda_perceptual', 0) > 0:
        print(f"    Perceptual loss: {params.get('lambda_perceptual')}")
    if params.get('lambda_pixel_art', 0) > 0:
        print(f"    Pixel art loss: {params.get('lambda_pixel_art')}")

    print(f"  Estimated parameters: ~{total_params:.1f}M")


def _assess_suitability(model_name, params, generator):
    """Return (score, strengths, considerations) for one model; the score tops out at 12 (base 9 plus 3 bonuses)."""
    suitability_score = 0
    strengths = []
    considerations = []

    # Model-specific analysis, keyed on substrings of the config name.
    # A name matching none of them keeps a base score of 0.
    if 'lightweight' in model_name:
        strengths.append("Fast training and inference")
        strengths.append("Low memory requirements")
        strengths.append("Reduced overfitting risk")
        considerations.append("May lack capacity for complex mappings")
        suitability_score = 7  # Good for baseline

    elif 'sprite-optimized' in model_name:
        strengths.append("Attention mechanism for detail preservation")
        strengths.append("Optimized loss weights for pixel art")
        strengths.append("Spectral normalization for stability")
        strengths.append("RGBA support with proper channel handling")
        considerations.append("Balanced complexity for dataset size")
        suitability_score = 9  # Recommended primary model

    elif 'transformer' in model_name:
        strengths.append("Long-range dependency modeling")
        strengths.append("Advanced attention mechanisms")
        strengths.append("State-of-the-art architecture")
        considerations.append("Higher computational requirements")
        considerations.append("May need careful regularization")
        suitability_score = 8  # Experimental but promising

    # Common advantages for all models.
    # NOTE(review): these checks use different fallbacks (0 / 3 / 0) from the
    # values displayed in the summary (256 / 4 / 150), so a config that omits a
    # key is shown with the default but gets no bonus for it. Confirm whether
    # that is intended.
    if params.get('image_size', 0) == 256:
        strengths.append("Direct 256px compatibility")
        suitability_score += 1

    if generator.get('output_channels', 3) == 4:
        strengths.append("RGBA transparency support")
        suitability_score += 1

    if params.get('lambda_l1', 0) >= 150:
        strengths.append("Strong pixel-level accuracy emphasis")
        suitability_score += 1

    return suitability_score, strengths, considerations


def _print_implementation_strategy(architecture_analysis):
    """Print the score-ranked model list plus the fixed curriculum and next-step notes."""
    # (name keyword, role label, use-case guidance). Guidance is tied to the
    # model's role rather than to its rank: ranking is by score, so the
    # baseline model is not necessarily listed first.
    role_guidance = (
        ('lightweight', "BASELINE - Quick validation and debugging", 'Start here for quick results'),
        ('sprite-optimized', "PRIMARY - Main production model", 'Optimize after baseline'),
        ('transformer', "ADVANCED - Experimental state-of-the-art", 'Experiment if resources allow'),
    )
    # Models with no recognised role keep the previous rank-based guidance.
    rank_guidance = {1: 'Start here for quick results', 2: 'Optimize after baseline'}

    print(f"\n{'-'*50}")
    print("IMPLEMENTATION STRATEGY")
    print("-" * 50)

    # Highest score first; the sort is stable, so ties keep config order.
    sorted_models = sorted(architecture_analysis.items(), key=lambda item: item[1], reverse=True)

    print("\nRecommended Training Sequence:")
    for rank, (model_name, score) in enumerate(sorted_models, 1):
        role = ""
        use_case = rank_guidance.get(rank, 'Experiment if resources allow')
        for keyword, keyword_role, keyword_use_case in role_guidance:
            if keyword in model_name:
                role, use_case = keyword_role, keyword_use_case
                break

        print(f"  {rank}. {model_name.replace('-', ' ').title()}")
        print(f"     Role: {role}")
        print(f"     Score: {score}/12")
        print(f"     Use case: {use_case}")

    print("\nCurriculum Learning Compatibility:")
    print("  All models support:")
    print("    • Progressive input scaling (128px -> 192px -> 256px)")
    print("    • Fixed 256px sprite output")
    print("    • RGBA channel handling")
    print("    • Instance normalization (better for style transfer)")
    print("    • Optimized loss weights for pixel art")

    print("\nNext Steps:")
    print("  1. Start with lightweight-baseline for rapid prototyping")
    print("  2. Validate data pipeline and training stability")
    print("  3. Switch to sprite-optimized for production training")
    print("  4. Experiment with transformer-enhanced if results plateau")
    print("  5. Apply curriculum learning with progressively larger inputs")


def analyze_model_architectures():
    """
    Analyze the curated model architectures for Pokemon sprite generation.

    Reads ``config/model_configs.json`` under the module-level ``src_path``,
    prints a per-model report (configuration summary, rough parameter
    estimate, strengths, considerations, suitability score out of 12) for
    every entry under the ``pix2pix_models`` key, then prints a score-ranked
    implementation strategy.

    Returns:
        dict mapping model name to suitability score, or None when the config
        file does not exist or contains no ``pix2pix_models`` entries (a
        message is printed in both cases).
    """
    # Load model configurations from repository
    config_path = src_path / "config" / "model_configs.json"

    if not config_path.exists():
        print("Model configuration file not available")
        return None

    # Read as UTF-8 explicitly so parsing does not depend on the platform's
    # default locale encoding.
    with open(config_path, 'r', encoding='utf-8') as f:
        config = json.load(f)

    print("Curated Model Architecture Analysis")
    print("=" * 50)
    print("Focus: Top 3 models for sprite generation task")

    # Analyze available model configurations
    model_configs = config.get('pix2pix_models', {})

    if not model_configs:
        print("No model configurations found")
        return None

    print(f"\nAnalyzing {len(model_configs)} selected models")

    architecture_analysis = {}

    for model_name, model_config in model_configs.items():
        params = model_config.get('parameters', {})
        generator = params.get('generator', {})
        discriminator = params.get('discriminator', {})

        _print_model_summary(model_name, model_config, params, generator, discriminator)

        suitability_score, strengths, considerations = _assess_suitability(
            model_name, params, generator
        )

        print("  Strengths:")
        for strength in strengths:
            print(f"    + {strength}")

        if considerations:
            print("  Considerations:")
            for consideration in considerations:
                print(f"    - {consideration}")

        print(f"  Suitability Score: {suitability_score}/12")
        architecture_analysis[model_name] = suitability_score

    _print_implementation_strategy(architecture_analysis)

    return architecture_analysis

# Run the architecture review. The result maps each model name to its
# suitability score, or is None when the config file or its model section
# is missing.
architecture_scores = analyze_model_architectures()

# Emit the closing recap in a single write; the text matches line-by-line printing.
print("\n".join([
    "\nSUMMARY:",
    "=" * 50,
    "Configuration cleaned up from 8+ models to 3 focused architectures:",
    "  • Removed outdated/suboptimal configurations",
    "  • Kept only models with 256px support and RGBA channels",
    "  • Focused on curriculum learning compatibility",
    "  • Ready for immediate implementation",
]))

Curated Model Architecture Analysis
Focus: Top 3 models for sprite generation task

Analyzing 3 selected models

LIGHTWEIGHT BASELINE:
  Purpose: Lightweight baseline for quick experimentation - Fast training with minimal parameters
  Architecture: pix2pix
  Generator:
    Channels: 3 -> 4
    Base features: 32
    Residual blocks: 6
    Normalization: instance
    Dropout: 0.3
  Discriminator:
    Input channels: 7 (artwork+sprite)
    Base features: 32
    Layers: 2
  Training:
    Image size: 256px
    L1 loss weight: 150.0
  Estimated parameters: ~0.0M
  Strengths:
    + Fast training and inference
    + Low memory requirements
    + Reduced overfitting risk
    + Direct 256px compatibility
    + RGBA transparency support
    + Strong pixel-level accuracy emphasis
  Considerations:
    - May lack capacity for complex mappings
  Suitability Score: 10/12

SPRITE OPTIMIZED:
  Purpose: State-of-the-art configuration optimized specifically for pixel art sprite generation
  Architecture:

In [3]:
# Learning-rate search, delegated to the project's optimizers package.
import importlib
import sys

# The optimizers package is imported from the project source directory, so
# make sure that directory is on the import path.
source_dir = str(src_path)
if source_dir not in sys.path:
    sys.path.append(source_dir)

# If an earlier run already imported the module, reload it so edits made on
# disk are picked up before the function is imported below.
if 'optimizers.lr_finder' in sys.modules:
    stale_module = sys.modules['optimizers.lr_finder']
    importlib.reload(stale_module)

from optimizers.lr_finder import find_optimal_learning_rates

# Run the search against the shared model configuration file.
model_config_file = src_path / "config" / "model_configs.json"
optimal_learning_rates = find_optimal_learning_rates(model_config_file)

LEARNING RATE OPTIMIZATION
Using model training - no heuristics
Device: cuda
Using synthetic data for LR finding

--- LR finder for: lightweight-baseline ---
Device: cuda
Using synthetic data for LR finding

--- LR finder for: lightweight-baseline ---
LR Range Test: 1.00e-07 → 1.00e+00 (30 iterations)
LR Range Test: 1.00e-07 → 1.00e+00 (30 iterations)
Iter 0: LR 1.00e-07, Loss 90.0913
Iter 10: LR 2.15e-05, Loss 90.0890
Iter 0: LR 1.00e-07, Loss 90.0913
Iter 10: LR 2.15e-05, Loss 90.0890
Iter 20: LR 4.64e-03, Loss 85.3171
Early stop at iteration 23 - loss diverged
Results: Optimal LR = 9.04e-04, Range = 9.04e-05 - 2.71e-03
✓ Completed LR finding for lightweight-baseline

--- LR finder for: sprite-optimized ---
LR Range Test: 1.00e-07 → 1.00e+00 (30 iterations)
Iter 20: LR 4.64e-03, Loss 85.3171
Early stop at iteration 23 - loss diverged
Results: Optimal LR = 9.04e-04, Range = 9.04e-05 - 2.71e-03
✓ Completed LR finding for lightweight-baseline

--- LR finder for: sprite-optimized ---
LR 

In [4]:
# Batch-size search, delegated to the project's optimizers package.
import importlib

# If an earlier run already imported the module, reload it so edits made on
# disk are picked up before the function is imported below.
if 'optimizers.batch_optimizer' in sys.modules:
    stale_module = sys.modules['optimizers.batch_optimizer']
    importlib.reload(stale_module)

from optimizers.batch_optimizer import optimize_batch_sizes

# Run the search against the shared model configuration file.
model_config_file = src_path / "config" / "model_configs.json"
batch_size_recommendations = optimize_batch_sizes(model_config_file)

BATCH SIZE OPTIMIZATION
Testing memory usage with models
Device: cuda

--- Testing batch sizes for: lightweight-baseline ---
Testing memory usage and training speed...
Batch Size | Memory (MB) | Time (ms) | Status
--------------------------------------------------
        1 |      906.9 |     47.2 | ✓ Success
        2 |       69.1 |     12.1 | ✓ Success
        1 |      906.9 |     47.2 | ✓ Success
        2 |       69.1 |     12.1 | ✓ Success
        4 |      171.4 |     21.4 | ✓ Success
        8 |      377.3 |     26.2 | ✓ Success
        4 |      171.4 |     21.4 | ✓ Success
        8 |      377.3 |     26.2 | ✓ Success
       16 |      786.3 |     66.3 | ✓ Success
       16 |      786.3 |     66.3 | ✓ Success
       32 |     1595.8 |     42.9 | ✓ Success
       32 |     1595.8 |     42.9 | ✓ Success
       64 |     3225.3 |     70.8 | ✓ Success

Dynamic extension check: last_successful=64, max_tested=64

Extending tests beyond 64 since max tested was successful...
       64 |    

Testing memory usage and training speed...
Batch Size | Memory (MB) | Time (ms) | Status
--------------------------------------------------
        1 |      625.1 |     72.1 | ✓ Success
        1 |      625.1 |     72.1 | ✓ Success
        2 |      123.2 |     32.0 | ✓ Success
        2 |      123.2 |     32.0 | ✓ Success
        4 |      126.7 |     21.4 | ✓ Success
        4 |      126.7 |     21.4 | ✓ Success
        8 |      691.0 |     37.7 | ✓ Success
        8 |      691.0 |     37.7 | ✓ Success
       16 |     1223.1 |     49.4 | ✓ Success
       16 |     1223.1 |     49.4 | ✓ Success
       32 |     2701.8 |     96.4 | ✓ Success
       32 |     2701.8 |     96.4 | ✓ Success
       64 |     5662.7 |    150.9 | ✓ Success

Dynamic extension check: last_successful=64, max_tested=64

Extending tests beyond 64 since max tested was successful...
       64 |     5662.7 |    150.9 | ✓ Success

Dynamic extension check: last_successful=64, max_tested=64

Extending tests beyond 64 since m

In [5]:
# Model configuration validation, delegated to the project's optimizers package.
import importlib

# If an earlier run already imported the module, reload it so edits made on
# disk are picked up before the function is imported below.
if 'optimizers.model_validator' in sys.modules:
    stale_module = sys.modules['optimizers.model_validator']
    importlib.reload(stale_module)

from optimizers.model_validator import optimize_model_config

# Validate every model described in the shared configuration file.
model_config_file = src_path / "config" / "model_configs.json"
validation_results = optimize_model_config(model_config_file)

MODEL VALIDATION
Creating and testing models
Device: cuda
Validating 3 model configurations...

Validating lightweight-baseline...
  ✓ Generator created: 4,128,164 parameters
  ✓ Discriminator created: 169,697 parameters
  ✓ Models moved to cuda
  ✓ Forward pass successful
    Generator: torch.Size([2, 3, 256, 256]) → torch.Size([2, 4, 256, 256])
    Discriminator: torch.Size([2, 3, 256, 256]) + torch.Size([2, 4, 256, 256]) → torch.Size([2, 1, 62, 62])
  ✓ Backward pass successful
    Generator loss: 88.1697
    Discriminator loss: 0.6356
  ✅ lightweight-baseline: VALID

Validating sprite-optimized...
  ✓ Generator created: 30,660,420 parameters
  ✓ Discriminator created: 2,768,833 parameters
  ✓ Models moved to cuda
  ✓ Forward pass successful
    Generator: torch.Size([2, 3, 256, 256]) → torch.Size([2, 4, 256, 256])
    Discriminator: torch.Size([2, 3, 256, 256]) + torch.Size([2, 4, 256, 256]) → torch.Size([2, 1, 30, 30])
  ✓ Models moved to cuda
  ✓ Forward pass successful
    Gener

In [8]:
# Import training schedule optimizer from the optimizers module.
# If an earlier run already imported it, reload first so on-disk edits are picked up.
import importlib
if 'optimizers.schedule_optimizer' in sys.modules:
    importlib.reload(sys.modules['optimizers.schedule_optimizer'])
from optimizers.schedule_optimizer import create_optimal_training_plan

# Single definition of the config location used by both the planner and the check below
config_file = src_path / "config" / "model_configs.json"

# Create comprehensive training plan and update config file
print("Creating optimized training schedules and updating configuration...")
training_plans = create_optimal_training_plan(config_file)

# Verify that the config file was updated
print("\n" + "="*60)
print("CONFIGURATION UPDATE VERIFICATION")
print("="*60)

# Re-read the file from disk (explicit UTF-8, independent of the platform's
# default encoding) and display the updated config section
with open(config_file, 'r', encoding='utf-8') as f:
    updated_config = json.load(f)

if 'optimized_training_schedules' in updated_config:
    print("[SUCCESS] Optimized training schedules added to config file")
    print(f"[INFO] Found {len(updated_config['optimized_training_schedules'])} optimized schedules")
    
    for model_name, schedule in updated_config['optimized_training_schedules'].items():
        print(f"\n{model_name.upper().replace('-', ' ')}:")
        print(f"  Total epochs: {schedule['total_epochs']}")
        print(f"  Base learning rate: {schedule['base_learning_rate']:.2e}")
        print(f"  GPU requirement: {schedule['gpu_memory_requirement']}")
        print(f"  Curriculum stages: {len(schedule['stages'])}")
        
        for i, stage in enumerate(schedule['stages'], 1):
            print(f"    Stage {i}: {stage['stage_name']} - {stage['epochs']} epochs @ {stage['input_resolution']}px")

    # Announce readiness only when the schedules were actually found, so this
    # message is never printed after the [FAIL] line below.
    print("\nOptimized training configuration is now ready for use with train.py")
    print("Use: python train.py --config <model-name> --schedule optimized")
else:
    print("[FAIL] Optimized training schedules not found in config file")

Creating optimized training schedules and updating configuration...
OPTIMAL TRAINING PLAN FOR POKEMON SPRITE GENERATION

PLAN FOR: LIGHTWEIGHT BASELINE
Creating Curriculum Learning Schedule (lightweight complexity)
STAGE 1: Foundation Stage
  Resolution: 128px → 256px
  Epochs: 30
  Batch size: 45
  Learning rate: 2.00e-04
  Augmentation: minimal
  Focus: Basic shape and structure learning
  Batches per epoch: 17

STAGE 2: Refinement Stage
  Resolution: 192px → 256px
  Epochs: 25
  Batch size: 24
  Learning rate: 1.40e-04
  Augmentation: moderate
  Focus: Detail enhancement and color learning
  Batches per epoch: 32

STAGE 3: Polish Stage
  Resolution: 256px → 256px
  Epochs: 20
  Batch size: 16
  Learning rate: 8.00e-05
  Augmentation: strong
  Focus: Fine details and precise output
  Batches per epoch: 48

TOTAL TRAINING: 75 epochs across 3 stages
EXECUTION ORDER:
  1. Foundation Stage: 30 epochs @ 128px
  2. Refinement Stage: 25 epochs @ 192px
  3. Polish Stage: 20 epochs @ 256px

R