In [None]:
# =============================================================================
# MYTHRA GLYPHNET - MODEL TRAINING SYSTEM
# =============================================================================

import os
import yaml
import json
import shutil
import zipfile
import requests
import replicate
from pathlib import Path
from datetime import datetime
from dotenv import load_dotenv
from IPython.display import display, HTML, Markdown

# Load environment variables from .env file
load_dotenv()

# Get API token from environment
api_token = os.getenv("REPLICATE_API_TOKEN")
if api_token:
    os.environ["REPLICATE_API_TOKEN"] = api_token
    # Notebook outputs are saved alongside the code, so reveal as little of
    # the secret as possible: at most the last 4 characters, and nothing at
    # all for values too short to be partially shown safely.
    token_hint = api_token[-4:] if len(api_token) >= 12 else "****"
    print(f"✅ API token loaded from .env (ends with: ...{token_hint})")
else:
    print("❌ REPLICATE_API_TOKEN not found in .env file")
    print("💡 Please add your token to .env file: REPLICATE_API_TOKEN=your_token_here")

# Directory layout shared by every helper in this notebook
TRAINING_CONFIG = dict(
    base_dir='results/training_data',
    configs_dir='../replicate/training',
    models_dir='results/trained_models',
    logs_dir='results/training_logs',
)

# Ensure each configured directory exists (no error if it is already there)
for dir_path in TRAINING_CONFIG.values():
    os.makedirs(dir_path, exist_ok=True)

# Startup banner summarising where everything lives
print("\n".join([
    "🚀 MYTHRA GLYPHNET TRAINING SYSTEM",
    "=" * 60,
    "🎯 Purpose: Train specialized glyph generation models",
    f"📁 Training data: {TRAINING_CONFIG['base_dir']}",
    f"⚙️ Configs: {TRAINING_CONFIG['configs_dir']}",
    f"🏷️ Models: {TRAINING_CONFIG['models_dir']}",
    f"📊 Logs: {TRAINING_CONFIG['logs_dir']}",
    "=" * 60,
]))


In [None]:
# =============================================================================
# TRAINING DATA MANAGEMENT
# =============================================================================

def analyze_training_data():
    """Report how many glyph images are available for training.

    Prints the image count of every dataset folder under the training base
    directory, the image count of each style folder under ``results/glyphs``,
    the grand total, and a readiness verdict per style.

    Returns:
        dict mapping each style folder that exists ('generic', 'dream',
        'sumi', 'mythra') to its image count.
    """
    image_suffixes = ('.png', '.webp', '.svg')

    def count_images(folder):
        # Non-recursive: direct children whose suffix is a known image type
        return sum(1 for entry in folder.glob('*') if entry.suffix in image_suffixes)

    training_root = Path(TRAINING_CONFIG['base_dir'])
    glyph_root = Path('results/glyphs')

    print("📊 TRAINING DATA ANALYSIS")
    print("=" * 50)

    # Datasets that were already prepared under the training base directory
    if training_root.exists():
        for dataset in training_root.iterdir():
            if not dataset.is_dir():
                continue
            print(f"📁 {dataset.name}: {count_images(dataset)} images")

    print("\n📈 SOURCE GLYPH DATA:")
    print("-" * 30)

    # Raw glyphs, counted per style folder
    glyph_stats = {}
    if glyph_root.exists():
        for style in ('generic', 'dream', 'sumi', 'mythra'):
            style_dir = glyph_root / style
            if not style_dir.exists():
                continue
            glyph_stats[style] = count_images(style_dir)
            print(f"🌀 {style}: {glyph_stats[style]} glyphs")

    total_glyphs = sum(glyph_stats.values())
    print(f"\n📊 TOTAL AVAILABLE: {total_glyphs} glyphs")

    print("\n💡 TRAINING RECOMMENDATIONS:")
    print("-" * 40)

    # Thresholds: fewer than 20 is too little, 20-49 is borderline, 50+ is fine
    for model_type, count in glyph_stats.items():
        if count < 20:
            status = "❌ Insufficient - need more data"
        elif count < 50:
            status = "⚠️ Minimal - consider generating more"
        else:
            status = "✅ Ready for training"
        print(f"{model_type.upper()}: {count} images - {status}")

    return glyph_stats

def prepare_training_dataset(model_type, source_dir=None, target_name=None):
    """
    Prepare training dataset for a specific model type

    Copies every .png/.webp/.svg file found directly inside the source
    directory into a new dataset folder under TRAINING_CONFIG['base_dir'] and
    writes a ``dataset_metadata.yaml`` describing the copy.

    Args:
        model_type: 'gglyphs', 'dglyphs', 'sumi', 'mythra'
        source_dir: Override source directory
        target_name: Override target dataset name

    Returns:
        The dataset directory as a string, or None when the model type is
        unknown or the source directory does not exist.
    """
    image_suffixes = ('.png', '.webp', '.svg')

    if not source_dir:
        source_mapping = {
            'gglyphs': 'results/glyphs/generic',
            'dglyphs': 'results/glyphs/dream',
            'sumi': 'results/glyphs/sumi',
            'mythra': 'results/glyphs/mythra'
        }
        source_dir = source_mapping.get(model_type)
        if source_dir is None:
            # Say what is actually wrong instead of "not found: None"
            print(f"❌ Unknown model type: {model_type} "
                  f"(expected one of: {', '.join(source_mapping)})")
            return None

    source_path = Path(source_dir)
    # is_dir() rather than exists(): a plain file is not a usable source
    if not source_path.is_dir():
        print(f"❌ Source directory not found: {source_dir}")
        return None

    target_name = target_name or f"{model_type}_dataset_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    target_dir = Path(TRAINING_CONFIG['base_dir']) / target_name
    # parents=True so this works even if the base directory was removed or
    # the notebook is run from a fresh working directory
    target_dir.mkdir(parents=True, exist_ok=True)

    print(f"📦 Preparing {model_type.upper()} training dataset...")
    print(f"📂 Source: {source_dir}")
    print(f"🎯 Target: {target_dir}")

    # Copy files (regular files only; a directory named like an image would
    # make shutil.copy2 raise)
    files_copied = 0
    for file_path in source_path.glob('*'):
        if file_path.is_file() and file_path.suffix in image_suffixes:
            shutil.copy2(file_path, target_dir / file_path.name)
            files_copied += 1

    if files_copied == 0:
        print("⚠️ No image files found in source directory - dataset is empty")

    # Create dataset metadata
    metadata = {
        'model_type': model_type,
        'source_directory': str(source_dir),
        'target_directory': str(target_dir),
        'files_count': files_copied,
        'created_at': datetime.now().isoformat(),
        'file_types': list(image_suffixes),
        'purpose': f'Training data for {model_type} model'
    }

    metadata_file = target_dir / 'dataset_metadata.yaml'
    with open(metadata_file, 'w', encoding='utf-8') as f:
        yaml.dump(metadata, f, default_flow_style=False)

    print(f"✅ Dataset prepared: {files_copied} files")
    print(f"📄 Metadata saved: {metadata_file}")

    return str(target_dir)

def create_training_zip(dataset_dir, zip_name=None):
    """Bundle a dataset's image files into a ZIP archive for upload.

    Only .png/.webp/.svg files directly inside ``dataset_dir`` are added, each
    stored under its bare file name. The archive is written into
    TRAINING_CONFIG['base_dir'].

    Args:
        dataset_dir: Directory holding the prepared dataset.
        zip_name: Archive file name; defaults to "<dataset folder name>.zip".

    Returns:
        The archive path as a string, or None if ``dataset_dir`` is missing.
    """
    dataset_path = Path(dataset_dir)
    if not dataset_path.exists():
        print(f"❌ Dataset directory not found: {dataset_dir}")
        return None

    if not zip_name:
        zip_name = f"{dataset_path.name}.zip"
    zip_path = Path(TRAINING_CONFIG['base_dir']) / zip_name

    print(f"📦 Creating training ZIP: {zip_name}")

    allowed_suffixes = ('.png', '.webp', '.svg')
    with zipfile.ZipFile(zip_path, mode='w', compression=zipfile.ZIP_DEFLATED) as archive:
        for entry in dataset_path.glob('*'):
            if entry.suffix not in allowed_suffixes or not entry.is_file():
                continue
            archive.write(entry, arcname=entry.name)

    bytes_per_mb = 1024 * 1024
    zip_size = zip_path.stat().st_size / bytes_per_mb
    print(f"✅ ZIP created: {zip_path}")
    print(f"📏 Size: {zip_size:.2f} MB")

    return str(zip_path)


In [None]:
# Example generations - uncomment and modify as needed
# NOTE(review): generate_glyph is not defined in the visible part of this
# notebook - presumably it comes from glyph_codex.ipynb or earlier kernel
# state; confirm it is available before running this cell.
# NOTE(review): this cell appears three times verbatim in the notebook.

# SUMI model with sumi prompt template (default)
# generate_glyph("dragon", style="sumi", model="sumi")

# MYTHRA model with mythra prompt template
# generate_glyph("phoenix", "fire", "rebirth", style="mythra", model="mythra")

# Mix and match: SUMI model with generic template
# generate_glyph("horse", style="generic", model="sumi")

# MYTHRA model with sumi-style template
# generate_glyph("wolf", "moon", "wisdom", style="sumi", model="mythra")

# Quick test - generate a snake glyph using SUMI model
# NOTE(review): the recorded output of this cell is
# "❌ Error generating glyph: 'subject'", i.e. this call did not succeed as
# written - TODO check what generate_glyph expects for its prompt inputs.
generate_glyph("snake", model="sumi")


❌ Error generating glyph: 'subject'


In [None]:
# Example generations - uncomment and modify as needed
# NOTE(review): generate_glyph is not defined in the visible part of this
# notebook - presumably it comes from glyph_codex.ipynb or earlier kernel
# state; confirm it is available before running this cell.
# NOTE(review): this cell appears three times verbatim in the notebook.

# SUMI model with sumi prompt template (default)
# generate_glyph("dragon", style="sumi", model="sumi")

# MYTHRA model with mythra prompt template
# generate_glyph("phoenix", "fire", "rebirth", style="mythra", model="mythra")

# Mix and match: SUMI model with generic template
# generate_glyph("horse", style="generic", model="sumi")

# MYTHRA model with sumi-style template
# generate_glyph("wolf", "moon", "wisdom", style="sumi", model="mythra")

# Quick test - generate a snake glyph using SUMI model
# NOTE(review): the recorded output of this cell is
# "❌ Error generating glyph: 'subject'", i.e. this call did not succeed as
# written - TODO check what generate_glyph expects for its prompt inputs.
generate_glyph("snake", model="sumi")


❌ Error generating glyph: 'subject'


In [None]:
# Example generations - uncomment and modify as needed
# NOTE(review): generate_glyph is not defined in the visible part of this
# notebook - presumably it comes from glyph_codex.ipynb or earlier kernel
# state; confirm it is available before running this cell.
# NOTE(review): this cell appears three times verbatim in the notebook.

# SUMI model with sumi prompt template (default)
# generate_glyph("dragon", style="sumi", model="sumi")

# MYTHRA model with mythra prompt template
# generate_glyph("phoenix", "fire", "rebirth", style="mythra", model="mythra")

# Mix and match: SUMI model with generic template
# generate_glyph("horse", style="generic", model="sumi")

# MYTHRA model with sumi-style template
# generate_glyph("wolf", "moon", "wisdom", style="sumi", model="mythra")

# Quick test - generate a snake glyph using SUMI model
# NOTE(review): the recorded output of this cell is
# "❌ Error generating glyph: 'subject'", i.e. this call did not succeed as
# written - TODO check what generate_glyph expects for its prompt inputs.
generate_glyph("snake", model="sumi")


❌ Error generating glyph: 'subject'


In [None]:
# =============================================================================
# TRAINING CONFIGURATION MANAGEMENT
# =============================================================================

def load_training_config(config_name):
    """Read ``<config_name>.yaml`` from the configs directory.

    Args:
        config_name: Config file name without the ``.yaml`` extension.

    Returns:
        The parsed YAML content, or None (after printing an error) when the
        file does not exist.
    """
    config_file = Path(TRAINING_CONFIG['configs_dir'], f"{config_name}.yaml")

    if config_file.exists():
        with config_file.open('r') as handle:
            parsed = yaml.safe_load(handle)
        print(f"✅ Loaded config: {config_name}")
        return parsed

    print(f"❌ Config file not found: {config_file}")
    return None

def list_training_configs():
    """Print a summary of every ``*.yaml`` file in the configs directory.

    For each file that parses, the model name and description are shown
    (with placeholders when the keys are absent). Files that fail to load
    are reported and left out of the result.

    Returns:
        List of config names (file stems) that were read successfully; an
        empty list when the configs directory does not exist.
    """
    configs_dir = Path(TRAINING_CONFIG['configs_dir'])
    if not configs_dir.exists():
        print(f"❌ Configs directory not found: {configs_dir}")
        return []

    yaml_files = list(configs_dir.glob('*.yaml'))

    print("⚙️ AVAILABLE TRAINING CONFIGURATIONS:")
    print("=" * 50)

    readable = []
    for yaml_file in yaml_files:
        stem = yaml_file.stem
        try:
            with yaml_file.open('r') as handle:
                parsed = yaml.safe_load(handle)

            model_name = parsed.get('model_name', 'Unknown')
            description = parsed.get('description', 'No description')

            # One write per entry; the trailing newline yields the blank
            # separator line between entries
            print(f"📋 {stem}\n   Model: {model_name}\n   Description: {description}\n")

            readable.append(stem)
        except Exception as e:
            print(f"⚠️ Error reading {stem}: {e}")

    return readable

def create_custom_config(model_name, base_config='gglyphs-config', custom_params=None):
    """Derive a new training config from an existing one and save it.

    The base config is loaded, ``model_name`` and ``created_at`` are set, and
    each entry of ``custom_params`` replaces the matching top-level key. Keys
    that the base config does not already contain are reported and skipped.

    Args:
        model_name: Display name of the new model; also used (lower-cased,
            spaces turned into underscores) to build the file name.
        base_config: Name of the config to start from, without ``.yaml``.
        custom_params: Optional dict of top-level overrides.

    Returns:
        Name of the saved config (without ``.yaml``), or None when the base
        config file does not exist.
    """
    configs_root = Path(TRAINING_CONFIG['configs_dir'])

    template_path = configs_root / f"{base_config}.yaml"
    if not template_path.exists():
        print(f"❌ Base config not found: {base_config}")
        return None

    with template_path.open('r') as source:
        config = yaml.safe_load(source)

    config['model_name'] = model_name
    config['created_at'] = datetime.now().isoformat()

    # Overrides may only touch keys the base config already defines
    for key, value in (custom_params or {}).items():
        if key not in config:
            print(f"⚠️ Unknown parameter: {key}")
            continue
        config[key] = value

    custom_config_name = model_name.lower().replace(' ', '_') + "_config"
    custom_config_path = configs_root / f"{custom_config_name}.yaml"

    with custom_config_path.open('w') as sink:
        yaml.dump(config, sink, default_flow_style=False)

    print(f"✅ Created custom config: {custom_config_name}")
    print(f"📁 Saved to: {custom_config_path}")

    return custom_config_name

def preview_training_config(config_name):
    """Preview a training configuration with key parameters highlighted

    Args:
        config_name: Name of the config to load (without ``.yaml``).

    Returns:
        The loaded config, or None when it could not be loaded or is empty.
    """
    config = load_training_config(config_name)

    if not config:
        return None

    print(f"🔍 TRAINING CONFIG PREVIEW: {config_name}")
    print("=" * 60)

    # Key parameters to highlight
    key_params = [
        'model_name', 'description', 'base_model', 'trigger_word',
        'learning_rate', 'num_train_epochs', 'lora_rank', 'resolution',
        'train_batch_size', 'lora_type'
    ]

    for param in key_params:
        if param in config:
            print(f"📌 {param}: {config[param]}")

    print("\n🎯 TRAINING PARAMETERS:")
    print("-" * 30)

    # A key that is present but empty in the YAML parses to None, which
    # .get(..., {}) would hand back unchanged - fall back to an empty dict
    training_params = config.get('training_parameters') or {}
    for key, value in training_params.items():
        print(f"   {key}: {value}")

    if 'validation_prompts' in config:
        # Same None-for-empty-key concern as above; report zero prompts
        validation_prompts = config['validation_prompts'] or []
        print(f"\n✅ Validation prompts: {len(validation_prompts)} prompts")

    return config


In [None]:
# =============================================================================
# REPLICATE TRAINING INTERFACE
# =============================================================================

def start_training(config_name, dataset_path=None, destination=None):
    """
    Start a training job on Replicate

    The actual ``replicate.trainings.create`` call is currently commented
    out, so this function only assembles the inputs, prints them and writes a
    log entry.

    Args:
        config_name: Name of training configuration
        dataset_path: Path to training data (local or URL)
        destination: Replicate model destination (username/model-name)

    Returns:
        dict with 'config_name', 'inputs', 'destination' and 'status'
        ('prepared'), or None when the config cannot be loaded, a required
        argument is missing, or preparation/logging raises.
    """

    # Load configuration
    config = load_training_config(config_name)
    if not config:
        return None

    print(f"🚀 STARTING TRAINING JOB")
    print("=" * 40)
    print(f"📋 Config: {config_name}")
    print(f"🎯 Model: {config.get('model_name', 'Unknown')}")

    if not destination:
        print("❌ Error: destination required (e.g., 'username/model-name')")
        return None

    if not dataset_path:
        print("❌ Error: dataset_path required")
        return None

    try:
        # Prepare training inputs from config, with defaults for absent keys
        training_inputs = {
            'input_images': dataset_path,
            'trigger_word': config.get('trigger_word', 'GLYPH'),
            'learning_rate': config.get('learning_rate', 1e-4),
            'num_train_epochs': config.get('num_train_epochs', 1000),
            'lora_rank': config.get('lora_rank', 16),
            'resolution': config.get('resolution', 1024),
            'train_batch_size': config.get('train_batch_size', 1),
            'lora_type': config.get('lora_type', 'standard')
        }

        # Add any additional parameters from config. An empty
        # `training_parameters:` key parses to None, which dict.update()
        # rejects, so treat it the same as a missing key.
        extra_parameters = config.get('training_parameters') or {}
        training_inputs.update(extra_parameters)

        print(f"📊 Training inputs prepared:")
        for key, value in training_inputs.items():
            print(f"   {key}: {value}")

        print(f"\n🎯 Destination: {destination}")
        print(f"🔄 Starting training job...")

        # Start training (commented out for safety - uncomment when ready)
        # training = replicate.trainings.create(
        #     version="recraft-ai/recraft-v3-svg:latest",
        #     input=training_inputs,
        #     destination=destination
        # )

        # For now, just return the prepared inputs
        print("⚠️ Training job preparation complete!")
        print("💡 Uncomment the replicate.trainings.create() call to start actual training")

        # Log training attempt
        log_training_attempt(config_name, training_inputs, destination)

        return {
            'config_name': config_name,
            'inputs': training_inputs,
            'destination': destination,
            'status': 'prepared'
        }

    except Exception as e:
        # Notebook-level boundary: report the problem instead of raising
        print(f"❌ Training error: {e}")
        return None

def log_training_attempt(config_name, inputs, destination):
    """Write one YAML record describing a training attempt.

    The record goes to ``training_log_<YYYYmmdd_HHMMSS>.yaml`` inside the
    logs directory and holds the timestamp, config name, destination, the
    training inputs and the fixed status 'attempted'.
    """
    record = dict(
        timestamp=datetime.now().isoformat(),
        config_name=config_name,
        destination=destination,
        training_inputs=inputs,
        status='attempted',
    )

    file_stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    log_file = Path(TRAINING_CONFIG['logs_dir']).joinpath(f"training_log_{file_stamp}.yaml")

    with log_file.open('w') as sink:
        yaml.dump(record, sink, default_flow_style=False)

    print(f"📝 Training attempt logged: {log_file}")

def monitor_training(training_id=None):
    """Placeholder for training progress monitoring.

    Prints a header, then either usage hints (no ``training_id`` given) or a
    notice that monitoring for the given id is not implemented yet. Always
    returns None.
    """
    print("🔍 TRAINING MONITORING")
    print("=" * 30)

    if training_id:
        # Future implementation:
        # training = replicate.trainings.get(training_id)
        # print(f"Status: {training.status}")
        # print(f"Progress: {training.logs}")
        print(f"🔄 Monitoring training: {training_id}")
        print("⚠️ Monitoring functionality to be implemented")
    else:
        print("💡 This function will monitor training progress")
        print("💡 Provide training_id when available from Replicate")

def list_training_logs():
    """Print the recorded training attempts, newest file name first.

    Each ``training_log_*.yaml`` file in the logs directory is loaded and its
    timestamp, config name, destination and status are shown ('Unknown' for
    absent keys). Unreadable files are reported and skipped.

    Returns:
        List of log file paths in glob order (not the sorted display order);
        an empty list when the logs directory does not exist.
    """
    logs_dir = Path(TRAINING_CONFIG['logs_dir'])
    if not logs_dir.exists():
        print("📭 No training logs found")
        return []

    log_files = list(logs_dir.glob('training_log_*.yaml'))

    print("📊 TRAINING HISTORY")
    print("=" * 40)

    # File names embed the timestamp, so reverse name order is newest first
    newest_first = sorted(log_files, reverse=True)
    for log_file in newest_first:
        try:
            with log_file.open('r') as handle:
                entry = yaml.safe_load(handle)

            timestamp, config_name, destination, status = (
                entry.get(field, 'Unknown')
                for field in ('timestamp', 'config_name', 'destination', 'status')
            )

            print(
                f"🕐 {timestamp}\n"
                f"   Config: {config_name}\n"
                f"   Destination: {destination}\n"
                f"   Status: {status}\n"
            )

        except Exception as e:
            print(f"⚠️ Error reading {log_file.name}: {e}")

    return log_files


In [None]:
# Run the data inventory and keep the per-style counts for later cells
stats = analyze_training_data()

# Follow-up guidance, framed by two 60-character rules
print("\n".join([
    "\n" + "=" * 60,
    "🎯 Next steps based on analysis:",
    "1. If insufficient data, generate more glyphs using glyph_codex.ipynb",
    "2. If sufficient data, proceed to prepare training datasets",
    "3. Review available training configurations",
    "=" * 60,
]))


In [None]:
# Enumerate the configs on disk (this also prints a summary of each)
available_configs = list_training_configs()

print("\n🔍 DETAILED CONFIG PREVIEW:")
print("-" * 40)

# Normalise the names once up front rather than on every loop iteration
_known_configs = {name.replace('.yaml', '') for name in available_configs}

# (label shown, config key) pairs for the short per-config summary
_summary_fields = (
    ("Trigger", 'trigger_word'),
    ("Learning Rate", 'learning_rate'),
    ("Epochs", 'num_train_epochs'),
    ("LoRA Rank", 'lora_rank'),
)

# Preview key configurations for gGlyphs and dGlyphs
for config in ('gglyphs-config', 'dglyphs-config'):
    if config not in _known_configs:
        print(f"⚠️ {config} not found - may need to be created")
        continue

    print(f"\n📋 {config.upper()}:")
    preview_config = load_training_config(config)
    if preview_config:
        for _label, _key in _summary_fields:
            print(f"   {_label}: {preview_config.get(_key, 'N/A')}")


In [None]:
# Dataset preparation walkthrough: one commented example per model type

print("📦 PREPARING TRAINING DATASETS\n" + "=" * 50)

# Uncomment to prepare datasets:

# 1. Prepare gGlyphs (generic) dataset
# gglyphs_dataset = prepare_training_dataset('gglyphs')
# if gglyphs_dataset:
#     gglyphs_zip = create_training_zip(gglyphs_dataset, 'gglyphs_training.zip')

# 2. Prepare dGlyphs (dream) dataset
# dglyphs_dataset = prepare_training_dataset('dglyphs')
# if dglyphs_dataset:
#     dglyphs_zip = create_training_zip(dglyphs_dataset, 'dglyphs_training.zip')

# 3. Prepare SUMI style dataset
# sumi_dataset = prepare_training_dataset('sumi')
# if sumi_dataset:
#     sumi_zip = create_training_zip(sumi_dataset, 'sumi_training.zip')

# 4. Prepare MYTHRA style dataset
# mythra_dataset = prepare_training_dataset('mythra')
# if mythra_dataset:
#     mythra_zip = create_training_zip(mythra_dataset, 'mythra_training.zip')

print("\n".join([
    "💡 Uncomment the lines above to prepare your training datasets!",
    f"📁 Datasets will be created in: {TRAINING_CONFIG['base_dir']}",
    "📦 ZIP files will be ready for upload to Replicate",
]))


In [None]:
# Training job walkthrough: commented examples plus a printed checklist

print("🚀 TRAINING JOB PREPARATION\n" + "=" * 50)

# Example training job setups (uncomment when ready):

# 1. Train gGlyphs model
# gglyphs_training = start_training(
#     config_name='gglyphs-config',
#     dataset_path='path/to/gglyphs_training.zip',  # Upload ZIP to cloud storage first
#     destination='yourusername/gglyphs-model'
# )

# 2. Train dGlyphs model
# dglyphs_training = start_training(
#     config_name='dglyphs-config',
#     dataset_path='path/to/dglyphs_training.zip',  # Upload ZIP to cloud storage first
#     destination='yourusername/dglyphs-model'
# )

print("\n".join([
    "💡 TRAINING WORKFLOW:",
    "1. ✅ Prepare your datasets (above)",
    "2. 📤 Upload ZIP files to cloud storage (Google Drive, S3, etc.)",
    "3. 🔗 Get public URLs for the ZIP files",
    "4. 🚀 Use start_training() with the URLs",
    "5. 🔍 Monitor progress with monitor_training()",
    "\n📋 REQUIRED PARAMETERS:",
    "- config_name: Training configuration to use",
    "- dataset_path: Public URL to training ZIP file",
    "- destination: Your Replicate model destination (username/model-name)",
    "\n⚠️ IMPORTANT:",
    "- Training ZIP files must be publicly accessible URLs",
    "- Make sure you have sufficient Replicate credits",
    "- Training can take several hours depending on dataset size",
]))


In [None]:
# Training history overview plus post-training guidance

print("🔍 TRAINING MONITORING & MANAGEMENT\n" + "=" * 50)

# Check training history (prints each recorded attempt)
training_logs = list_training_logs()

print(f"\n📊 Found {len(training_logs)} training log(s)")

# Monitor active training (when you have training IDs)
print("\n".join([
    "\n🔄 ACTIVE TRAINING MONITORING:",
    "Use monitor_training('training_id') when you have active jobs",
]))

# Example monitoring call:
# monitor_training('training_abc123')

print("\n".join([
    "\n🎯 POST-TRAINING STEPS:",
    "1. 🏷️ Update MODELS dict in glyph_codex.ipynb with new model IDs",
    "2. 🧪 Test new models with validation prompts",
    "3. 📊 Compare quality with existing models",
    "4. 🔄 Iterate on training parameters if needed",
    "\n💡 TIPS:",
    "- Save model IDs immediately after training completes",
    "- Test with known prompts to evaluate quality",
    "- Consider fine-tuning parameters for better results",
    "- Document model capabilities and limitations",
]))


In [None]:
# Custom configuration walkthrough: commented example plus printed reference

print("🛠️ CUSTOM CONFIGURATION CREATION\n" + "=" * 50)

# Example: Create a custom configuration for a hybrid model
# custom_params = {
#     'trigger_word': 'HYBRIDGLYPH',
#     'learning_rate': 2e-4,
#     'num_train_epochs': 1500,
#     'lora_rank': 32,
#     'description': 'Hybrid model combining gGlyph and dGlyph characteristics'
# }

# hybrid_config = create_custom_config(
#     model_name='Hybrid Glyphs',
#     base_config='gglyphs-config',
#     custom_params=custom_params
# )

print("\n".join([
    "💡 CREATE CUSTOM CONFIGS FOR:",
    "- Specialized glyph styles (e.g., Celtic, Egyptian, Modern)",
    "- Different art styles (sketch, photorealistic, abstract)",
    "- Specific use cases (logos, tattoos, icons)",
    "- Experimental parameter combinations",
    "\n📝 CUSTOM PARAMETER OPTIONS:",
    "- trigger_word: Unique activation word",
    "- learning_rate: Training speed (1e-5 to 1e-3)",
    "- num_train_epochs: Training duration (500-3000)",
    "- lora_rank: Model capacity (8-64)",
    "- resolution: Image size (512, 1024)",
    "- train_batch_size: Memory usage (1-4)",
    # Preview any existing custom configs
    "\n🔍 PREVIEW CUSTOM CONFIG:",
]))
# Uncomment to preview:
# preview_training_config('your_custom_config_name')


In [None]:
# Static reference sheet; each entry is one output line ("" is a blank line)
print("\n".join([
    "🎓 TRAINING BEST PRACTICES",
    "=" * 60,
    "📊 DATA REQUIREMENTS:",
    "✅ gGlyphs: 50+ high-quality generic symbols",
    "✅ dGlyphs: 50+ diverse amalgamated symbols",
    "✅ Style models: 30+ consistent style examples",
    "✅ Image quality: High resolution, clean backgrounds",
    "✅ Consistency: Similar composition and framing",
    "\n⚙️ PARAMETER TUNING:",
    "🔥 High learning rate (1e-3): Fast training, risk of instability",
    "🐌 Low learning rate (1e-5): Slow but stable training",
    "🎯 Recommended start: 1e-4 for most cases",
    "",
    "📈 LoRA Rank:",
    "   8-16: Simple styles, fast training",
    "   32-64: Complex styles, detailed features",
    "",
    "🔄 Epochs:",
    "   500-1000: Quick experiments",
    "   1000-2000: Production quality",
    "   2000+: High-detail specialized models",
    "\n🚨 COMMON ISSUES & SOLUTIONS:",
    "❌ Overfitting: Reduce epochs or learning rate",
    "❌ Underfitting: Increase epochs or learning rate",
    "❌ Memory errors: Reduce batch size or resolution",
    "❌ Poor quality: Check data consistency",
    "❌ Slow convergence: Increase learning rate carefully",
    "\n💰 COST OPTIMIZATION:",
    "💡 Start with smaller datasets for testing",
    "💡 Use lower resolutions for initial experiments",
    "💡 Monitor training - stop early if converged",
    "💡 Reuse successful parameter combinations",
    "\n🎯 SUCCESS METRICS:",
    "📈 Training loss decreases steadily",
    "🎨 Generated samples improve over time",
    "✅ Validation prompts produce expected results",
    "🔄 Model generalizes to new prompts",
    "⚡ Inference speed is acceptable",
    "\n📞 SUPPORT & RESOURCES:",
    "📚 Replicate documentation: https://replicate.com/docs",
    "💬 Community: Replicate Discord/forums",
    "🛠️ Training configs: replicate/training/ directory",
    "📊 Logs: results/training_logs/ directory",
]))


## 📤 Google Drive Link Setup

In [6]:
# =============================================================================
# 📤 GOOGLE DRIVE CONVERTER & TRAINING SETUP
# =============================================================================

def convert_gdrive_link(share_url):
    """Convert Google Drive share link to direct download URL

    Accepts file links of the form
    ``https://drive.google.com/file/d/<FILE_ID>[/view][?query][#fragment]``.
    Folder links and anything else are rejected with printed guidance.

    Args:
        share_url: The share link as copied from Google Drive. Surrounding
            whitespace is ignored; non-string values are treated as invalid.

    Returns:
        ``https://drive.google.com/uc?export=download&id=<FILE_ID>``, or None
        when no file ID can be extracted.

    NOTE(review): for large files Google may answer this URL with an
    interstitial page rather than the file itself - verify the resulting URL
    before handing it to a training job.
    """
    # Pasted links often carry stray whitespace; None/other types are invalid
    share_url = share_url.strip() if isinstance(share_url, str) else ""

    if 'drive.google.com/file/d/' in share_url:
        # The file ID is the path segment right after /file/d/. Cut at the
        # first '/', '?' or '#' so that links without a trailing "/view"
        # (e.g. ".../file/d/ID?usp=sharing") do not leak the query string or
        # fragment into the ID.
        file_id = share_url.split('/file/d/', 1)[1]
        for delimiter in '/?#':
            file_id = file_id.split(delimiter, 1)[0]

        if not file_id:
            print("❌ Invalid Google Drive share URL format")
            print("💡 Expected format: https://drive.google.com/file/d/FILE_ID/view?usp=sharing")
            return None

        direct_url = f"https://drive.google.com/uc?export=download&id={file_id}"
        return direct_url
    elif 'drive.google.com/drive/folders/' in share_url:
        print("❌ This is a FOLDER link, not a FILE link!")
        print("💡 For training, you need to:")
        print("   1. Create a ZIP file of your Midjourney images")
        print("   2. Upload the ZIP file to Google Drive")
        print("   3. Share the ZIP FILE (not the folder)")
        print("   4. Use the ZIP file's share link here")
        print("\n📋 Example ZIP file link format:")
        print("   https://drive.google.com/file/d/1ABC123DEF456/view?usp=sharing")
        return None
    else:
        print("❌ Invalid Google Drive share URL format")
        print("💡 Expected format: https://drive.google.com/file/d/FILE_ID/view?usp=sharing")
        return None

# =============================================================================
# 📤 PASTE YOUR GOOGLE DRIVE SHARE LINK HERE
# =============================================================================

# PASTE YOUR GOOGLE DRIVE ZIP FILE SHARE LINK BELOW (between the quotes):
MIDJOURNEY_ZIP_SHARE_LINK = "https://drive.google.com/file/d/1Zz-dvpw6-Wtc2deuwAp5B6SIoRurGp26/view?usp=drive_link"

# ⚠️ The link above must point at a ZIP *file*, never at a folder.
#
# How to obtain it:
# 1. Put your Midjourney images into a single ZIP file
# 2. Upload that ZIP file to Google Drive
# 3. Right-click the ZIP file → Share → Copy link
# 4. Paste the copied link into the variable above
#
# Accepted (file link):  "https://drive.google.com/file/d/1ABC123DEF456GHI789/view?usp=sharing"
# Rejected (folder link): "https://drive.google.com/drive/folders/..."

# Derive the direct-download URL; it stays None when no usable link is set
print("🔄 Converting Google Drive share link...")
if not MIDJOURNEY_ZIP_SHARE_LINK:
    print("⚠️ Please paste your Google Drive share link in the MIDJOURNEY_ZIP_SHARE_LINK variable above")
    MIDJOURNEY_ZIP_DIRECT_URL = None
else:
    MIDJOURNEY_ZIP_DIRECT_URL = convert_gdrive_link(MIDJOURNEY_ZIP_SHARE_LINK)

    if not MIDJOURNEY_ZIP_DIRECT_URL:
        print("❌ Failed to convert Google Drive link")
        print("💡 Please check your share link format")
    else:
        # Both URLs are truncated to 50 characters to keep the output short
        print("\n".join([
            "✅ Google Drive link converted successfully!",
            f"📤 Share link: {MIDJOURNEY_ZIP_SHARE_LINK[:50]}...",
            f"📥 Direct URL: {MIDJOURNEY_ZIP_DIRECT_URL[:50]}...",
            "🎯 Ready for training!",
        ]))

# Test the URL (optional)
def test_training_url(url):
    """Quick test if the URL is accessible

    Sends a single HEAD request (10 second timeout), following redirects, and
    reports the outcome on stdout.

    Args:
        url: URL to probe. Falsy values are rejected without any request.

    Returns:
        True when the final response has status 200, otherwise False
        (including on any request error).
    """
    if not url:
        return False

    try:
        import requests
        # requests.head() does not follow redirects unless asked to, so a
        # link that answers with a 3xx redirect would be reported as
        # inaccessible; follow the redirect chain to the final response.
        response = requests.head(url, timeout=10, allow_redirects=True)
        if response.status_code == 200:
            print(f"✅ URL is accessible!")
            content_length = response.headers.get('content-length')
            # The size line is informational only; skip it for a missing or
            # malformed header instead of failing the whole check
            if content_length and content_length.strip().isdigit():
                size_mb = int(content_length) / (1024 * 1024)
                print(f"📦 File size: {size_mb:.1f} MB")
            return True
        else:
            print(f"❌ URL returned status code: {response.status_code}")
            return False
    except Exception as e:
        # Best-effort probe: any failure simply means "not usable"
        print(f"❌ Error testing URL: {e}")
        return False

# Uncomment to test your URL:
# if MIDJOURNEY_ZIP_DIRECT_URL:
#     test_training_url(MIDJOURNEY_ZIP_DIRECT_URL)

🔄 Converting Google Drive share link...
✅ Google Drive link converted successfully!
📤 Share link: https://drive.google.com/file/d/1Zz-dvpw6-Wtc2deuw...
📥 Direct URL: https://drive.google.com/uc?export=download&id=1Zz...
🎯 Ready for training!


In [8]:
# =============================================================================
# 🎨 MIDJOURNEY CODEX TRAINING - READY TO USE
# =============================================================================

if not MIDJOURNEY_ZIP_DIRECT_URL:
    # No usable dataset URL yet - point the user back at the link cell
    print("❌ Please set your Google Drive link first in the cell above")
    print("💡 Paste your share link in MIDJOURNEY_ZIP_SHARE_LINK variable")
else:
    print("🚀 READY TO START MIDJOURNEY CODEX TRAINING\n" + "=" * 60)

    # Summary of the job that would be launched (URL cut to 50 characters)
    print("🎨 Training Universal gGlyph Codex...")
    print(f"📊 Dataset URL: {MIDJOURNEY_ZIP_DIRECT_URL[:50]}...")
    print("🎯 Will create model: conorbyrnes04/meru")

    # START TRAINING - the call is parked inside a string literal so it does
    # not run; remove the triple quotes when ready:
    """
    midjourney_codex_training = start_training(
        config_name='midjourney-svg-config',
        dataset_path=MIDJOURNEY_ZIP_DIRECT_URL,
        destination='conorbyrnes04/meru'  # MERU model
    )
    """

    print("\n".join([
        "\n💡 TO START TRAINING:",
        "1. ✅ Make sure your Google Drive link is set above",
        "2. ✅ Verify the URL is accessible (uncomment test)",
        "3. ✅ Update the destination username above",
        "4. 🚀 Remove the triple quotes around start_training() call",
        "5. ▶️ Run this cell!",
        "\n⏱️ TRAINING DETAILS:",
        "- Duration: ~30-90 minutes",
        "- Cost: ~$0.50-$3.00 in Replicate credits",
        "- Output: Custom SVG glyph model trained on your Midjourney style",
        "- Base Model: recraft-ai/recraft-v3-svg (SVG optimized)",
        "- Trigger Word: meru",
    ]))

🚀 READY TO START MIDJOURNEY CODEX TRAINING
🎨 Training Universal gGlyph Codex...
📊 Dataset URL: https://drive.google.com/uc?export=download&id=1Zz...
🎯 Will create model: conorbyrnes04/meru

💡 TO START TRAINING:
1. ✅ Make sure your Google Drive link is set above
2. ✅ Verify the URL is accessible (uncomment test)
3. ✅ Update the destination username above
4. 🚀 Remove the triple quotes around start_training() call
5. ▶️ Run this cell!

⏱️ TRAINING DETAILS:
- Duration: ~30-90 minutes
- Cost: ~$0.50-$3.00 in Replicate credits
- Output: Custom SVG glyph model trained on your Midjourney style
- Base Model: recraft-ai/recraft-v3-svg (SVG optimized)
- Trigger Word: meru
