In [135]:
# Setup Environment - Multi-Provider Support
import os
import json
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime
import warnings
import openai
from dotenv import load_dotenv
warnings.filterwarnings('ignore')

# Load environment variables from .env file
load_dotenv()

# Multi-Provider API setup
print("🔬 Multi-Provider LLM Migration Tool Initialized")
print(f"⚙️  Timestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

# Initialize OpenRouter client
try:
    OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY')
    
    if not OPENROUTER_API_KEY:
        raise ValueError("OPENROUTER_API_KEY not found in environment variables.")
    
    openrouter_client = openai.OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=OPENROUTER_API_KEY,
    )
    print("✅ OpenRouter client initialized successfully")
except Exception as e:
    print(f"❌ Error initializing OpenRouter client: {e}")
    openrouter_client = None

# Initialize Google AI client
try:
    GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
    
    if not GOOGLE_API_KEY:
        raise ValueError("GOOGLE_API_KEY not found in environment variables.")
    
    # Import and configure Google AI with new API
    import google.genai as genai
    
    # Create client with API key
    google_client = genai.Client(api_key=GOOGLE_API_KEY)
    print("✅ Google AI client initialized successfully")
except Exception as e:
    print(f"❌ Error initializing Google AI client: {e}")
    google_client = None

# Provider configuration
PROVIDERS = {
    'openrouter': {
        'client': openrouter_client,
        'api_key': OPENROUTER_API_KEY,
        'enabled': openrouter_client is not None
    },
    'google': {
        'client': google_client,
        'api_key': GOOGLE_API_KEY,
        'enabled': google_client is not None
    }
}

# Show available providers
enabled_providers = [name for name, config in PROVIDERS.items() if config['enabled']]
print(f"\n🎯 Available providers: {', '.join(enabled_providers)}")

print("\n🌐 Supported models:")
print("   OpenRouter: 'anthropic/claude-3.5-sonnet', 'meta-llama/llama-3.1-8b-instruct', etc.")
print("   Google AI: 'gemini-1.5-pro', 'gemini-1.5-flash', 'gemini-1.0-pro', etc.")
print("📋 Visit https://openrouter.ai/models for OpenRouter model list")

🔬 Multi-Provider LLM Migration Tool Initialized
⚙️  Timestamp: 2025-09-03 14:41:31
✅ OpenRouter client initialized successfully
✅ OpenRouter client initialized successfully
✅ Google AI client initialized successfully

🎯 Available providers: openrouter, google

🌐 Supported models:
   OpenRouter: 'anthropic/claude-3.5-sonnet', 'meta-llama/llama-3.1-8b-instruct', etc.
   Google AI: 'gemini-1.5-pro', 'gemini-1.5-flash', 'gemini-1.0-pro', etc.
📋 Visit https://openrouter.ai/models for OpenRouter model list
✅ Google AI client initialized successfully

🎯 Available providers: openrouter, google

🌐 Supported models:
   OpenRouter: 'anthropic/claude-3.5-sonnet', 'meta-llama/llama-3.1-8b-instruct', etc.
   Google AI: 'gemini-1.5-pro', 'gemini-1.5-flash', 'gemini-1.0-pro', etc.
📋 Visit https://openrouter.ai/models for OpenRouter model list


In [136]:
# SIMPLIFIED MULTI-PROVIDER ROUTING SYSTEM
import requests
import google.genai as genai

class MultiProviderClient:
    """Simplified multi-provider LLM client with reduced redundancy."""
    
    # Provider detection patterns (data-driven approach)
    PROVIDER_PATTERNS = {
        'google': ['gemini', 'palm', 'bard'],
        'openrouter': ['anthropic', 'openai', 'meta', 'mistral', 'cohere', 
                      'deepseek', 'qwen', 'dolphin', 'nous', 'microsoft']
    }
    
    # Default configurations
    DEFAULT_CONFIG = {
        'max_tokens': {'google': 8192, 'openrouter': 80000},
        'temperature': 0.3,
        'timeout': 300
    }
    
    def __init__(self, providers):
        self.providers = providers
    
    def detect_provider(self, model_name: str) -> str:
        """Detect provider using pattern matching."""
        model_lower = model_name.lower()
        
        # Check for Google AI patterns
        if any(keyword in model_lower for keyword in self.PROVIDER_PATTERNS['google']):
            return 'google'
        
        # Check for OpenRouter patterns (including slash notation)
        if ('/' in model_name or 
            any(pattern in model_lower for pattern in self.PROVIDER_PATTERNS['openrouter'])):
            return 'openrouter'
        
        return 'openrouter'  # Default fallback
    
    def make_api_call(self, model_name: str, prompt: str, **kwargs) -> dict:
        """Unified API call with error handling."""
        provider = self.detect_provider(model_name)
        
        # Check provider availability
        if not self.providers.get(provider, {}).get('enabled'):
            return self._error_response(f'Provider {provider} is not enabled')
        
        print(f"🔗 Using {provider.upper()} provider for {model_name}")
        
        # Route to appropriate provider method
        try:
            if provider == 'google':
                return self._call_google(model_name, prompt, **kwargs)
            else:  # openrouter
                return self._call_openrouter(model_name, prompt, **kwargs)
        except Exception as e:
            return self._error_response(str(e))
    
    def _call_openrouter(self, model_name: str, prompt: str, **kwargs) -> dict:
        """OpenRouter API call."""
        payload = {
            "model": model_name,
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": kwargs.get('max_tokens', self.DEFAULT_CONFIG['max_tokens']['openrouter']),
            "temperature": kwargs.get('temperature', self.DEFAULT_CONFIG['temperature'])
        }
        
        headers = {
            "Authorization": f"Bearer {self.providers['openrouter']['api_key']}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://github.com/research-project",
            "X-Title": "LLM PHP Migration Research"
        }
        
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=headers,
            data=json.dumps(payload),
            timeout=self.DEFAULT_CONFIG['timeout']
        )
        
        if response.status_code != 200:
            return self._error_response(f'HTTP {response.status_code}: {response.text[:500]}')
        
        result = response.json()
        return self._success_response(
            content=result['choices'][0]['message']['content'],
            provider='openrouter',
            model=model_name,
            usage=result.get('usage', {})
        )
    
    def _call_google(self, model_name: str, prompt: str, **kwargs) -> dict:
        """Google AI API call."""
        client = self.providers['google']['client']
        
        response = client.models.generate_content(
            model=model_name,
            contents=prompt,
            config={
                'temperature': kwargs.get('temperature', self.DEFAULT_CONFIG['temperature']),
                'max_output_tokens': kwargs.get('max_tokens', self.DEFAULT_CONFIG['max_tokens']['google']),
                'top_p': 0.95,
                'top_k': 40
            }
        )
        
        if not response.text:
            return self._error_response('Empty response from Google AI')
        
        # Extract usage info safely
        usage_info = {}
        if hasattr(response, 'usage_metadata'):
            try:
                usage_info = {
                    'prompt_tokens': getattr(response.usage_metadata, 'prompt_token_count', 0),
                    'completion_tokens': getattr(response.usage_metadata, 'candidates_token_count', 0)
                }
            except:
                pass
        
        return self._success_response(
            content=response.text,
            provider='google',
            model=model_name,
            usage=usage_info
        )
    
    def _success_response(self, content: str, provider: str, model: str, usage: dict) -> dict:
        """Standardized success response."""
        return {
            'success': True,
            'content': content,
            'provider': provider,
            'model': model,
            'usage': usage
        }
    
    def _error_response(self, error_message: str) -> dict:
        """Standardized error response."""
        return {'success': False, 'error': error_message}

# Initialize the simplified multi-provider client
multi_client = MultiProviderClient(PROVIDERS)


# Test provider detection
test_models = [
    'gemini-1.5-pro',
    'anthropic/claude-3.5-sonnet', 
    'meta-llama/llama-3.1-8b-instruct',
    'gemini-1.5-flash'
]

print("\n🔍 Provider Detection Test:")
for model in test_models:
    provider = multi_client.detect_provider(model)
    print(f"   {model} → {provider.upper()}")


🔍 Provider Detection Test:
   gemini-1.5-pro → GOOGLE
   anthropic/claude-3.5-sonnet → OPENROUTER
   meta-llama/llama-3.1-8b-instruct → OPENROUTER
   gemini-1.5-flash → GOOGLE


In [137]:
# LOAD TEST FILES
test_files = {}
old_version_path = Path('selected_100_files\extra_large_1000_plus')

In [None]:
# BASIC PROMPTING STRATEGY
BASIC_PROMPT_TEMPLATE = """You are a senior PHP developer with expertise in legacy code modernization.
Your task is to migrate this legacy PHP code  to PHP 8.3 standards using modern syntax  while maintaining functional equivalence.

Please migrate the following PHP code up to PHP 8.3 standards:

{code}

Your response should follow this EXACT format:

// MIGRATION_START
[your migrated PHP code here]
// MIGRATION_END

CRITICAL FORMATTING REQUIREMENT: 
- Place the MIGRATION_START marker BEFORE the opening <?php tag
- Place the MIGRATION_END marker AFTER the closing PHP code
- Do NOT place these markers inside the PHP code itself

Provide only the migrated PHP code with the markers placed correctly outside the PHP code block, no additional commentary."""

print("✅ Basic prompting strategy configured with fixed marker placement")

✅ Basic prompting strategy configured with fixed marker placement


In [None]:
# COMPREHENSIVE PROMPTING STRATEGY
COMPREHENSIVE_PROMPT_TEMPLATE = """You are a senior PHP developer with expertise in legacy php code modernization.
Your task is to migrate old PHP code up to PHP 8.3 standards using specific modern features like strict typing, constructor property promotion, match expressions, union types, and secure function replacements while preserving compatibility and maintaining the functionality of the original code.

Migration Requirements:
1. Update deprecated syntax
2. Replace deprecated functions
3. Implement modern PHP features
4. Improve security and code quality
5. Maintain functional equivalence
6. Enforce strict typing
7. Adopt core PHP 8.3 constructs

Please migrate the following PHP code to PHP 8.3:

{code}



Your response should follow this EXACT format:

// MIGRATION_START
[your migrated PHP code here]
// MIGRATION_END

CRITICAL FORMATTING REQUIREMENT: 
- Place the MIGRATION_START marker BEFORE the opening <?php tag
- Place the MIGRATION_END marker AFTER the closing PHP code
- Do NOT place these markers inside the PHP code itself

Include the markers as comments OUTSIDE the PHP code block. Keep the original comments as they are.
Do not add any other text, explanations, or commentary outside the markers. Make sure you give the COMPLETE migrated code."""

# CHUNKING PROMPTS FOR LARGE FILES
CHUNK_BASIC_PROMPT_TEMPLATE = """You are a senior PHP developer with expertise in legacy code modernization. 
Your task is to migrate this PARTIAL SEGMENT of a larger PHP file up to PHP 8.3 standards using specific modern features.


CONTEXT:
- Original file: {filename}
- Processing lines: {start_line} to {end_line} (of {total_lines} total lines)
- This is chunk {chunk_number} of {total_chunks}

CRITICAL INSTRUCTIONS FOR PARTIAL CODE SEGMENTS:
1. This is ONLY a SEGMENT of a larger file - DO NOT try to complete it
2. DO NOT add opening <?php tags unless the segment starts with one
3. DO NOT add closing ?> tags unless the segment already has one  
4. DO NOT add any closing braces }} that are not in the original segment
5. DO NOT add any opening braces {{ that are not in the original segment
6. DO NOT try to complete class definitions, function definitions, or any code structures
7. If the segment starts mid-function, keep it mid-function
8. If the segment ends mid-function, keep it mid-function
9. Preserve the EXACT START and END boundaries of the provided code segment

WARNING: Adding extra braces or completing code structures will break the reconstruction process!

Please migrate ONLY the following PHP code segment up to PHP 8.3 standards:

{code}

Your response should follow this EXACT format:

// MIGRATION_START
[your migrated code segment here - exactly as provided, no additions]
// MIGRATION_END

CRITICAL FORMATTING REQUIREMENT: 
- Place the MIGRATION_START marker BEFORE the code segment
- Place the MIGRATION_END marker AFTER the code segment  
- Do NOT place these markers inside the PHP code itself

Migrate only the provided code segment. Do not add any missing parts or try to complete incomplete structures."""

CHUNK_COMPREHENSIVE_PROMPT_TEMPLATE = """You are a senior PHP developer with expertise in legacy code modernization. 
Your task is to migrate this PARTIAL SEGMENT of a larger PHP file up to PHP 8.3 standards using specific modern features like strict typing, constructor property promotion, match expressions, union types, and secure function replacements .


CONTEXT:
- Original file: {filename}
- Processing lines: {start_line} to {end_line} (of {total_lines} total lines)  
- This is chunk {chunk_number} of {total_chunks}

CRITICAL INSTRUCTIONS FOR PARTIAL CODE SEGMENTS:
1. This is ONLY a SEGMENT of a larger file - DO NOT try to complete it
2. DO NOT add opening <?php tags unless the segment starts with one
3. DO NOT add closing ?> tags unless the segment already has one
4. DO NOT add any closing braces }} that are not in the original segment
5. DO NOT add any opening braces {{ that are not in the original segment  
6. DO NOT try to complete class definitions, function definitions, or any code structures
7. If the segment starts mid-function, keep it mid-function
8. If the segment ends mid-function, keep it mid-function
9. Preserve the EXACT START and END boundaries of the provided code segment

WARNING: Adding extra braces or completing code structures will break the reconstruction process!

Migration Requirements for this segment:
1. Update deprecated syntax
2. Replace deprecated functions
3. Implement modern PHP features
4. Adopt core PHP 8.3 constructs

Please migrate ONLY the following PHP code segment to PHP 8.3:

{code}

Your response should follow this EXACT format:

// MIGRATION_START
[your migrated code segment here - exactly as provided, no additions]
// MIGRATION_END

CRITICAL FORMATTING REQUIREMENT: 
- Place the MIGRATION_START marker BEFORE the code segment
- Place the MIGRATION_END marker AFTER the code segment
- Do NOT place these markers inside the PHP code itself

Include the markers as comments OUTSIDE the code segment. Keep the original comments as they are.
Migrate only the provided code segment. Do not add missing functions, classes, or try to complete the file."""

print("✅ Comprehensive and chunking prompting strategies configured with fixed marker placement")
print("🔧 Updated all prompts to prevent placing MIGRATION markers inside PHP code")
print("⚠️  Added explicit warnings about NOT completing code structures in chunks")

✅ Comprehensive and chunking prompting strategies configured with fixed marker placement
🔧 Updated all prompts to prevent placing MIGRATION markers inside PHP code


In [140]:
import re
import subprocess
import json
from pathlib import Path

def chunk_code(code: str, chunk_size: int = None) -> list:
    """Smart PHP-aware chunking using PHP tokenizer for accurate function detection."""
    if chunk_size is None:
        chunk_size = DEFAULT_CHUNK_SIZE
    
    lines = code.split('\n')
    total_lines = len(lines)
    
    if total_lines <= chunk_size:
        return [{
            'start_line': 1,
            'end_line': total_lines,
            'actual_size': total_lines,
            'total_lines': total_lines,
            'code': code
        }]
    
    # Get function boundaries using smart parsing
    function_boundaries = find_function_boundaries(code, lines)
    
    # Create chunks based on function boundaries
    return create_smart_chunks(lines, function_boundaries, chunk_size, total_lines)

def find_function_boundaries(code: str, lines: list) -> list:
    """Find function boundaries using PHP tokenizer if available, else regex."""
    # Try PHP tokenizer first
    php_functions = try_php_tokenizer(code, lines)
    if php_functions:
        return php_functions
    
    # Fallback to regex-based parsing
    return find_functions_with_regex(lines)

def try_php_tokenizer(code: str, lines: list) -> list:
    """Try to use PHP's built-in tokenizer for accurate parsing."""
    try:
        php_script = f'''<?php
$code = <<<'EOD'
{code}
EOD;

$tokens = token_get_all($code);
$functions = [];
$current_function = null;
$brace_level = 0;
$in_function = false;

foreach ($tokens as $token) {{
    if (is_array($token)) {{
        if ($token[0] === T_FUNCTION) {{
            $in_function = true;
            $current_function = [
                'start_line' => $token[2] - 1,
                'end_line' => null,
                'name' => null
            ];
        }}
        
        if ($in_function && $token[0] === T_STRING && $current_function['name'] === null) {{
            $current_function['name'] = $token[1];
        }}
    }} else {{
        if ($token === '{{' && $in_function) {{
            $brace_level++;
        }} elseif ($token === '}}' && $in_function) {{
            $brace_level--;
            if ($brace_level === 0) {{
                $current_function['end_line'] = find_closing_brace($current_function['start_line']);
                $functions[] = $current_function;
                $current_function = null;
                $in_function = false;
            }}
        }}
    }}
}}

function find_closing_brace($start_line) {{
    global $code;
    $lines = explode("\\n", $code);
    $brace_count = 0;
    $function_started = false;
    
    for ($i = $start_line; $i < count($lines); $i++) {{
        $line = trim($lines[$i]);
        if (empty($line) || strpos($line, '//') === 0 || strpos($line, '#') === 0) continue;
        
        for ($j = 0; $j < strlen($line); $j++) {{
            $char = $line[$j];
            if ($char === '{{') {{
                $brace_count++;
                $function_started = true;
            }} elseif ($char === '}}' && $function_started) {{
                $brace_count--;
                if ($brace_count === 0) return $i;
            }}
        }}
    }}
    return $start_line;
}}

echo json_encode($functions);
?>'''
        
        temp_php = Path('temp_parser.php')
        with open(temp_php, 'w', encoding='utf-8') as f:
            f.write(php_script)
        
        result = subprocess.run(['php', str(temp_php)], 
                              capture_output=True, text=True, timeout=10)
        temp_php.unlink()
        
        if result.returncode == 0 and result.stdout.strip():
            return json.loads(result.stdout.strip())
            
    except (subprocess.TimeoutExpired, subprocess.CalledProcessError, 
            FileNotFoundError, json.JSONDecodeError):
        pass
    
    return []

def find_functions_with_regex(lines: list) -> list:
    """Regex-based function detection with proper closing brace detection."""
    functions = []
    function_patterns = [
        r'^\s*(?:(?:public|private|protected)\s+)?(?:static\s+)?function\s+(\w+)\s*\(',
        r'^\s*(?:abstract\s+)?(?:final\s+)?function\s+(\w+)\s*\(',
        r'^\s*function\s+(\w+)\s*\('
    ]
    
    i = 0
    while i < len(lines):
        line = lines[i].strip()
        
        if not line or line.startswith(('//','#')):
            i += 1
            continue
        
        # Check for function start
        function_match = None
        for pattern in function_patterns:
            match = re.match(pattern, line, re.IGNORECASE)
            if match:
                function_match = match
                break
        
        if function_match:
            function_name = function_match.group(1)
            closing_brace_line = find_function_closing_brace(i, lines)
            
            if closing_brace_line is not None:
                functions.append({
                    'start_line': i,
                    'end_line': closing_brace_line,
                    'name': function_name
                })
                i = closing_brace_line + 1
            else:
                i += 1
        else:
            i += 1
    
    return functions

def find_function_closing_brace(start_line: int, lines: list) -> int:
    """Find the closing brace line for a function."""
    brace_count = 0
    function_started = False
    
    for i in range(start_line, len(lines)):
        line = lines[i].strip()
        
        if not line or line.startswith(('//','#')):
            continue
        
        # Simple brace counting (could be enhanced to handle strings/comments)
        for char in line:
            if char == '{':
                brace_count += 1
                function_started = True
            elif char == '}' and function_started:
                brace_count -= 1
                if brace_count == 0:
                    return i
    
    return None

def create_smart_chunks(lines: list, function_boundaries: list, target_chunk_size: int, total_lines: int) -> list:
    """Create chunks that respect function boundaries."""
    chunks = []
    current_pos = 0
    functions = sorted(function_boundaries, key=lambda f: f['start_line'])
    
    while current_pos < total_lines:
        # Calculate chunk end position
        initial_end_pos = min(current_pos + target_chunk_size - 1, total_lines - 1)
        
        # Find functions that would be split by this chunk boundary
        relevant_functions = [f for f in functions 
                            if (current_pos <= f['start_line'] <= initial_end_pos) or
                               (f['start_line'] < current_pos and f['end_line'] and f['end_line'] >= current_pos)]
        
        # Extend chunk to complete functions if reasonable
        final_end_pos = initial_end_pos
        if relevant_functions:
            for func in relevant_functions:
                if func['end_line'] and func['end_line'] <= initial_end_pos + 300:  # Max extension
                    final_end_pos = max(final_end_pos, func['end_line'])
        
        # Create chunk
        chunk = {
            'start_line': current_pos + 1,  # Convert to 1-based
            'end_line': final_end_pos + 1,  # Convert to 1-based
            'actual_size': final_end_pos - current_pos + 1,
            'total_lines': total_lines,
            'code': '\n'.join(lines[current_pos:final_end_pos + 1])
        }
        chunks.append(chunk)
        
        current_pos = final_end_pos + 1
    
    return chunks

In [141]:
DEFAULT_CHUNK_SIZE = 500  # Default chunk size in lines

In [None]:
# PROMPT HELPER FUNCTION
PROMPT_TEMPLATES = {
    'basic': BASIC_PROMPT_TEMPLATE,
    'comprehensive': COMPREHENSIVE_PROMPT_TEMPLATE,
    'chunk_basic': CHUNK_BASIC_PROMPT_TEMPLATE,
    'chunk_comprehensive': CHUNK_COMPREHENSIVE_PROMPT_TEMPLATE,
}


def create_prompt(code: str, strategy: str = "basic", **kwargs) -> str:
    """Create migration prompts using different strategies."""
    if strategy not in PROMPT_TEMPLATES:
        raise ValueError(f"Unknown prompting strategy: {strategy}. Available: {list(PROMPT_TEMPLATES.keys())}")
    
    template = PROMPT_TEMPLATES[strategy]
    
    # For chunking strategies, we need additional parameters
    if strategy.startswith('chunk_'):
        required_params = ['filename', 'start_line', 'end_line', 'total_lines', 'chunk_number', 'total_chunks']
        missing_params = [param for param in required_params if param not in kwargs]
        if missing_params:
            raise ValueError(f"Chunking strategy requires parameters: {missing_params}")
    
    return template.format(code=code, **kwargs)

print("🎯 All prompting strategies configured and ready")
print(f"📋 Available strategies: {list(PROMPT_TEMPLATES.keys())}")
print("🔧 Added chunking utilities for large files")
print(f"📦 Default chunk size: {DEFAULT_CHUNK_SIZE} lines")

🎯 All prompting strategies configured and ready
📋 Available strategies: ['basic', 'comprehensive', 'chunk_basic', 'chunk_comprehensive']
🔧 Added chunking utilities for large files
📦 Default chunk size: 500 lines


In [143]:
# SIMPLIFIED MULTI-PROVIDER MIGRATION SYSTEM

class MigrationManager:
    """Simplified migration manager with reduced redundancy."""
    
    @staticmethod
    def normalize_model_name(model_name: str) -> str:
        """Convert model name to filesystem-safe format."""
        return model_name.replace('/', '_').replace('-', '_').replace(':', '_').replace('.', '_').lower()
    
    @staticmethod
    def save_response(response_data: dict, file_path: Path, metadata: dict = None):
        """Save API response with consistent metadata format."""
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write("=== RAW MODEL RESPONSE ===\n")
            
            # Write metadata
            if metadata:
                for key, value in metadata.items():
                    f.write(f"{key.capitalize()}: {value}\n")
            
            f.write(f"Length: {len(response_data['content'])} characters\n")
            f.write(f"Usage: {response_data.get('usage', {})}\n")
            f.write(f"Timestamp: {datetime.now()}\n")
            f.write("=" * 50 + "\n\n")
            f.write(response_data['content'])

def migrate_file_chunked(filename: str, original_code: str, model_name: str, strategy: str, chunk_size: int):
    """Migrate large file using organized chunking."""
    chunks = chunk_code(original_code, chunk_size)
    total_chunks = len(chunks)
    
    print(f"📦 Split into {total_chunks} chunks of ~{chunk_size} lines each")
    
    # Create organized folder structure
    model_short = MigrationManager.normalize_model_name(model_name)
    file_base = filename.replace('.php', '')
    
    file_dir = Path('chunked_model_output') / model_short / file_base
    file_dir.mkdir(parents=True, exist_ok=True)
    print(f"📁 Saving chunks to: {file_dir}")
    
    # Process chunks
    chunk_strategy = f"chunk_{strategy}" if not strategy.startswith('chunk_') else strategy
    all_responses = []
    
    for i, chunk_info in enumerate(chunks, 1):
        print(f"\n[Chunk {i}/{total_chunks}] Processing lines {chunk_info['start_line']}-{chunk_info['end_line']}...")
        
        # Create prompt and make API call
        prompt = create_prompt(
            chunk_info['code'], chunk_strategy,
            filename=filename, start_line=chunk_info['start_line'],
            end_line=chunk_info['end_line'], total_lines=chunk_info['total_lines'],
            chunk_number=i, total_chunks=total_chunks
        )
        
        print(f"📏 Chunk prompt length: {len(prompt):,} characters")
        response = process_api_call(model_name, prompt, file_dir / f"{i}.txt", {
            'file': filename, 'model': model_name, 'strategy': chunk_strategy, 'chunk': i
        })
        
        all_responses.append(response)
        status = "✅" if response else "❌"
        print(f"{status} Chunk {i} {'processed successfully' if response else 'failed'}")
    
    # Summary
    successful_chunks = sum(1 for r in all_responses if r is not None)
    print(f"\n🎉 Chunked migration completed!")
    print(f"✅ Successful chunks: {successful_chunks}/{total_chunks}")
    print(f"📁 All chunks saved in: {file_dir}")
    
    return all_responses

def migrate_file(filename: str, model_name: str, strategy: str = "basic", 
                chunk_size: int = None, auto_chunk: bool = True):
    """Enhanced migration function with multi-provider support."""
    
    chunk_size = chunk_size or DEFAULT_CHUNK_SIZE
    
    if filename not in test_files:
        print(f"❌ File '{filename}' not found")
        return None
    
    original_code = test_files[filename]
    line_count = len(original_code.split('\n'))
    
    print(f"🚀 Migrating {filename} using {model_name} with {strategy} strategy...")
    print(f"📏 Input code length: {len(original_code):,} characters ({line_count:,} lines)")
    
    # Decide processing method
    if auto_chunk and line_count > chunk_size:
        print(f"📦 Large file detected ({line_count} lines) - using organized chunking")
        return migrate_file_chunked(filename, original_code, model_name, strategy, chunk_size)
    else:
        print(f"📄 Processing as single file ({line_count} lines, chunk limit: {chunk_size})")
        return migrate_file_single(filename, original_code, model_name, strategy)

def migrate_file_single(filename: str, original_code: str, model_name: str, strategy: str):
    """Migrate single file using multi-provider client."""
    prompt = create_prompt(original_code, strategy)
    print(f"📏 Prompt length: {len(prompt):,} characters")
    
    # Create output path
    model_short = MigrationManager.normalize_model_name(model_name)
    base_name = filename.replace('.php', '')
    output_file = Path('model_output') / model_short / f"{base_name}.txt"
    output_file.parent.mkdir(parents=True, exist_ok=True)
    
    return process_api_call(model_name, prompt, output_file, {
        'file': filename, 'model': model_name, 'strategy': strategy
    })

def process_api_call(model_name: str, prompt: str, output_path: Path, metadata: dict):
    """Unified API call processing with error handling."""
    print(f"🔗 Making API call via multi-provider client...")
    
    # Make API call
    result = multi_client.make_api_call(model_name, prompt)
    print(f"📊 Provider: {result.get('provider', 'unknown').upper()}")
    
    if not result['success']:
        print(f"❌ API Error: {result['error']}")
        return None
    
    # Validate response
    raw_response = result['content']
    print(f"📏 Response length: {len(raw_response)} characters")
    
    if not raw_response or len(raw_response.strip()) < 10:
        print(f"❌ Model response is empty or too short")
        return None
    
    # Save response
    metadata['provider'] = result.get('provider', 'unknown').upper()
    MigrationManager.save_response(result, output_path, metadata)
    print(f"✅ Response saved to: {output_path}")
    
    return raw_response


In [144]:
if old_version_path.exists():
    # Recursively find all PHP files in all subfolders
    for php_file in old_version_path.rglob('*.php'):
        try:
            with open(php_file, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read()
                if content.strip():
                    test_files[php_file.name] = content
        except Exception as e:
            print(f"⚠️  Could not load {php_file.name}: {e}")
    
    print(f"📁 Loaded {len(test_files)} PHP files from selected_100_files:")
    for filename in sorted(test_files.keys()):
        size = len(test_files[filename])
        print(f"   📄 {filename} ({size:,} chars)")
else:
    print("❌ selected_100_files directory not found")
    print("💡 Make sure the selected 100 files are in 'selected_100_files/' directory")


📁 Loaded 12 PHP files from selected_100_files:
   📄 001_getid3.lib.php (43,504 chars)
   📄 002_module.audio-video.asf.php (129,115 chars)
   📄 003_wp-db.php (60,679 chars)
   📄 004_class-IXR.php (32,854 chars)
   📄 005_class-snoopy.php (37,776 chars)
   📄 006_widgets.php (47,668 chars)
   📄 009_getid3.php (62,683 chars)
   📄 010_class-wp-theme.php (39,449 chars)
   📄 012_module.audio-video.riff.php (111,145 chars)
   📄 013_file.php (45,191 chars)
   📄 014_module.tag.id3v2.php (134,159 chars)
   📄 057_class-wp-customize-manager.php (32,639 chars)


In [145]:

def batch_migrate(filenames: list, model: str = "gemini-1.5-pro", strategy: str = "basic", 
                 chunk_size: int = None, auto_chunk: bool = True):
    """Migrate multiple files with multi-provider chunking support."""
    chunk_size = chunk_size or DEFAULT_CHUNK_SIZE
    provider = multi_client.detect_provider(model)
    
    print(f"🔄 Batch migrating {len(filenames)} files using {provider.upper()}")
    if auto_chunk:
        print(f"📦 Auto-chunking enabled for files > {chunk_size} lines")
    
    results = []
    stats = {'files': 0, 'chunks': 0, 'success_files': 0, 'success_chunks': 0}
    
    for i, filename in enumerate(filenames, 1):
        print(f"\n[{i}/{len(filenames)}] Processing {filename}...")
        result = migrate_file(filename, model, strategy, chunk_size=chunk_size, auto_chunk=auto_chunk)
        results.append(result)
        
        # Update statistics
        stats['files'] += 1
        if result is not None:
            if isinstance(result, list):  # Chunked file
                stats['chunks'] += len(result)
                stats['success_chunks'] += sum(1 for r in result if r is not None)
                if any(r is not None for r in result):
                    stats['success_files'] += 1
            else:  # Single file
                stats['chunks'] += 1
                stats['success_chunks'] += 1
                stats['success_files'] += 1
    
    # Summary
    print(f"\n🎉 Batch migration completed!")
    print(f"✅ Successful files: {stats['success_files']}/{stats['files']}")
    if stats['chunks'] > len(filenames):
        print(f"📦 Total chunks processed: {stats['success_chunks']}/{stats['chunks']}")
    
    return results


def analyze_file_sizes(chunk_threshold: int = None):
    """Analyze file sizes to see chunking requirements."""
    chunk_threshold = chunk_threshold or DEFAULT_CHUNK_SIZE
    
    if not test_files:
        print("❌ No test files loaded")
        return
    
    # Categorize files
    small_files, large_files = [], []
    for filename, content in test_files.items():
        line_count = len(content.split('\n'))
        char_count = len(content)
        file_info = (filename, line_count, char_count)
        
        if line_count <= chunk_threshold:
            small_files.append(file_info)
        else:
            large_files.append(file_info)
    
    print("📊 File Size Analysis")
    print("=" * 40)
    
    # Small files summary
    print(f"📄 Small files (≤{chunk_threshold} lines): {len(small_files)}")
    for filename, lines, chars in sorted(small_files, key=lambda x: x[1], reverse=True)[:10]:
        print(f"   {filename}: {lines:,} lines, {chars:,} chars")
    if len(small_files) > 10:
        print(f"   ... and {len(small_files) - 10} more")
    
    # Large files summary
    if large_files:
        print(f"\n📦 Large files (>{chunk_threshold} lines): {len(large_files)}")
        total_lines = sum(lines for _, lines, _ in large_files)
        total_chunks = sum((lines + chunk_threshold - 1) // chunk_threshold for _, lines, _ in large_files)
        
        for filename, lines, chars in sorted(large_files, key=lambda x: x[1], reverse=True):
            chunks = (lines + chunk_threshold - 1) // chunk_threshold
            print(f"   {filename}: {lines:,} lines, {chars:,} chars → {chunks} chunks")
        
        print(f"\n📊 Large files summary: {total_lines:,} total lines → {total_chunks} chunks")



In [146]:
# ANALYZE YOUR FILES FOR CHUNKING
print("🔍 Analyzing loaded files to see chunking requirements...")
analyze_file_sizes()

# Test chunking on a sample file
print("\n🧪 Testing chunking logic on sample files...")
for filename in list(test_files.keys())[:3]:
    code = test_files[filename]
    line_count = len(code.split('\n'))
    chunks = chunk_code(code, DEFAULT_CHUNK_SIZE)
    
    print(f"\n📄 {filename}:")
    print(f"   Lines: {line_count:,}")
    print(f"   Chunks: {len(chunks)}")
    
    if len(chunks) > 1:
        print(f"   Chunk breakdown:")
        for i, chunk_info in enumerate(chunks, 1):
            print(f"     Chunk {i}: lines {chunk_info['start_line']}-{chunk_info['end_line']} ({chunk_info['end_line'] - chunk_info['start_line'] + 1} lines)")

print(f"\n✅ File analysis complete!")

🔍 Analyzing loaded files to see chunking requirements...
📊 File Size Analysis
📄 Small files (≤500 lines): 0

📦 Large files (>500 lines): 12
   014_module.tag.id3v2.php: 3,415 lines, 134,159 chars → 7 chunks
   012_module.audio-video.riff.php: 2,436 lines, 111,145 chars → 5 chunks
   003_wp-db.php: 2,187 lines, 60,679 chars → 5 chunks
   002_module.audio-video.asf.php: 2,020 lines, 129,115 chars → 5 chunks
   009_getid3.php: 1,776 lines, 62,683 chars → 4 chunks
   006_widgets.php: 1,515 lines, 47,668 chars → 4 chunks
   001_getid3.lib.php: 1,347 lines, 43,504 chars → 3 chunks
   057_class-wp-customize-manager.php: 1,273 lines, 32,639 chars → 3 chunks
   005_class-snoopy.php: 1,257 lines, 37,776 chars → 3 chunks
   010_class-wp-theme.php: 1,236 lines, 39,449 chars → 3 chunks
   013_file.php: 1,151 lines, 45,191 chars → 3 chunks
   004_class-IXR.php: 1,101 lines, 32,854 chars → 3 chunks

📊 Large files summary: 20,714 total lines → 48 chunks

🧪 Testing chunking logic on sample files...



📄 001_getid3.lib.php:
   Lines: 1,347
   Chunks: 3
   Chunk breakdown:
     Chunk 1: lines 1-508 (508 lines)
     Chunk 2: lines 509-1085 (577 lines)
     Chunk 3: lines 1086-1347 (262 lines)

📄 002_module.audio-video.asf.php:
   Lines: 2,020
   Chunks: 4
   Chunk breakdown:
     Chunk 1: lines 1-500 (500 lines)
     Chunk 2: lines 501-1000 (500 lines)
     Chunk 3: lines 1001-1577 (577 lines)
     Chunk 4: lines 1578-2020 (443 lines)

📄 003_wp-db.php:
   Lines: 2,187
   Chunks: 5
   Chunk breakdown:
     Chunk 1: lines 1-500 (500 lines)
     Chunk 2: lines 501-1002 (502 lines)
     Chunk 3: lines 1003-1523 (521 lines)
     Chunk 4: lines 1524-2036 (513 lines)
     Chunk 5: lines 2037-2187 (151 lines)

✅ File analysis complete!

📄 003_wp-db.php:
   Lines: 2,187
   Chunks: 5
   Chunk breakdown:
     Chunk 1: lines 1-500 (500 lines)
     Chunk 2: lines 501-1002 (502 lines)
     Chunk 3: lines 1003-1523 (521 lines)
     Chunk 4: lines 1524-2036 (513 lines)
     Chunk 5: lines 2037-2187 (

In [147]:
# UNCOMMENT THESE LINES FOR BATCH MIGRATION WITH DIFFERENT PROVIDERS:

# Google AI batch migration:
# batch_migrate(list(test_files.keys())[:3], model='gemini-1.5-pro', strategy='basic')

# OpenRouter batch migration:
# batch_migrate(list(test_files.keys())[:3], model='mistralai/mistral-small-3.2-24b-instruct:free', strategy='basic')

# Mixed provider batch (you can mix and match in sequence):
migrate_file('003_wp-db.php', 'gemini-1.5-flash', 'basic')
# migrate_file('file2.php', 'anthropic/claude-3.5-sonnet', 'comprehensive')

🚀 Migrating 003_wp-db.php using gemini-1.5-flash with basic strategy...
📏 Input code length: 60,679 characters (2,187 lines)
📦 Large file detected (2187 lines) - using organized chunking
📦 Split into 5 chunks of ~500 lines each
📁 Saving chunks to: chunked_model_output\gemini_1_5_flash\003_wp-db

[Chunk 1/5] Processing lines 1-500...
📏 Chunk prompt length: 9,424 characters
🔗 Making API call via multi-provider client...
🔗 Using GOOGLE provider for gemini-1.5-flash
📊 Provider: GOOGLE
📏 Response length: 7856 characters
✅ Response saved to: chunked_model_output\gemini_1_5_flash\003_wp-db\1.txt
✅ Chunk 1 processed successfully

[Chunk 2/5] Processing lines 501-1002...
📏 Chunk prompt length: 15,648 characters
🔗 Making API call via multi-provider client...
🔗 Using GOOGLE provider for gemini-1.5-flash
📊 Provider: GOOGLE
📏 Response length: 7856 characters
✅ Response saved to: chunked_model_output\gemini_1_5_flash\003_wp-db\1.txt
✅ Chunk 1 processed successfully

[Chunk 2/5] Processing lines 501-

["// MIGRATION_START\n<?php\n/**\n * WordPress DB Class\n *\n * Original code from {@link http://php.justinvincent.com Justin Vincent (justin@visunet.ie)}\n *\n * @package WordPress\n * @subpackage Database\n * @since 0.71\n */\n\n/**\n * @since 0.71\n */\ndefine( 'EZSQL_VERSION', 'WP1.25' );\n\n/**\n * @since 0.71\n */\ndefine( 'OBJECT', 'OBJECT' );\ndefine( 'object', 'OBJECT' ); // Back compat.\n\n/**\n * @since 2.5.0\n */\ndefine( 'OBJECT_K', 'OBJECT_K' );\n\n/**\n * @since 0.71\n */\ndefine( 'ARRAY_A', 'ARRAY_A' );\n\n/**\n * @since 0.71\n */\ndefine( 'ARRAY_N', 'ARRAY_N' );\n\n/**\n * WordPress Database Access Abstraction Object\n *\n * It is possible to replace this class with your own\n * by setting the $wpdb global variable in wp-content/db.php\n * file to your class. The wpdb class will still be included,\n * so you can extend it or simply use your own.\n *\n * @link http://codex.wordpress.org/Function_Reference/wpdb_Class\n *\n * @package WordPress\n * @subpackage Database\n 

In [148]:
# SIMPLIFIED OUTPUT PARSER - CLEAN AND MINIMAL
import re
from pathlib import Path

class OutputParser:
    """Simplified parser for model responses with removed redundancy."""
    
    def __init__(self):
        self.model_output_path = Path('model_output')
        self.parsed_path = Path('new-version')
        self.parsed_path.mkdir(exist_ok=True)
    
    def extract_migrated_code(self, response_content: str) -> str:
        """Extract code between MIGRATION_START and MIGRATION_END markers."""
        start_match = re.search(r'//\s*MIGRATION_START\s*\n', response_content, re.IGNORECASE)
        end_match = re.search(r'\n//\s*MIGRATION_END', response_content, re.IGNORECASE)
        
        if start_match and end_match:
            return response_content[start_match.end():end_match.start()].strip()
        return ""
    
    def extract_metadata(self, response_content: str) -> dict:
        """Extract metadata from response file header."""
        metadata = {}
        for line in response_content.split('\n')[:15]:
            if ':' in line:
                key, value = line.split(':', 1)
                key = key.strip().lower()
                if key in ['file', 'model', 'strategy']:
                    metadata[f'original_{key}' if key == 'file' else key] = value.strip()
        return metadata
    
    def parse_single_file(self, response_file: Path) -> dict:
        """Parse a single response file."""
        try:
            print(f"Processing {response_file.name}")
            
            with open(response_file, 'r', encoding='utf-8') as f:
                content = f.read()
            
            metadata = self.extract_metadata(content)
            migrated_code = self.extract_migrated_code(content)
            
            if not metadata.get('original_file'):
                print(f"   ERROR: No original file found in metadata")
                return {'success': False}
            
            if not migrated_code:
                print(f"   ERROR: No migrated code found between markers")
                return {'success': False}
            
            print(f"   SUCCESS: Found {len(migrated_code)} chars of migrated code")
            return {
                'success': True,
                'metadata': metadata,
                'migrated_code': migrated_code
            }
        
        except Exception as e:
            print(f"   ERROR: {e}")
            return {'success': False, 'error': str(e)}
    
    def _normalize_model_name(self, model_name: str) -> str:
        """Convert model name to filesystem-safe format."""
        return re.sub(r'[/:.-]', '_', model_name.lower())
    
    def _determine_output_file(self, metadata: dict, response_filename: str, model_folder: Path) -> Path:
        """Determine the output file path."""
        original_filename = metadata.get('original_file')
        if original_filename:
            return model_folder / original_filename
        
        # Fallback: derive from response filename
        php_filename = response_filename.replace('.txt', '.php') if response_filename.endswith('.txt') else f"{response_filename}.php"
        return model_folder / php_filename
    
    def save_parsed_file(self, result: dict, response_filename: str, model_folder_name: str = None) -> bool:
        """Save parsed result to organized structure."""
        try:
            metadata = result['metadata']
            migrated_code = result['migrated_code']
            
            # Determine model folder name
            model_clean = model_folder_name or self._normalize_model_name(metadata.get('model', 'unknown_model'))
            
            # Create model folder and output file
            model_folder = self.parsed_path / model_clean
            model_folder.mkdir(exist_ok=True)
            output_file = self._determine_output_file(metadata, response_filename, model_folder)
            
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(migrated_code)
            
            print(f"   ✅ SAVED: {output_file}")
            return True
            
        except Exception as e:
            print(f"   ❌ SAVE ERROR: {e}")
            return False
    
    def _process_response_files(self, response_files: list, model_folder_name: str = None) -> tuple:
        """Process a list of response files and return success/failed counts."""
        success_count = 0
        failed_count = 0
        
        for response_file in response_files:
            result = self.parse_single_file(response_file)
            
            if result['success']:
                # Update metadata with model folder name if provided
                if model_folder_name and 'metadata' in result:
                    result['metadata']['model_folder'] = model_folder_name
                
                if self.save_parsed_file(result, response_file.name, model_folder_name):
                    success_count += 1
                else:
                    failed_count += 1
            else:
                failed_count += 1
        
        return success_count, failed_count
    
    def process_all_responses(self):
        """Process all response files in model_output directory."""
        print("🔄 Processing all model responses...")
        
        if not self.model_output_path.exists():
            print(f"❌ Directory {self.model_output_path} not found")
            return
        
        # Look for model subfolders
        model_folders = [d for d in self.model_output_path.iterdir() if d.is_dir()]
        
        if not model_folders:
            # Fallback: process files directly (old structure)
            response_files = list(self.model_output_path.glob('*.txt'))
            if response_files:
                print(f"📁 Found {len(response_files)} response files in old structure")
                success, failed = self._process_response_files(response_files)
                print(f"\n🎉 Processing completed!")
                print(f"✅ Successfully processed: {success} files")
                print(f"❌ Failed to process: {failed} files")
            else:
                print("❌ No model folders or .txt files found in model_output/")
            return
        
        print(f"📁 Found {len(model_folders)} model folders:")
        for folder in model_folders:
            print(f"   📂 {folder.name}/")
        
        total_success = 0
        total_failed = 0
        
        # Process each model folder
        for model_folder in model_folders:
            print(f"\n🔄 Processing model: {model_folder.name}")
            
            response_files = list(model_folder.glob('*.txt'))
            print(f"   📄 Found {len(response_files)} response files")
            
            if not response_files:
                print("   ⚠️  No .txt files found in this model folder")
                continue
            
            success_count, failed_count = self._process_response_files(response_files, model_folder.name)
            
            print(f"   ✅ Successfully processed: {success_count} files")
            print(f"   ❌ Failed to process: {failed_count} files")
            
            total_success += success_count
            total_failed += failed_count
        
        print(f"\n🎉 Overall processing completed!")
        print(f"✅ Total successfully processed: {total_success} files")
        print(f"❌ Total failed to process: {total_failed} files")
        
        # Show results summary
        if total_success > 0:
            print(f"\n📁 Results saved to '{self.parsed_path}':")
            for model_folder in sorted(self.parsed_path.iterdir()):
                if model_folder.is_dir():
                    php_files = list(model_folder.glob('*.php'))
                    print(f"   📂 {model_folder.name}/ ({len(php_files)} files)")

# Initialize simplified parser
parser = OutputParser()


In [149]:
# FILE RECONSTRUCTOR - COMBINES PARSED CHUNKS INTO COMPLETE FILES
class FileReconstructor:
    """Reconstructs complete files from parsed chunk files."""
    
    def __init__(self, parser):
        self.parser = parser  # Use the simple parser for individual chunks
        self.chunked_output_path = Path('chunked_model_output')
        self.final_output_path = Path('new-version')
        self.final_output_path.mkdir(exist_ok=True)
    
    def find_chunked_files(self):
        """Find all chunked file directories."""
        if not self.chunked_output_path.exists():
            print("No chunked_model_output directory found")
            return []
        
        chunked_files = []
        
        for model_dir in self.chunked_output_path.iterdir():
            if model_dir.is_dir():
                for file_dir in model_dir.iterdir():
                    if file_dir.is_dir():
                        # Check if it has numbered chunk files
                        chunk_files = list(file_dir.glob('*.txt'))
                        if chunk_files:
                            chunked_files.append({
                                'model': model_dir.name,
                                'filename': file_dir.name,
                                'directory': file_dir,
                                'chunk_count': len(chunk_files)
                            })
        
        return chunked_files
    
    def get_chunk_files(self, directory: Path):
        """Get all chunk files from a directory, sorted by number."""
        chunk_files = []
        
        for file in directory.glob('*.txt'):
            try:
                # Extract number from filename (1.txt -> 1)
                chunk_num = int(file.stem)
                chunk_files.append((chunk_num, file))
            except ValueError:
                print(f"WARNING: Skipping non-numeric chunk file: {file.name}")
        
        # Sort by chunk number
        chunk_files.sort(key=lambda x: x[0])
        return chunk_files
    
    def reconstruct_file(self, file_info: dict):
        """Reconstruct a complete file from its chunks."""
        print(f"\nReconstructing {file_info['filename']}.php from {file_info['chunk_count']} chunks")
        print(f"Model: {file_info['model']}")
        print(f"Directory: {file_info['directory']}")
        
        # Get sorted chunk files
        chunk_files = self.get_chunk_files(file_info['directory'])
        
        if not chunk_files:
            print("   ERROR: No valid chunk files found")
            return False
        
        # Check for missing chunks
        expected_numbers = list(range(1, len(chunk_files) + 1))
        actual_numbers = [num for num, _ in chunk_files]
        missing = set(expected_numbers) - set(actual_numbers)
        
        if missing:
            print(f"   WARNING: Missing chunks: {sorted(missing)}")
        
        print(f"   Found chunks: {actual_numbers}")
        
        # Parse each chunk
        parsed_chunks = []
        metadata = None
        
        for chunk_num, chunk_file in chunk_files:
            print(f"   Processing chunk {chunk_num}...")
            result = self.parser.parse_single_file(chunk_file)
            
            if result['success']:
                parsed_chunks.append({
                    'number': chunk_num,
                    'code': result['migrated_code'],
                    'metadata': result['metadata']
                })
                
                # Use metadata from first successful chunk
                if metadata is None:
                    metadata = result['metadata']
                    
                print(f"      SUCCESS: {len(result['migrated_code'])} chars")
            else:
                print(f"      ERROR: Failed to parse chunk {chunk_num}")
                parsed_chunks.append({
                    'number': chunk_num,
                    'code': None,
                    'metadata': None
                })
        
        if not any(chunk['code'] for chunk in parsed_chunks):
            print("   ERROR: No chunks could be parsed successfully")
            return False
        
        # Combine chunks
        combined_code = []
        successful_chunks = 0
        final_code = ""  # Initialize final_code
        
        for chunk in parsed_chunks:
            if chunk['code']:
                combined_code.append(chunk['code'])
                successful_chunks += 1
            else:
                print(f"   WARNING: Chunk {chunk['number']} failed - adding placeholder comment")
                combined_code.append(f"// ERROR: Chunk {chunk['number']} failed to parse")
        
        final_code = ''.join(combined_code)
        print(f"   Combined {successful_chunks}/{len(parsed_chunks)} chunks successfully")
        print(f"   Final code length: {len(final_code)} characters")
        
        # Save reconstructed file
        return self.save_reconstructed_file(file_info, final_code, metadata)
    
    def save_reconstructed_file(self, file_info: dict, code: str, metadata: dict):
        """Save the reconstructed complete file."""
        try:
            # Create model folder in final output
            model_folder = self.final_output_path / file_info['model']
            model_folder.mkdir(exist_ok=True)
            
            # Save the reconstructed file
            output_file = model_folder / f"{file_info['filename']}.php"
            
            with open(output_file, 'w', encoding='utf-8') as f:
                # Write clean PHP code without metadata header
                f.write(code)
            
            print(f"   SAVED: {output_file}")
            return True
            
        except Exception as e:
            print(f"   ERROR saving file: {e}")
            return False
    
    def reconstruct_all_files(self):
        """Reconstruct all chunked files found."""
        print("🔧 Starting file reconstruction...")
        
        chunked_files = self.find_chunked_files()
        
        if not chunked_files:
            print("No chunked files found to reconstruct")
            return
        
        print(f"Found {len(chunked_files)} chunked files to reconstruct:")
        for file_info in chunked_files:
            print(f"   {file_info['model']}/{file_info['filename']}.php ({file_info['chunk_count']} chunks)")
        
        successful = 0
        failed = 0
        
        for file_info in chunked_files:
            if self.reconstruct_file(file_info):
                successful += 1
            else:
                failed += 1
        
        print(f"\n🎉 Reconstruction completed!")
        print(f"✅ Successfully reconstructed: {successful} files")
        print(f"❌ Failed to reconstruct: {failed} files")
        
        if successful > 0:
            print(f"\n📁 Reconstructed files saved to: {self.final_output_path}")
            for model_folder in sorted(self.final_output_path.iterdir()):
                if model_folder.is_dir():
                    php_files = list(model_folder.glob('*.php'))
                    print(f"   {model_folder.name}/ ({len(php_files)} files)")

# Initialize the reconstructor with our simple parser
reconstructor = FileReconstructor(parser)


In [150]:
parser.process_all_responses()
reconstructor.reconstruct_all_files()

🔄 Processing all model responses...
❌ Directory model_output not found
🔧 Starting file reconstruction...
Found 3 chunked files to reconstruct:
   gemini_1_5_flash/001_getid3.lib.php (3 chunks)
   gemini_1_5_flash/003_wp-db.php (5 chunks)
   mistralai_mistral_small_3_2_24b_instruct_free/001_getid3.lib.php (3 chunks)

Reconstructing 001_getid3.lib.php from 3 chunks
Model: gemini_1_5_flash
Directory: chunked_model_output\gemini_1_5_flash\001_getid3.lib
   Found chunks: [1, 2, 3]
   Processing chunk 1...
Processing 1.txt
   SUCCESS: Found 16634 chars of migrated code
      SUCCESS: 16634 chars
   Processing chunk 2...
Processing 2.txt
   SUCCESS: Found 19555 chars of migrated code
      SUCCESS: 19555 chars
   Processing chunk 3...
Processing 3.txt
   SUCCESS: Found 8281 chars of migrated code
      SUCCESS: 8281 chars
   Combined 3/3 chunks successfully
   Final code length: 44470 characters
   SAVED: new-version\gemini_1_5_flash\001_getid3.lib.php

Reconstructing 003_wp-db.php from 5 chu