In [1]:
import subprocess
import sys
import os
from pathlib import Path
import time

def install_requirements():
    """Install the analysis requirements, printing progress for each package.

    Searches a fixed set of paths (relative to the current working directory)
    for ``requirements_analysis.txt`` and runs one ``pip install`` subprocess
    per requirement listed in the first file found. Requirements that fail or
    time out are retried once without their version pin through
    ``install_basic_requirements``. If no file is found, or the file cannot be
    read, the built-in ``basic_requirements`` list is installed instead.

    Returns:
        None. Progress and results are reported on stdout only.
    """
    # Function-scope import so this block stays self-contained.
    import re

    # Fallback set used when no requirements file is usable
    basic_requirements = [
        'pandas>=1.5.0',
        'numpy>=1.21.0',
        'matplotlib>=3.5.0',
        'seaborn>=0.11.0',
        'scipy>=1.8.0',
        'tqdm>=4.64.0',
        'pyyaml>=6.0',
        'scikit-learn>=1.1.0',
        'statsmodels>=0.13.0'
    ]

    # Candidate locations, checked in order; the first existing one wins
    requirements_paths = [
        Path('./requirements/requirements_analysis.txt'),
        Path('../requirements/requirements_analysis.txt'),
        Path('../../requirements/requirements_analysis.txt'),
        Path('./Deliverables-Code/requirements/requirements_analysis.txt')
    ]

    requirements_file = None
    for path in requirements_paths:
        if path.exists():
            requirements_file = path
            print(f"✓ Found requirements file: {path}")
            break

    if requirements_file is None:
        print("⚠ requirements_analysis.txt not found")
        print("Installing basic requirements for analysis...")
        install_basic_requirements(basic_requirements)
        return

    try:
        # pip reads requirements files as UTF-8; "utf-8-sig" also drops a BOM,
        # so the platform default encoding (e.g. cp1252) is never relied on.
        raw_lines = requirements_file.read_text(encoding='utf-8-sig').splitlines()
    except (OSError, UnicodeError) as e:
        print(f"⚠ Error reading requirements file: {e}")
        print("Falling back to basic requirements...")
        install_basic_requirements(basic_requirements)
        return

    # Drop blank lines, full-line comments and trailing " # ..." comments.
    # pip strips trailing comments when it parses a file itself, but a raw
    # command-line argument gets no such treatment.
    requirements_list = []
    for line in raw_lines:
        requirement = re.sub(r'(^|\s+)#.*$', '', line).strip()
        if requirement:
            requirements_list.append(requirement)

    print(f"\n📦 Installing {len(requirements_list)} packages from requirements file...")
    print("=" * 60)

    # Install each requirement individually so one failure does not stop the rest
    failed_packages = []
    successful_packages = []

    for i, requirement in enumerate(requirements_list, 1):
        # Bare project name: cut at the first version operator, extras
        # bracket, marker separator, URL "@" or whitespace. This keeps the
        # unpinned retry spec below valid for specifiers such as "~=" or "<".
        package_name = re.split(r'[<>=!~;@\[\s]', requirement, maxsplit=1)[0] or requirement
        print(f"\n[{i}/{len(requirements_list)}] Installing {package_name}...")
        print(f"   Full requirement: {requirement}")

        start_time = time.time()
        try:
            result = subprocess.run([
                sys.executable, '-m', 'pip', 'install', requirement, '--timeout', '120'
            ], capture_output=True, text=True, timeout=180)  # 3 minute timeout per package

            elapsed_time = time.time() - start_time

            if result.returncode == 0:
                print(f"   ✅ {package_name} installed successfully ({elapsed_time:.1f}s)")
                successful_packages.append(package_name)
            else:
                print(f"   ❌ Failed to install {package_name}")
                print(f"   Error: {result.stderr[:200]}...")
                failed_packages.append(package_name)

        except subprocess.TimeoutExpired:
            print(f"   ⏰ Timeout installing {package_name} (>3 minutes)")
            failed_packages.append(package_name)
        except Exception as e:
            # Best effort: record the failure and carry on with the next package
            print(f"   ❌ Exception installing {package_name}: {e}")
            failed_packages.append(package_name)

    # Summary
    print("\n" + "=" * 60)
    print(f"📊 Installation Summary:")
    print(f"   ✅ Successful: {len(successful_packages)}")
    print(f"   ❌ Failed: {len(failed_packages)}")

    if successful_packages:
        print(f"\n   Successfully installed: {', '.join(successful_packages[:5])}")
        if len(successful_packages) > 5:
            print(f"   ... and {len(successful_packages) - 5} more")

    if failed_packages:
        print(f"\n   ⚠️  Failed packages: {', '.join(failed_packages)}")
        print("   These will be retried with basic fallback...")
        # ">=0.0.0" effectively removes the version pin for the retry
        install_basic_requirements([f"{pkg}>=0.0.0" for pkg in failed_packages])

def install_basic_requirements(requirements_list):
    """Install each requirement with its own ``pip install`` subprocess.

    Prints one progress line per requirement with the outcome (success and
    elapsed time, failure with the start of pip's stderr, timeout, or
    exception). Failures are reported but never raised, so one bad package
    does not stop the remaining installs.

    Args:
        requirements_list: Requirement strings such as ``'pandas>=1.5.0'``.

    Returns:
        None. Results are reported on stdout only.
    """
    # Function-scope import so this block stays self-contained.
    import re

    total = len(requirements_list)
    print(f"\n🔄 Installing {total} basic requirements...")
    print("-" * 50)

    for i, req in enumerate(requirements_list, 1):
        # Display name: cut at the first version operator ("~=", "<", "!=", ...),
        # extras bracket, marker separator, URL "@" or whitespace.
        package_name = re.split(r'[<>=!~;@\[\s]', req.strip(), maxsplit=1)[0] or req
        # Flush so the package name is visible while pip is still running;
        # the line has no newline yet and could otherwise sit in the buffer.
        print(f"[{i}/{total}] Installing {package_name}...", end=" ", flush=True)

        start_time = time.time()
        try:
            result = subprocess.run([
                sys.executable, '-m', 'pip', 'install', req, '--timeout', '120'
            ], capture_output=True, text=True, timeout=180)

            elapsed_time = time.time() - start_time

            if result.returncode == 0:
                print(f"✅ ({elapsed_time:.1f}s)")
            else:
                print(f"❌ Failed")
                print(f"    Error: {result.stderr[:100]}...")

        except subprocess.TimeoutExpired:
            print("⏰ Timeout (>3 min)")
        except Exception as e:
            # Best effort: report and move on to the next requirement
            print(f"❌ Exception: {str(e)[:50]}...")

def check_package_installed(package_name):
    """Return True if ``pip show`` finds *package_name* for this interpreter.

    Runs ``<sys.executable> -m pip show <package_name>`` and treats a zero
    exit code as "installed".

    Args:
        package_name: Distribution name as pip knows it (e.g. ``'scikit-learn'``).

    Returns:
        bool: True when pip exits with 0; False when it exits non-zero or the
        subprocess cannot be run at all.
    """
    try:
        result = subprocess.run([
            sys.executable, '-m', 'pip', 'show', package_name
        ], capture_output=True, text=True)
    except Exception:
        # Best effort: any failure to run pip counts as "not installed".
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate so the check can still be interrupted.
        return False
    return result.returncode == 0

def verify_imports():
    """Try to import each key analysis library and report the result.

    Prints one OK/FAILED line per module, followed by a summary.

    Returns:
        bool: True if every listed module imported, False otherwise.
    """
    # Function-scope import so this block stays self-contained.
    import importlib

    required_libraries = (
        'pandas',
        'numpy',
        'matplotlib.pyplot',
        'seaborn',
        'scipy',
        'pathlib',
        'json',
        'yaml',
        'sklearn',
        'statsmodels.api',
    )

    print("\n🔍 Verifying library imports...")
    print("-" * 40)

    # Packages installed while this interpreter is running may be hidden by
    # the import system's finder caches; clear them before checking.
    importlib.invalidate_caches()

    failed_imports = []
    for lib in required_libraries:
        try:
            importlib.import_module(lib)
            print(f"✅ {lib:<20} - OK")
        except Exception as e:
            # A broken install can raise something other than ImportError
            # during import; report it as a failure instead of aborting
            # the whole verification.
            print(f"❌ {lib:<20} - FAILED: {str(e)[:50]}...")
            failed_imports.append(lib)

    if failed_imports:
        print(f"\n⚠️  Warning: {len(failed_imports)} libraries failed to import")
        print("   Failed libraries:", ', '.join(failed_imports))
        print("   You may need to restart the kernel after installation")
    else:
        print("\n✅ All required libraries verified successfully")

    return not failed_imports

def show_pre_installation_status():
    """Print and return the install status of the core analysis packages.

    Each package is checked with ``check_package_installed`` and reported on
    its own line, followed by a short summary.

    Returns:
        tuple: ``(already_installed, need_installation)``, two lists of
        package names in the order they were checked.
    """
    package_names = (
        'pandas', 'numpy', 'matplotlib', 'seaborn', 'scipy',
        'scikit-learn', 'statsmodels', 'tqdm', 'pyyaml',
    )

    print("🔍 Checking current package status...")
    print("-" * 50)

    present, missing = [], []
    for name in package_names:
        if not check_package_installed(name):
            print(f"❌ {name:<15} - Needs installation")
            missing.append(name)
            continue
        print(f"✅ {name:<15} - Already installed")
        present.append(name)

    # Totals, plus the names still to install when there are any
    print(f"\n📊 Status Summary:")
    print(f"   Already installed: {len(present)}")
    print(f"   Need installation: {len(missing)}")
    if missing:
        print(f"   Packages to install: {', '.join(missing)}")

    return present, missing

# Cell driver: banner, status check, timed install, import verification, verdict
print("=" * 70, "🚀 Analysis Requirements Installation & Verification", "=" * 70, sep="\n")

# Record what pip already reports as installed before changing anything
already_installed, need_installation = show_pre_installation_status()

# Time the whole installation run
print(f"\n⏱️  Starting installation process at {time.strftime('%H:%M:%S')}")
start_total = time.time()
install_requirements()
total_time = time.time() - start_total
print(f"\n⏱️  Total installation time: {total_time:.1f} seconds")

# Check that the libraries actually import, then print the closing verdict
verification_success = verify_imports()
print(
    "\n" + "=" * 70,
    "🎉 Setup complete! Ready to proceed with analysis."
    if verification_success
    else "⚠️  Some issues detected. You may need to restart the kernel.",
    "=" * 70,
    sep="\n",
)

🚀 Analysis Requirements Installation & Verification
🔍 Checking current package status...
--------------------------------------------------
✅ pandas          - Already installed
✅ numpy           - Already installed
❌ matplotlib      - Needs installation
❌ seaborn         - Needs installation
✅ scipy           - Already installed
✅ scikit-learn    - Already installed
❌ statsmodels     - Needs installation
✅ tqdm            - Already installed
✅ pyyaml          - Already installed

📊 Status Summary:
   Already installed: 6
   Need installation: 3
   Packages to install: matplotlib, seaborn, statsmodels

⏱️  Starting installation process at 12:46:51
✓ Found requirements file: ..\requirements\requirements_analysis.txt

📦 Installing 14 packages from requirements file...

[1/14] Installing pandas...
   Full requirement: pandas>=1.5.0
   ✅ pandas installed successfully (1.7s)

[2/14] Installing numpy...
   Full requirement: numpy>=1.21.0
   ✅ numpy installed successfully (1.5s)

[3/14] Insta

In [None]:
import subprocess
import sys
import os
from pathlib import Path
import time

def install_requirements():
    """Install the analysis requirements, printing progress for each package.

    Searches a fixed set of paths (relative to the current working directory)
    for ``requirements_analysis.txt`` and runs one ``pip install`` subprocess
    per requirement listed in the first file found. Requirements that fail or
    time out are retried once without their version pin through
    ``install_basic_requirements``. If no file is found, or the file cannot be
    read, the built-in ``basic_requirements`` list is installed instead.

    Returns:
        None. Progress and results are reported on stdout only.
    """
    # Function-scope import so this block stays self-contained.
    import re

    # Fallback set used when no requirements file is usable
    basic_requirements = [
        'pandas>=1.5.0',
        'numpy>=1.21.0',
        'matplotlib>=3.5.0',
        'seaborn>=0.11.0',
        'scipy>=1.8.0',
        'tqdm>=4.64.0',
        'pyyaml>=6.0',
        'scikit-learn>=1.1.0',
        'statsmodels>=0.13.0'
    ]

    # Candidate locations, checked in order; the first existing one wins
    requirements_paths = [
        Path('./requirements/requirements_analysis.txt'),
        Path('../requirements/requirements_analysis.txt'),
        Path('../../requirements/requirements_analysis.txt'),
        Path('./Deliverables-Code/requirements/requirements_analysis.txt')
    ]

    requirements_file = None
    for path in requirements_paths:
        if path.exists():
            requirements_file = path
            print(f"✓ Found requirements file: {path}")
            break

    if requirements_file is None:
        print("⚠ requirements_analysis.txt not found")
        print("Installing basic requirements for analysis...")
        install_basic_requirements(basic_requirements)
        return

    try:
        # pip reads requirements files as UTF-8; "utf-8-sig" also drops a BOM,
        # so the platform default encoding (e.g. cp1252) is never relied on.
        raw_lines = requirements_file.read_text(encoding='utf-8-sig').splitlines()
    except (OSError, UnicodeError) as e:
        print(f"⚠ Error reading requirements file: {e}")
        print("Falling back to basic requirements...")
        install_basic_requirements(basic_requirements)
        return

    # Drop blank lines, full-line comments and trailing " # ..." comments.
    # pip strips trailing comments when it parses a file itself, but a raw
    # command-line argument gets no such treatment.
    requirements_list = []
    for line in raw_lines:
        requirement = re.sub(r'(^|\s+)#.*$', '', line).strip()
        if requirement:
            requirements_list.append(requirement)

    print(f"\n📦 Installing {len(requirements_list)} packages from requirements file...")
    print("=" * 60)

    # Install each requirement individually so one failure does not stop the rest
    failed_packages = []
    successful_packages = []

    for i, requirement in enumerate(requirements_list, 1):
        # Bare project name: cut at the first version operator, extras
        # bracket, marker separator, URL "@" or whitespace. This keeps the
        # unpinned retry spec below valid for specifiers such as "~=" or "<".
        package_name = re.split(r'[<>=!~;@\[\s]', requirement, maxsplit=1)[0] or requirement
        print(f"\n[{i}/{len(requirements_list)}] Installing {package_name}...")
        print(f"   Full requirement: {requirement}")

        start_time = time.time()
        try:
            result = subprocess.run([
                sys.executable, '-m', 'pip', 'install', requirement, '--timeout', '120'
            ], capture_output=True, text=True, timeout=180)  # 3 minute timeout per package

            elapsed_time = time.time() - start_time

            if result.returncode == 0:
                print(f"   ✅ {package_name} installed successfully ({elapsed_time:.1f}s)")
                successful_packages.append(package_name)
            else:
                print(f"   ❌ Failed to install {package_name}")
                print(f"   Error: {result.stderr[:200]}...")
                failed_packages.append(package_name)

        except subprocess.TimeoutExpired:
            print(f"   ⏰ Timeout installing {package_name} (>3 minutes)")
            failed_packages.append(package_name)
        except Exception as e:
            # Best effort: record the failure and carry on with the next package
            print(f"   ❌ Exception installing {package_name}: {e}")
            failed_packages.append(package_name)

    # Summary
    print("\n" + "=" * 60)
    print(f"📊 Installation Summary:")
    print(f"   ✅ Successful: {len(successful_packages)}")
    print(f"   ❌ Failed: {len(failed_packages)}")

    if successful_packages:
        print(f"\n   Successfully installed: {', '.join(successful_packages[:5])}")
        if len(successful_packages) > 5:
            print(f"   ... and {len(successful_packages) - 5} more")

    if failed_packages:
        print(f"\n   ⚠️  Failed packages: {', '.join(failed_packages)}")
        print("   These will be retried with basic fallback...")
        # ">=0.0.0" effectively removes the version pin for the retry
        install_basic_requirements([f"{pkg}>=0.0.0" for pkg in failed_packages])

def install_basic_requirements(requirements_list):
    """Install each requirement with its own ``pip install`` subprocess.

    Prints one progress line per requirement with the outcome (success and
    elapsed time, failure with the start of pip's stderr, timeout, or
    exception). Failures are reported but never raised, so one bad package
    does not stop the remaining installs.

    Args:
        requirements_list: Requirement strings such as ``'pandas>=1.5.0'``.

    Returns:
        None. Results are reported on stdout only.
    """
    # Function-scope import so this block stays self-contained.
    import re

    total = len(requirements_list)
    print(f"\n🔄 Installing {total} basic requirements...")
    print("-" * 50)

    for i, req in enumerate(requirements_list, 1):
        # Display name: cut at the first version operator ("~=", "<", "!=", ...),
        # extras bracket, marker separator, URL "@" or whitespace.
        package_name = re.split(r'[<>=!~;@\[\s]', req.strip(), maxsplit=1)[0] or req
        # Flush so the package name is visible while pip is still running;
        # the line has no newline yet and could otherwise sit in the buffer.
        print(f"[{i}/{total}] Installing {package_name}...", end=" ", flush=True)

        start_time = time.time()
        try:
            result = subprocess.run([
                sys.executable, '-m', 'pip', 'install', req, '--timeout', '120'
            ], capture_output=True, text=True, timeout=180)

            elapsed_time = time.time() - start_time

            if result.returncode == 0:
                print(f"✅ ({elapsed_time:.1f}s)")
            else:
                print(f"❌ Failed")
                print(f"    Error: {result.stderr[:100]}...")

        except subprocess.TimeoutExpired:
            print("⏰ Timeout (>3 min)")
        except Exception as e:
            # Best effort: report and move on to the next requirement
            print(f"❌ Exception: {str(e)[:50]}...")

def check_package_installed(package_name):
    """Return True if ``pip show`` finds *package_name* for this interpreter.

    Runs ``<sys.executable> -m pip show <package_name>`` and treats a zero
    exit code as "installed".

    Args:
        package_name: Distribution name as pip knows it (e.g. ``'scikit-learn'``).

    Returns:
        bool: True when pip exits with 0; False when it exits non-zero or the
        subprocess cannot be run at all.
    """
    try:
        result = subprocess.run([
            sys.executable, '-m', 'pip', 'show', package_name
        ], capture_output=True, text=True)
    except Exception:
        # Best effort: any failure to run pip counts as "not installed".
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate so the check can still be interrupted.
        return False
    return result.returncode == 0

def verify_imports():
    """Try to import each key analysis library and report the result.

    Prints one OK/FAILED line per module, followed by a summary.

    Returns:
        bool: True if every listed module imported, False otherwise.
    """
    # Function-scope import so this block stays self-contained.
    import importlib

    required_libraries = (
        'pandas',
        'numpy',
        'matplotlib.pyplot',
        'seaborn',
        'scipy',
        'pathlib',
        'json',
        'yaml',
        'sklearn',
        'statsmodels.api',
    )

    print("\n🔍 Verifying library imports...")
    print("-" * 40)

    # Packages installed while this interpreter is running may be hidden by
    # the import system's finder caches; clear them before checking.
    importlib.invalidate_caches()

    failed_imports = []
    for lib in required_libraries:
        try:
            importlib.import_module(lib)
            print(f"✅ {lib:<20} - OK")
        except Exception as e:
            # A broken install can raise something other than ImportError
            # during import; report it as a failure instead of aborting
            # the whole verification.
            print(f"❌ {lib:<20} - FAILED: {str(e)[:50]}...")
            failed_imports.append(lib)

    if failed_imports:
        print(f"\n⚠️  Warning: {len(failed_imports)} libraries failed to import")
        print("   Failed libraries:", ', '.join(failed_imports))
        print("   You may need to restart the kernel after installation")
    else:
        print("\n✅ All required libraries verified successfully")

    return not failed_imports

def show_pre_installation_status():
    """Print and return the install status of the core analysis packages.

    Each package is checked with ``check_package_installed`` and reported on
    its own line, followed by a short summary.

    Returns:
        tuple: ``(already_installed, need_installation)``, two lists of
        package names in the order they were checked.
    """
    package_names = (
        'pandas', 'numpy', 'matplotlib', 'seaborn', 'scipy',
        'scikit-learn', 'statsmodels', 'tqdm', 'pyyaml',
    )

    print("🔍 Checking current package status...")
    print("-" * 50)

    present, missing = [], []
    for name in package_names:
        if not check_package_installed(name):
            print(f"❌ {name:<15} - Needs installation")
            missing.append(name)
            continue
        print(f"✅ {name:<15} - Already installed")
        present.append(name)

    # Totals, plus the names still to install when there are any
    print(f"\n📊 Status Summary:")
    print(f"   Already installed: {len(present)}")
    print(f"   Need installation: {len(missing)}")
    if missing:
        print(f"   Packages to install: {', '.join(missing)}")

    return present, missing

# Cell driver: banner, status check, timed install, import verification, verdict
print("=" * 70, "🚀 Analysis Requirements Installation & Verification", "=" * 70, sep="\n")

# Record what pip already reports as installed before changing anything
already_installed, need_installation = show_pre_installation_status()

# Time the whole installation run
print(f"\n⏱️  Starting installation process at {time.strftime('%H:%M:%S')}")
start_total = time.time()
install_requirements()
total_time = time.time() - start_total
print(f"\n⏱️  Total installation time: {total_time:.1f} seconds")

# Check that the libraries actually import, then print the closing verdict
verification_success = verify_imports()
print(
    "\n" + "=" * 70,
    "🎉 Setup complete! Ready to proceed with analysis."
    if verification_success
    else "⚠️  Some issues detected. You may need to restart the kernel.",
    "=" * 70,
    sep="\n",
)

🚀 Analysis Requirements Installation & Verification
🔍 Checking current package status...
--------------------------------------------------
✅ pandas          - Already installed
✅ numpy           - Already installed
❌ matplotlib      - Needs installation
❌ seaborn         - Needs installation
✅ scipy           - Already installed
✅ scikit-learn    - Already installed
❌ statsmodels     - Needs installation
✅ tqdm            - Already installed
✅ pyyaml          - Already installed

📊 Status Summary:
   Already installed: 6
   Need installation: 3
   Packages to install: matplotlib, seaborn, statsmodels

⏱️  Starting installation process at 12:46:51
✓ Found requirements file: ..\requirements\requirements_analysis.txt

📦 Installing 14 packages from requirements file...

[1/14] Installing pandas...
   Full requirement: pandas>=1.5.0
   ✅ pandas installed successfully (1.7s)

[2/14] Installing numpy...
   Full requirement: numpy>=1.21.0
   ✅ numpy installed successfully (1.5s)

[3/14] Insta

In [None]:
import subprocess
import sys
import os
from pathlib import Path
import time

def install_requirements():
    """Install the analysis requirements, printing progress for each package.

    Searches a fixed set of paths (relative to the current working directory)
    for ``requirements_analysis.txt`` and runs one ``pip install`` subprocess
    per requirement listed in the first file found. Requirements that fail or
    time out are retried once without their version pin through
    ``install_basic_requirements``. If no file is found, or the file cannot be
    read, the built-in ``basic_requirements`` list is installed instead.

    Returns:
        None. Progress and results are reported on stdout only.
    """
    # Function-scope import so this block stays self-contained.
    import re

    # Fallback set used when no requirements file is usable
    basic_requirements = [
        'pandas>=1.5.0',
        'numpy>=1.21.0',
        'matplotlib>=3.5.0',
        'seaborn>=0.11.0',
        'scipy>=1.8.0',
        'tqdm>=4.64.0',
        'pyyaml>=6.0',
        'scikit-learn>=1.1.0',
        'statsmodels>=0.13.0'
    ]

    # Candidate locations, checked in order; the first existing one wins
    requirements_paths = [
        Path('./requirements/requirements_analysis.txt'),
        Path('../requirements/requirements_analysis.txt'),
        Path('../../requirements/requirements_analysis.txt'),
        Path('./Deliverables-Code/requirements/requirements_analysis.txt')
    ]

    requirements_file = None
    for path in requirements_paths:
        if path.exists():
            requirements_file = path
            print(f"✓ Found requirements file: {path}")
            break

    if requirements_file is None:
        print("⚠ requirements_analysis.txt not found")
        print("Installing basic requirements for analysis...")
        install_basic_requirements(basic_requirements)
        return

    try:
        # pip reads requirements files as UTF-8; "utf-8-sig" also drops a BOM,
        # so the platform default encoding (e.g. cp1252) is never relied on.
        raw_lines = requirements_file.read_text(encoding='utf-8-sig').splitlines()
    except (OSError, UnicodeError) as e:
        print(f"⚠ Error reading requirements file: {e}")
        print("Falling back to basic requirements...")
        install_basic_requirements(basic_requirements)
        return

    # Drop blank lines, full-line comments and trailing " # ..." comments.
    # pip strips trailing comments when it parses a file itself, but a raw
    # command-line argument gets no such treatment.
    requirements_list = []
    for line in raw_lines:
        requirement = re.sub(r'(^|\s+)#.*$', '', line).strip()
        if requirement:
            requirements_list.append(requirement)

    print(f"\n📦 Installing {len(requirements_list)} packages from requirements file...")
    print("=" * 60)

    # Install each requirement individually so one failure does not stop the rest
    failed_packages = []
    successful_packages = []

    for i, requirement in enumerate(requirements_list, 1):
        # Bare project name: cut at the first version operator, extras
        # bracket, marker separator, URL "@" or whitespace. This keeps the
        # unpinned retry spec below valid for specifiers such as "~=" or "<".
        package_name = re.split(r'[<>=!~;@\[\s]', requirement, maxsplit=1)[0] or requirement
        print(f"\n[{i}/{len(requirements_list)}] Installing {package_name}...")
        print(f"   Full requirement: {requirement}")

        start_time = time.time()
        try:
            result = subprocess.run([
                sys.executable, '-m', 'pip', 'install', requirement, '--timeout', '120'
            ], capture_output=True, text=True, timeout=180)  # 3 minute timeout per package

            elapsed_time = time.time() - start_time

            if result.returncode == 0:
                print(f"   ✅ {package_name} installed successfully ({elapsed_time:.1f}s)")
                successful_packages.append(package_name)
            else:
                print(f"   ❌ Failed to install {package_name}")
                print(f"   Error: {result.stderr[:200]}...")
                failed_packages.append(package_name)

        except subprocess.TimeoutExpired:
            print(f"   ⏰ Timeout installing {package_name} (>3 minutes)")
            failed_packages.append(package_name)
        except Exception as e:
            # Best effort: record the failure and carry on with the next package
            print(f"   ❌ Exception installing {package_name}: {e}")
            failed_packages.append(package_name)

    # Summary
    print("\n" + "=" * 60)
    print(f"📊 Installation Summary:")
    print(f"   ✅ Successful: {len(successful_packages)}")
    print(f"   ❌ Failed: {len(failed_packages)}")

    if successful_packages:
        print(f"\n   Successfully installed: {', '.join(successful_packages[:5])}")
        if len(successful_packages) > 5:
            print(f"   ... and {len(successful_packages) - 5} more")

    if failed_packages:
        print(f"\n   ⚠️  Failed packages: {', '.join(failed_packages)}")
        print("   These will be retried with basic fallback...")
        # ">=0.0.0" effectively removes the version pin for the retry
        install_basic_requirements([f"{pkg}>=0.0.0" for pkg in failed_packages])

def install_basic_requirements(requirements_list):
    """Install each requirement with its own ``pip install`` subprocess.

    Prints one progress line per requirement with the outcome (success and
    elapsed time, failure with the start of pip's stderr, timeout, or
    exception). Failures are reported but never raised, so one bad package
    does not stop the remaining installs.

    Args:
        requirements_list: Requirement strings such as ``'pandas>=1.5.0'``.

    Returns:
        None. Results are reported on stdout only.
    """
    # Function-scope import so this block stays self-contained.
    import re

    total = len(requirements_list)
    print(f"\n🔄 Installing {total} basic requirements...")
    print("-" * 50)

    for i, req in enumerate(requirements_list, 1):
        # Display name: cut at the first version operator ("~=", "<", "!=", ...),
        # extras bracket, marker separator, URL "@" or whitespace.
        package_name = re.split(r'[<>=!~;@\[\s]', req.strip(), maxsplit=1)[0] or req
        # Flush so the package name is visible while pip is still running;
        # the line has no newline yet and could otherwise sit in the buffer.
        print(f"[{i}/{total}] Installing {package_name}...", end=" ", flush=True)

        start_time = time.time()
        try:
            result = subprocess.run([
                sys.executable, '-m', 'pip', 'install', req, '--timeout', '120'
            ], capture_output=True, text=True, timeout=180)

            elapsed_time = time.time() - start_time

            if result.returncode == 0:
                print(f"✅ ({elapsed_time:.1f}s)")
            else:
                print(f"❌ Failed")
                print(f"    Error: {result.stderr[:100]}...")

        except subprocess.TimeoutExpired:
            print("⏰ Timeout (>3 min)")
        except Exception as e:
            # Best effort: report and move on to the next requirement
            print(f"❌ Exception: {str(e)[:50]}...")

def check_package_installed(package_name):
    """Return True if ``pip show`` finds *package_name* for this interpreter.

    Runs ``<sys.executable> -m pip show <package_name>`` and treats a zero
    exit code as "installed".

    Args:
        package_name: Distribution name as pip knows it (e.g. ``'scikit-learn'``).

    Returns:
        bool: True when pip exits with 0; False when it exits non-zero or the
        subprocess cannot be run at all.
    """
    try:
        result = subprocess.run([
            sys.executable, '-m', 'pip', 'show', package_name
        ], capture_output=True, text=True)
    except Exception:
        # Best effort: any failure to run pip counts as "not installed".
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate so the check can still be interrupted.
        return False
    return result.returncode == 0

def verify_imports():
    """Try to import each key analysis library and report the result.

    Prints one OK/FAILED line per module, followed by a summary.

    Returns:
        bool: True if every listed module imported, False otherwise.
    """
    # Function-scope import so this block stays self-contained.
    import importlib

    required_libraries = (
        'pandas',
        'numpy',
        'matplotlib.pyplot',
        'seaborn',
        'scipy',
        'pathlib',
        'json',
        'yaml',
        'sklearn',
        'statsmodels.api',
    )

    print("\n🔍 Verifying library imports...")
    print("-" * 40)

    # Packages installed while this interpreter is running may be hidden by
    # the import system's finder caches; clear them before checking.
    importlib.invalidate_caches()

    failed_imports = []
    for lib in required_libraries:
        try:
            importlib.import_module(lib)
            print(f"✅ {lib:<20} - OK")
        except Exception as e:
            # A broken install can raise something other than ImportError
            # during import; report it as a failure instead of aborting
            # the whole verification.
            print(f"❌ {lib:<20} - FAILED: {str(e)[:50]}...")
            failed_imports.append(lib)

    if failed_imports:
        print(f"\n⚠️  Warning: {len(failed_imports)} libraries failed to import")
        print("   Failed libraries:", ', '.join(failed_imports))
        print("   You may need to restart the kernel after installation")
    else:
        print("\n✅ All required libraries verified successfully")

    return not failed_imports

def show_pre_installation_status():
    """Print and return the install status of the core analysis packages.

    Each package is checked with ``check_package_installed`` and reported on
    its own line, followed by a short summary.

    Returns:
        tuple: ``(already_installed, need_installation)``, two lists of
        package names in the order they were checked.
    """
    package_names = (
        'pandas', 'numpy', 'matplotlib', 'seaborn', 'scipy',
        'scikit-learn', 'statsmodels', 'tqdm', 'pyyaml',
    )

    print("🔍 Checking current package status...")
    print("-" * 50)

    present, missing = [], []
    for name in package_names:
        if not check_package_installed(name):
            print(f"❌ {name:<15} - Needs installation")
            missing.append(name)
            continue
        print(f"✅ {name:<15} - Already installed")
        present.append(name)

    # Totals, plus the names still to install when there are any
    print(f"\n📊 Status Summary:")
    print(f"   Already installed: {len(present)}")
    print(f"   Need installation: {len(missing)}")
    if missing:
        print(f"   Packages to install: {', '.join(missing)}")

    return present, missing

# Cell driver: banner, status check, timed install, import verification, verdict
print("=" * 70, "🚀 Analysis Requirements Installation & Verification", "=" * 70, sep="\n")

# Record what pip already reports as installed before changing anything
already_installed, need_installation = show_pre_installation_status()

# Time the whole installation run
print(f"\n⏱️  Starting installation process at {time.strftime('%H:%M:%S')}")
start_total = time.time()
install_requirements()
total_time = time.time() - start_total
print(f"\n⏱️  Total installation time: {total_time:.1f} seconds")

# Check that the libraries actually import, then print the closing verdict
verification_success = verify_imports()
print(
    "\n" + "=" * 70,
    "🎉 Setup complete! Ready to proceed with analysis."
    if verification_success
    else "⚠️  Some issues detected. You may need to restart the kernel.",
    "=" * 70,
    sep="\n",
)

🚀 Analysis Requirements Installation & Verification
🔍 Checking current package status...
--------------------------------------------------
✅ pandas          - Already installed
✅ numpy           - Already installed
❌ matplotlib      - Needs installation
❌ seaborn         - Needs installation
✅ scipy           - Already installed
✅ scikit-learn    - Already installed
❌ statsmodels     - Needs installation
✅ tqdm            - Already installed
✅ pyyaml          - Already installed

📊 Status Summary:
   Already installed: 6
   Need installation: 3
   Packages to install: matplotlib, seaborn, statsmodels

⏱️  Starting installation process at 12:46:51
✓ Found requirements file: ..\requirements\requirements_analysis.txt

📦 Installing 14 packages from requirements file...

[1/14] Installing pandas...
   Full requirement: pandas>=1.5.0
   ✅ pandas installed successfully (1.7s)

[2/14] Installing numpy...
   Full requirement: numpy>=1.21.0
   ✅ numpy installed successfully (1.5s)

[3/14] Insta

In [None]:
import subprocess
import sys
import os
from pathlib import Path
import time

def install_requirements():
    """Install the analysis requirements, printing progress for each package.

    Searches a fixed set of paths (relative to the current working directory)
    for ``requirements_analysis.txt`` and runs one ``pip install`` subprocess
    per requirement listed in the first file found. Requirements that fail or
    time out are retried once without their version pin through
    ``install_basic_requirements``. If no file is found, or the file cannot be
    read, the built-in ``basic_requirements`` list is installed instead.

    Returns:
        None. Progress and results are reported on stdout only.
    """
    # Function-scope import so this block stays self-contained.
    import re

    # Fallback set used when no requirements file is usable
    basic_requirements = [
        'pandas>=1.5.0',
        'numpy>=1.21.0',
        'matplotlib>=3.5.0',
        'seaborn>=0.11.0',
        'scipy>=1.8.0',
        'tqdm>=4.64.0',
        'pyyaml>=6.0',
        'scikit-learn>=1.1.0',
        'statsmodels>=0.13.0'
    ]

    # Candidate locations, checked in order; the first existing one wins
    requirements_paths = [
        Path('./requirements/requirements_analysis.txt'),
        Path('../requirements/requirements_analysis.txt'),
        Path('../../requirements/requirements_analysis.txt'),
        Path('./Deliverables-Code/requirements/requirements_analysis.txt')
    ]

    requirements_file = None
    for path in requirements_paths:
        if path.exists():
            requirements_file = path
            print(f"✓ Found requirements file: {path}")
            break

    if requirements_file is None:
        print("⚠ requirements_analysis.txt not found")
        print("Installing basic requirements for analysis...")
        install_basic_requirements(basic_requirements)
        return

    try:
        # pip reads requirements files as UTF-8; "utf-8-sig" also drops a BOM,
        # so the platform default encoding (e.g. cp1252) is never relied on.
        raw_lines = requirements_file.read_text(encoding='utf-8-sig').splitlines()
    except (OSError, UnicodeError) as e:
        print(f"⚠ Error reading requirements file: {e}")
        print("Falling back to basic requirements...")
        install_basic_requirements(basic_requirements)
        return

    # Drop blank lines, full-line comments and trailing " # ..." comments.
    # pip strips trailing comments when it parses a file itself, but a raw
    # command-line argument gets no such treatment.
    requirements_list = []
    for line in raw_lines:
        requirement = re.sub(r'(^|\s+)#.*$', '', line).strip()
        if requirement:
            requirements_list.append(requirement)

    print(f"\n📦 Installing {len(requirements_list)} packages from requirements file...")
    print("=" * 60)

    # Install each requirement individually so one failure does not stop the rest
    failed_packages = []
    successful_packages = []

    for i, requirement in enumerate(requirements_list, 1):
        # Bare project name: cut at the first version operator, extras
        # bracket, marker separator, URL "@" or whitespace. This keeps the
        # unpinned retry spec below valid for specifiers such as "~=" or "<".
        package_name = re.split(r'[<>=!~;@\[\s]', requirement, maxsplit=1)[0] or requirement
        print(f"\n[{i}/{len(requirements_list)}] Installing {package_name}...")
        print(f"   Full requirement: {requirement}")

        start_time = time.time()
        try:
            result = subprocess.run([
                sys.executable, '-m', 'pip', 'install', requirement, '--timeout', '120'
            ], capture_output=True, text=True, timeout=180)  # 3 minute timeout per package

            elapsed_time = time.time() - start_time

            if result.returncode == 0:
                print(f"   ✅ {package_name} installed successfully ({elapsed_time:.1f}s)")
                successful_packages.append(package_name)
            else:
                print(f"   ❌ Failed to install {package_name}")
                print(f"   Error: {result.stderr[:200]}...")
                failed_packages.append(package_name)

        except subprocess.TimeoutExpired:
            print(f"   ⏰ Timeout installing {package_name} (>3 minutes)")
            failed_packages.append(package_name)
        except Exception as e:
            # Best effort: record the failure and carry on with the next package
            print(f"   ❌ Exception installing {package_name}: {e}")
            failed_packages.append(package_name)

    # Summary
    print("\n" + "=" * 60)
    print(f"📊 Installation Summary:")
    print(f"   ✅ Successful: {len(successful_packages)}")
    print(f"   ❌ Failed: {len(failed_packages)}")

    if successful_packages:
        print(f"\n   Successfully installed: {', '.join(successful_packages[:5])}")
        if len(successful_packages) > 5:
            print(f"   ... and {len(successful_packages) - 5} more")

    if failed_packages:
        print(f"\n   ⚠️  Failed packages: {', '.join(failed_packages)}")
        print("   These will be retried with basic fallback...")
        # ">=0.0.0" effectively removes the version pin for the retry
        install_basic_requirements([f"{pkg}>=0.0.0" for pkg in failed_packages])

def install_basic_requirements(requirements_list):
    """Install requirements one at a time with progress feedback.

    Each entry gets its own ``pip install`` subprocess so that one broken
    package does not stop the remaining ones from being attempted.

    Args:
        requirements_list: Requirement specifiers such as ``'pandas>=1.5.0'``.

    Returns:
        list: The specifiers that failed, timed out or raised; empty when
        every install succeeded.
    """
    import re  # local import: not part of this cell's import header

    total = len(requirements_list)
    print(f"\n🔄 Installing {total} basic requirements...")
    print("-" * 50)

    failed = []
    for i, req in enumerate(requirements_list, 1):
        # Display name only: cut at the first extras/version/marker delimiter
        # so 'numpy<2', 'pkg~=1.0' and 'pkg[extra]>=1' all show the bare name.
        package_name = re.split(r'[\[<>=!~;\s]', req.strip(), maxsplit=1)[0]
        print(f"[{i}/{total}] Installing {package_name}...", end=" ")

        start_time = time.time()
        try:
            # '--timeout' is passed through to pip; the subprocess timeout
            # bounds the whole install of this one requirement.
            result = subprocess.run([
                sys.executable, '-m', 'pip', 'install', req, '--timeout', '120'
            ], capture_output=True, text=True, timeout=180)
        except subprocess.TimeoutExpired:
            print("⏰ Timeout (>3 min)")
            failed.append(req)
            continue
        except Exception as e:
            # Best-effort installer: report and move on to the next package
            print(f"❌ Exception: {str(e)[:50]}...")
            failed.append(req)
            continue

        elapsed_time = time.time() - start_time
        if result.returncode == 0:
            print(f"✅ ({elapsed_time:.1f}s)")
        else:
            print("❌ Failed")
            print(f"    Error: {result.stderr[:100]}...")
            failed.append(req)

    return failed

def check_package_installed(package_name):
    """Check if a package is already installed.

    The distribution is looked up with ``importlib.metadata`` in the running
    interpreter's environment instead of spawning ``pip show`` for every
    package, which avoids one subprocess per check.

    Args:
        package_name: Distribution name as used on PyPI (e.g. ``'pyyaml'``).

    Returns:
        bool: True when a matching installed distribution is found, False
        otherwise (including for an empty or missing name).
    """
    from importlib import metadata

    if not package_name:
        return False
    try:
        metadata.distribution(package_name)
    except (metadata.PackageNotFoundError, ValueError):
        # Only "not installed" / "invalid name" mean False; anything else
        # (e.g. KeyboardInterrupt) is no longer swallowed by a bare except.
        return False
    return True

def verify_imports(libraries=None):
    """Verify that key libraries can be imported.

    Args:
        libraries: Optional iterable of dotted module names to check (for a
            mapping, its keys are used). Defaults to the analysis stack
            listed below.

    Returns:
        bool: True if every module imported, False if at least one failed.
    """
    import importlib  # local import: not part of this cell's import header

    if libraries is None:
        # module name -> alias it is imported under elsewhere (informational)
        libraries = {
            'pandas': 'pd',
            'numpy': 'np',
            'matplotlib.pyplot': 'plt',
            'seaborn': 'sns',
            'scipy': 'scipy',
            'pathlib': 'pathlib',
            'json': 'json',
            'yaml': 'yaml',
            'sklearn': 'sklearn',
            'statsmodels.api': 'sm'
        }

    print("\n🔍 Verifying library imports...")
    print("-" * 40)
    failed_imports = []

    for lib in libraries:
        try:
            importlib.import_module(lib)
        except Exception as e:
            # Deliberately broader than ImportError: a broken installation
            # can raise other exception types while the module initialises,
            # and that should be reported here rather than abort the check.
            print(f"❌ {lib:<20} - FAILED: {str(e)[:50]}...")
            failed_imports.append(lib)
        else:
            print(f"✅ {lib:<20} - OK")

    if failed_imports:
        print(f"\n⚠️  Warning: {len(failed_imports)} libraries failed to import")
        print("   Failed libraries:", ', '.join(failed_imports))
        print("   You may need to restart the kernel after installation")
    else:
        print("\n✅ All required libraries verified successfully")

    return not failed_imports

def show_pre_installation_status():
    """Report, package by package, what is present before installing.

    Returns:
        tuple: ``(already_installed, need_installation)`` - two lists of
        package names, each in the order the packages were checked.
    """
    check_packages = ['pandas', 'numpy', 'matplotlib', 'seaborn', 'scipy',
                      'scikit-learn', 'statsmodels', 'tqdm', 'pyyaml']

    print("🔍 Checking current package status...")
    print("-" * 50)

    already_installed, need_installation = [], []

    for package in check_packages:
        if not check_package_installed(package):
            print(f"❌ {package:<15} - Needs installation")
            need_installation.append(package)
            continue
        print(f"✅ {package:<15} - Already installed")
        already_installed.append(package)

    print("\n📊 Status Summary:")
    print(f"   Already installed: {len(already_installed)}")
    print(f"   Need installation: {len(need_installation)}")

    # Only list names when something is actually missing
    if need_installation:
        print(f"   Packages to install: {', '.join(need_installation)}")

    return already_installed, need_installation

# Run installation and verification with progress tracking.
# Cell driver: prints a banner, reports the current package status, runs the
# installer, times it, then checks that the libraries import.
print("=" * 70)
print("🚀 Analysis Requirements Installation & Verification")
print("=" * 70)

# Show pre-installation status.
# The two returned lists are informational: they are not consulted by the
# statements below, and install_requirements() runs regardless.
already_installed, need_installation = show_pre_installation_status()

# Proceed with installation (wall-clock timed)
print(f"\n⏱️  Starting installation process at {time.strftime('%H:%M:%S')}")
start_total = time.time()

install_requirements()

total_time = time.time() - start_total
print(f"\n⏱️  Total installation time: {total_time:.1f} seconds")

# Verify installation: True only if every checked library imported
verification_success = verify_imports()

print("\n" + "=" * 70)
if verification_success:
    print("🎉 Setup complete! Ready to proceed with analysis.")
else:
    print("⚠️  Some issues detected. You may need to restart the kernel.")
print("=" * 70)

🚀 Analysis Requirements Installation & Verification
🔍 Checking current package status...
--------------------------------------------------
✅ pandas          - Already installed
✅ numpy           - Already installed
❌ matplotlib      - Needs installation
❌ seaborn         - Needs installation
✅ scipy           - Already installed
✅ scikit-learn    - Already installed
❌ statsmodels     - Needs installation
✅ tqdm            - Already installed
✅ pyyaml          - Already installed

📊 Status Summary:
   Already installed: 6
   Need installation: 3
   Packages to install: matplotlib, seaborn, statsmodels

⏱️  Starting installation process at 12:46:51
✓ Found requirements file: ..\requirements\requirements_analysis.txt

📦 Installing 14 packages from requirements file...

[1/14] Installing pandas...
   Full requirement: pandas>=1.5.0
   ✅ pandas installed successfully (1.7s)

[2/14] Installing numpy...
   Full requirement: numpy>=1.21.0
   ✅ numpy installed successfully (1.5s)

[3/14] Insta

In [None]:
import subprocess
import sys
import os
from pathlib import Path

def install_requirements():
    """Install or verify analysis requirements with graceful fallback.

    Looks for ``requirements_analysis.txt`` at a few paths relative to the
    current working directory and runs ``pip install -r`` on the first one
    that exists. If no file is found, or that install fails, a built-in list
    of core packages is installed one by one via
    ``install_basic_requirements``.
    """
    # Basic requirements fallback.
    # 'pathlib2' (a backport of the stdlib pathlib module) is not needed on
    # Python 3 and was dropped; scikit-learn and statsmodels were added
    # because the notebook's import cell imports both.
    basic_requirements = [
        'pandas>=1.5.0',
        'numpy>=1.21.0',
        'matplotlib>=3.5.0',
        'seaborn>=0.11.0',
        'scipy>=1.8.0',
        'tqdm>=4.64.0',
        'pyyaml>=6.0',
        'scikit-learn>=1.1.0',
        'statsmodels>=0.13.0'
    ]

    # Candidate locations of requirements_analysis.txt, first match wins
    requirements_paths = [
        Path('./requirements/requirements_analysis.txt'),
        Path('../requirements/requirements_analysis.txt'),
        Path('../../requirements/requirements_analysis.txt'),
        Path('./Deliverables-Code/requirements/requirements_analysis.txt')
    ]

    requirements_file = next((p for p in requirements_paths if p.exists()), None)

    if requirements_file is None:
        print("⚠ requirements_analysis.txt not found")
        print("Installing basic requirements for analysis...")
        install_basic_requirements(basic_requirements)
        return

    print(f"✓ Found requirements file: {requirements_file}")
    print("Installing requirements from file...")
    try:
        subprocess.run([
            sys.executable, '-m', 'pip', 'install', '-r', str(requirements_file)
        ], capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"⚠ Error installing from requirements file: {e}")
        # str(e) only names the command and exit status; pip's explanation
        # is in the captured stderr, so show its tail as well.
        detail = (e.stderr or "").strip()
        if detail:
            print(f"   pip reported: ...{detail[-300:]}")
        print("Falling back to basic requirements...")
        install_basic_requirements(basic_requirements)
    else:
        print("✓ Requirements installed successfully from file")

def install_basic_requirements(requirements_list):
    """Install basic requirements individually.

    Each requirement gets its own ``pip install`` call so one failure does
    not prevent the others from being attempted.

    Args:
        requirements_list: Requirement specifiers such as ``'numpy>=1.21.0'``.

    Returns:
        list: The specifiers whose installation failed; empty on full success.
    """
    failed = []
    for req in requirements_list:
        print(f"Installing {req}...")
        try:
            subprocess.run([
                sys.executable, '-m', 'pip', 'install', req
            ], capture_output=True, text=True, check=True)
        except subprocess.CalledProcessError as e:
            print(f"⚠ Failed to install {req}: {e}")
            # The exception text lacks pip's own message; surface the last
            # line of the captured stderr so the cause is visible.
            detail = (e.stderr or "").strip()
            if detail:
                print(f"   {detail.splitlines()[-1][:200]}")
            failed.append(req)
        else:
            print(f"✓ {req} installed")
    return failed

def verify_imports(libraries=None):
    """Verify that key libraries can be imported.

    Args:
        libraries: Optional iterable of dotted module names to check (for a
            mapping, its keys are used). Defaults to the set listed below.

    Returns:
        bool: True if every module imported, False if at least one failed.
    """
    import importlib  # local import: not part of this cell's import header

    if libraries is None:
        # module name -> alias it is imported under elsewhere (informational)
        libraries = {
            'pandas': 'pd',
            'numpy': 'np',
            'matplotlib.pyplot': 'plt',
            'seaborn': 'sns',
            'scipy': 'scipy',
            'pathlib': 'pathlib',
            'json': 'json',
            'yaml': 'yaml'
        }

    print("\nVerifying library imports...")
    failed_imports = []

    for lib in libraries:
        try:
            importlib.import_module(lib)
        except Exception as e:
            # Deliberately broader than ImportError: a broken installation
            # can raise other exception types while the module initialises,
            # and that should be reported here rather than abort the check.
            print(f"✗ {lib} - FAILED: {e}")
            failed_imports.append(lib)
        else:
            print(f"✓ {lib} - OK")

    if failed_imports:
        print(f"\n⚠ Warning: {len(failed_imports)} libraries failed to import")
        print("You may need to restart the kernel after installation")
    else:
        print("\n✓ All required libraries verified successfully")

    return not failed_imports

# Run installation and verification.
# Cell driver: install first, then confirm the libraries actually import.
print("=== Analysis Requirements Installation & Verification ===")
install_requirements()
# True only if every checked library imported
verification_success = verify_imports()

if verification_success:
    print("\n🎉 Setup complete! Ready to proceed with analysis.")
else:
    print("\n⚠ Some issues detected. You may need to restart the kernel.")


=== Analysis Requirements Installation & Verification ===
✓ Found requirements file: ..\requirements\requirements_analysis.txt
Installing requirements from file...


In [None]:
import subprocess
import sys
import os
from pathlib import Path

def install_requirements():
    """Install or verify analysis requirements with graceful fallback.

    Looks for ``requirements_analysis.txt`` at a few paths relative to the
    current working directory and runs ``pip install -r`` on the first one
    that exists. If no file is found, or that install fails, a built-in list
    of core packages is installed one by one via
    ``install_basic_requirements``.
    """
    # Basic requirements fallback.
    # 'pathlib2' (a backport of the stdlib pathlib module) is not needed on
    # Python 3 and was dropped; scikit-learn and statsmodels were added
    # because the notebook's import cell imports both.
    basic_requirements = [
        'pandas>=1.5.0',
        'numpy>=1.21.0',
        'matplotlib>=3.5.0',
        'seaborn>=0.11.0',
        'scipy>=1.8.0',
        'tqdm>=4.64.0',
        'pyyaml>=6.0',
        'scikit-learn>=1.1.0',
        'statsmodels>=0.13.0'
    ]

    # Candidate locations of requirements_analysis.txt, first match wins
    requirements_paths = [
        Path('./requirements/requirements_analysis.txt'),
        Path('../requirements/requirements_analysis.txt'),
        Path('../../requirements/requirements_analysis.txt'),
        Path('./Deliverables-Code/requirements/requirements_analysis.txt')
    ]

    requirements_file = next((p for p in requirements_paths if p.exists()), None)

    if requirements_file is None:
        print("⚠ requirements_analysis.txt not found")
        print("Installing basic requirements for analysis...")
        install_basic_requirements(basic_requirements)
        return

    print(f"✓ Found requirements file: {requirements_file}")
    print("Installing requirements from file...")
    try:
        subprocess.run([
            sys.executable, '-m', 'pip', 'install', '-r', str(requirements_file)
        ], capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"⚠ Error installing from requirements file: {e}")
        # str(e) only names the command and exit status; pip's explanation
        # is in the captured stderr, so show its tail as well.
        detail = (e.stderr or "").strip()
        if detail:
            print(f"   pip reported: ...{detail[-300:]}")
        print("Falling back to basic requirements...")
        install_basic_requirements(basic_requirements)
    else:
        print("✓ Requirements installed successfully from file")

def install_basic_requirements(requirements_list):
    """Install basic requirements individually.

    Each requirement gets its own ``pip install`` call so one failure does
    not prevent the others from being attempted.

    Args:
        requirements_list: Requirement specifiers such as ``'numpy>=1.21.0'``.

    Returns:
        list: The specifiers whose installation failed; empty on full success.
    """
    failed = []
    for req in requirements_list:
        print(f"Installing {req}...")
        try:
            subprocess.run([
                sys.executable, '-m', 'pip', 'install', req
            ], capture_output=True, text=True, check=True)
        except subprocess.CalledProcessError as e:
            print(f"⚠ Failed to install {req}: {e}")
            # The exception text lacks pip's own message; surface the last
            # line of the captured stderr so the cause is visible.
            detail = (e.stderr or "").strip()
            if detail:
                print(f"   {detail.splitlines()[-1][:200]}")
            failed.append(req)
        else:
            print(f"✓ {req} installed")
    return failed

def verify_imports(libraries=None):
    """Verify that key libraries can be imported.

    Args:
        libraries: Optional iterable of dotted module names to check (for a
            mapping, its keys are used). Defaults to the set listed below.

    Returns:
        bool: True if every module imported, False if at least one failed.
    """
    import importlib  # local import: not part of this cell's import header

    if libraries is None:
        # module name -> alias it is imported under elsewhere (informational)
        libraries = {
            'pandas': 'pd',
            'numpy': 'np',
            'matplotlib.pyplot': 'plt',
            'seaborn': 'sns',
            'scipy': 'scipy',
            'pathlib': 'pathlib',
            'json': 'json',
            'yaml': 'yaml'
        }

    print("\nVerifying library imports...")
    failed_imports = []

    for lib in libraries:
        try:
            importlib.import_module(lib)
        except Exception as e:
            # Deliberately broader than ImportError: a broken installation
            # can raise other exception types while the module initialises,
            # and that should be reported here rather than abort the check.
            print(f"✗ {lib} - FAILED: {e}")
            failed_imports.append(lib)
        else:
            print(f"✓ {lib} - OK")

    if failed_imports:
        print(f"\n⚠ Warning: {len(failed_imports)} libraries failed to import")
        print("You may need to restart the kernel after installation")
    else:
        print("\n✓ All required libraries verified successfully")

    return not failed_imports

# Run installation and verification.
# Cell driver: install first, then confirm the libraries actually import.
print("=== Analysis Requirements Installation & Verification ===")
install_requirements()
# True only if every checked library imported
verification_success = verify_imports()

if verification_success:
    print("\n🎉 Setup complete! Ready to proceed with analysis.")
else:
    print("\n⚠ Some issues detected. You may need to restart the kernel.")


=== Analysis Requirements Installation & Verification ===
✓ Found requirements file: ..\requirements\requirements_analysis.txt
Installing requirements from file...


In [None]:
import subprocess
import sys
import os
from pathlib import Path

def install_requirements():
    """Install or verify analysis requirements with graceful fallback.

    Looks for ``requirements_analysis.txt`` at a few paths relative to the
    current working directory and runs ``pip install -r`` on the first one
    that exists. If no file is found, or that install fails, a built-in list
    of core packages is installed one by one via
    ``install_basic_requirements``.
    """
    # Basic requirements fallback.
    # 'pathlib2' (a backport of the stdlib pathlib module) is not needed on
    # Python 3 and was dropped; scikit-learn and statsmodels were added
    # because the notebook's import cell imports both.
    basic_requirements = [
        'pandas>=1.5.0',
        'numpy>=1.21.0',
        'matplotlib>=3.5.0',
        'seaborn>=0.11.0',
        'scipy>=1.8.0',
        'tqdm>=4.64.0',
        'pyyaml>=6.0',
        'scikit-learn>=1.1.0',
        'statsmodels>=0.13.0'
    ]

    # Candidate locations of requirements_analysis.txt, first match wins
    requirements_paths = [
        Path('./requirements/requirements_analysis.txt'),
        Path('../requirements/requirements_analysis.txt'),
        Path('../../requirements/requirements_analysis.txt'),
        Path('./Deliverables-Code/requirements/requirements_analysis.txt')
    ]

    requirements_file = next((p for p in requirements_paths if p.exists()), None)

    if requirements_file is None:
        print("⚠ requirements_analysis.txt not found")
        print("Installing basic requirements for analysis...")
        install_basic_requirements(basic_requirements)
        return

    print(f"✓ Found requirements file: {requirements_file}")
    print("Installing requirements from file...")
    try:
        subprocess.run([
            sys.executable, '-m', 'pip', 'install', '-r', str(requirements_file)
        ], capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"⚠ Error installing from requirements file: {e}")
        # str(e) only names the command and exit status; pip's explanation
        # is in the captured stderr, so show its tail as well.
        detail = (e.stderr or "").strip()
        if detail:
            print(f"   pip reported: ...{detail[-300:]}")
        print("Falling back to basic requirements...")
        install_basic_requirements(basic_requirements)
    else:
        print("✓ Requirements installed successfully from file")

def install_basic_requirements(requirements_list):
    """Install basic requirements individually.

    Each requirement gets its own ``pip install`` call so one failure does
    not prevent the others from being attempted.

    Args:
        requirements_list: Requirement specifiers such as ``'numpy>=1.21.0'``.

    Returns:
        list: The specifiers whose installation failed; empty on full success.
    """
    failed = []
    for req in requirements_list:
        print(f"Installing {req}...")
        try:
            subprocess.run([
                sys.executable, '-m', 'pip', 'install', req
            ], capture_output=True, text=True, check=True)
        except subprocess.CalledProcessError as e:
            print(f"⚠ Failed to install {req}: {e}")
            # The exception text lacks pip's own message; surface the last
            # line of the captured stderr so the cause is visible.
            detail = (e.stderr or "").strip()
            if detail:
                print(f"   {detail.splitlines()[-1][:200]}")
            failed.append(req)
        else:
            print(f"✓ {req} installed")
    return failed

def verify_imports(libraries=None):
    """Verify that key libraries can be imported.

    Args:
        libraries: Optional iterable of dotted module names to check (for a
            mapping, its keys are used). Defaults to the set listed below.

    Returns:
        bool: True if every module imported, False if at least one failed.
    """
    import importlib  # local import: not part of this cell's import header

    if libraries is None:
        # module name -> alias it is imported under elsewhere (informational)
        libraries = {
            'pandas': 'pd',
            'numpy': 'np',
            'matplotlib.pyplot': 'plt',
            'seaborn': 'sns',
            'scipy': 'scipy',
            'pathlib': 'pathlib',
            'json': 'json',
            'yaml': 'yaml'
        }

    print("\nVerifying library imports...")
    failed_imports = []

    for lib in libraries:
        try:
            importlib.import_module(lib)
        except Exception as e:
            # Deliberately broader than ImportError: a broken installation
            # can raise other exception types while the module initialises,
            # and that should be reported here rather than abort the check.
            print(f"✗ {lib} - FAILED: {e}")
            failed_imports.append(lib)
        else:
            print(f"✓ {lib} - OK")

    if failed_imports:
        print(f"\n⚠ Warning: {len(failed_imports)} libraries failed to import")
        print("You may need to restart the kernel after installation")
    else:
        print("\n✓ All required libraries verified successfully")

    return not failed_imports

# Run installation and verification.
# Cell driver: install first, then confirm the libraries actually import.
print("=== Analysis Requirements Installation & Verification ===")
install_requirements()
# True only if every checked library imported
verification_success = verify_imports()

if verification_success:
    print("\n🎉 Setup complete! Ready to proceed with analysis.")
else:
    print("\n⚠ Some issues detected. You may need to restart the kernel.")


=== Analysis Requirements Installation & Verification ===
✓ Found requirements file: ..\requirements\requirements_analysis.txt
Installing requirements from file...


In [None]:
import subprocess
import sys
import os
from pathlib import Path

def install_requirements():
    """Install or verify analysis requirements with graceful fallback.

    Looks for ``requirements_analysis.txt`` at a few paths relative to the
    current working directory and runs ``pip install -r`` on the first one
    that exists. If no file is found, or that install fails, a built-in list
    of core packages is installed one by one via
    ``install_basic_requirements``.
    """
    # Basic requirements fallback.
    # 'pathlib2' (a backport of the stdlib pathlib module) is not needed on
    # Python 3 and was dropped; scikit-learn and statsmodels were added
    # because the notebook's import cell imports both.
    basic_requirements = [
        'pandas>=1.5.0',
        'numpy>=1.21.0',
        'matplotlib>=3.5.0',
        'seaborn>=0.11.0',
        'scipy>=1.8.0',
        'tqdm>=4.64.0',
        'pyyaml>=6.0',
        'scikit-learn>=1.1.0',
        'statsmodels>=0.13.0'
    ]

    # Candidate locations of requirements_analysis.txt, first match wins
    requirements_paths = [
        Path('./requirements/requirements_analysis.txt'),
        Path('../requirements/requirements_analysis.txt'),
        Path('../../requirements/requirements_analysis.txt'),
        Path('./Deliverables-Code/requirements/requirements_analysis.txt')
    ]

    requirements_file = next((p for p in requirements_paths if p.exists()), None)

    if requirements_file is None:
        print("⚠ requirements_analysis.txt not found")
        print("Installing basic requirements for analysis...")
        install_basic_requirements(basic_requirements)
        return

    print(f"✓ Found requirements file: {requirements_file}")
    print("Installing requirements from file...")
    try:
        subprocess.run([
            sys.executable, '-m', 'pip', 'install', '-r', str(requirements_file)
        ], capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"⚠ Error installing from requirements file: {e}")
        # str(e) only names the command and exit status; pip's explanation
        # is in the captured stderr, so show its tail as well.
        detail = (e.stderr or "").strip()
        if detail:
            print(f"   pip reported: ...{detail[-300:]}")
        print("Falling back to basic requirements...")
        install_basic_requirements(basic_requirements)
    else:
        print("✓ Requirements installed successfully from file")

def install_basic_requirements(requirements_list):
    """Install basic requirements individually.

    Each requirement gets its own ``pip install`` call so one failure does
    not prevent the others from being attempted.

    Args:
        requirements_list: Requirement specifiers such as ``'numpy>=1.21.0'``.

    Returns:
        list: The specifiers whose installation failed; empty on full success.
    """
    failed = []
    for req in requirements_list:
        print(f"Installing {req}...")
        try:
            subprocess.run([
                sys.executable, '-m', 'pip', 'install', req
            ], capture_output=True, text=True, check=True)
        except subprocess.CalledProcessError as e:
            print(f"⚠ Failed to install {req}: {e}")
            # The exception text lacks pip's own message; surface the last
            # line of the captured stderr so the cause is visible.
            detail = (e.stderr or "").strip()
            if detail:
                print(f"   {detail.splitlines()[-1][:200]}")
            failed.append(req)
        else:
            print(f"✓ {req} installed")
    return failed

def verify_imports(libraries=None):
    """Verify that key libraries can be imported.

    Args:
        libraries: Optional iterable of dotted module names to check (for a
            mapping, its keys are used). Defaults to the set listed below.

    Returns:
        bool: True if every module imported, False if at least one failed.
    """
    import importlib  # local import: not part of this cell's import header

    if libraries is None:
        # module name -> alias it is imported under elsewhere (informational)
        libraries = {
            'pandas': 'pd',
            'numpy': 'np',
            'matplotlib.pyplot': 'plt',
            'seaborn': 'sns',
            'scipy': 'scipy',
            'pathlib': 'pathlib',
            'json': 'json',
            'yaml': 'yaml'
        }

    print("\nVerifying library imports...")
    failed_imports = []

    for lib in libraries:
        try:
            importlib.import_module(lib)
        except Exception as e:
            # Deliberately broader than ImportError: a broken installation
            # can raise other exception types while the module initialises,
            # and that should be reported here rather than abort the check.
            print(f"✗ {lib} - FAILED: {e}")
            failed_imports.append(lib)
        else:
            print(f"✓ {lib} - OK")

    if failed_imports:
        print(f"\n⚠ Warning: {len(failed_imports)} libraries failed to import")
        print("You may need to restart the kernel after installation")
    else:
        print("\n✓ All required libraries verified successfully")

    return not failed_imports

# Run installation and verification.
# Cell driver: install first, then confirm the libraries actually import.
print("=== Analysis Requirements Installation & Verification ===")
install_requirements()
# True only if every checked library imported
verification_success = verify_imports()

if verification_success:
    print("\n🎉 Setup complete! Ready to proceed with analysis.")
else:
    print("\n⚠ Some issues detected. You may need to restart the kernel.")


=== Analysis Requirements Installation & Verification ===
✓ Found requirements file: ..\requirements\requirements_analysis.txt
Installing requirements from file...


In [2]:
def find_project_root():
    """
    Find project root by locating directory containing .gitignore and .gitattributes.
    Similar to implementation in 03_pixtral_model.py

    The search starts at the script's directory (or the current working
    directory when ``__file__`` is undefined, as in a notebook) and walks
    upwards; the starting directory and the filesystem root are both checked.

    Returns:
        pathlib.Path: Absolute path of the first directory that contains
        both marker files.

    Raises:
        RuntimeError: If no directory on the way up contains both markers.
    """
    from pathlib import Path

    try:
        # When running as a script, start from script location
        start_path = Path(__file__).parent
    except NameError:
        # When running in a notebook, start from current working directory
        start_path = Path.cwd()

    # Resolve before walking: a relative start such as Path('.') is its own
    # parent, which would end the upward search before it examined anything.
    start_path = start_path.resolve()

    for candidate in (start_path, *start_path.parents):
        if (candidate / ".gitignore").exists() and (candidate / ".gitattributes").exists():
            return candidate

    raise RuntimeError("Could not find project root (directory containing .gitignore and .gitattributes)")

def setup_project_paths():
    """Populate the module-level directory globals and report what exists.

    Sets ROOT_DIR (via ``find_project_root``) and the directories derived
    from it, prints whether each one exists, creates the analysis directory
    if needed and appends the root to ``sys.path``.

    Returns:
        pathlib.Path: The detected project root (same object as ROOT_DIR).
    """
    global ROOT_DIR, DELIVERABLES_DIR, DATA_DIR, RESULTS_DIR, ANALYSIS_DIR, CONFIG_DIR

    # Everything else hangs off the detected root
    ROOT_DIR = find_project_root()
    print(f"✓ Found project root: {ROOT_DIR}")

    DELIVERABLES_DIR = ROOT_DIR / "Deliverables-Code"
    DATA_DIR, RESULTS_DIR, ANALYSIS_DIR, CONFIG_DIR = (
        DELIVERABLES_DIR / leaf for leaf in ("data", "results", "analysis", "config")
    )

    # (label, path) pairs in the order they are reported
    expected = (
        ("Deliverables-Code", DELIVERABLES_DIR),
        ("data", DATA_DIR),
        ("results", RESULTS_DIR),
        ("analysis", ANALYSIS_DIR),
        ("config", CONFIG_DIR),
    )

    missing_dirs = []
    for label, directory in expected:
        if not directory.exists():
            print(f"⚠ Missing {label} directory: {directory}")
            missing_dirs.append(label)
            continue
        print(f"✓ Found {label} directory: {directory}")

    if not missing_dirs:
        print("\n✓ All project directories located successfully")
    else:
        print(f"\n⚠ Warning: {len(missing_dirs)} required directories not found")
        print("This may indicate the notebook is being run from an unexpected location")

    # The analysis directory is created on demand (parents included)
    ANALYSIS_DIR.mkdir(parents=True, exist_ok=True)

    # Make the project root importable
    import sys
    root_entry = str(ROOT_DIR)
    if root_entry not in sys.path:
        sys.path.append(root_entry)
        print("✓ Added project root to Python path")

    return ROOT_DIR

def display_project_structure():
    """Print the configured directory globals and simple file counts.

    Reads the module-level ROOT_DIR, DELIVERABLES_DIR, DATA_DIR, RESULTS_DIR,
    ANALYSIS_DIR and CONFIG_DIR; counts are printed only for directories
    that exist.
    """
    print("\n=== Project Structure (Key Directories) ===")
    overview = (
        ("ROOT_DIR:", ROOT_DIR),
        ("DELIVERABLES_DIR:", DELIVERABLES_DIR),
        ("DATA_DIR:", DATA_DIR),
        ("RESULTS_DIR:", RESULTS_DIR),
        ("ANALYSIS_DIR:", ANALYSIS_DIR),
        ("CONFIG_DIR:", CONFIG_DIR),
    )
    for label, directory in overview:
        # Labels are padded to a common width so the paths line up
        print(f"{label:<18}{directory}")

    # File counts for the key directories
    if RESULTS_DIR.exists():
        result_count = sum(1 for _ in RESULTS_DIR.glob("*.json"))
        print(f"\nResult files found: {result_count}")

    if ANALYSIS_DIR.exists():
        analysis_count = sum(1 for _ in ANALYSIS_DIR.glob("*.json"))
        print(f"Analysis files found: {analysis_count}")

    metadata_dir = DATA_DIR / "images" / "metadata"
    if metadata_dir.exists():
        metadata_count = sum(1 for _ in metadata_dir.glob("*.csv"))
        print(f"Metadata files found: {metadata_count}")

# Run root directory detection and path setup.
# setup_project_paths() assigns the module-level directory globals
# (ROOT_DIR, DATA_DIR, ...) that display_project_structure() then reads,
# so the call order below matters.
print("=== Root Directory Detection & Path Setup ===")
project_root = setup_project_paths()
display_project_structure()

print(f"\n🎯 Ready to proceed with analysis from: {ROOT_DIR.name}")

In [3]:
# Import standard libraries for data analysis and visualization
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
from scipy import stats
from scipy.stats import ttest_ind, mannwhitneyu, kruskal
import json
import yaml
from pathlib import Path
import warnings
from datetime import datetime
from typing import Dict, List, Tuple, Any, Optional
import re
from collections import defaultdict, Counter
import itertools

# Statistical and machine learning utilities
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
import statsmodels.api as sm
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Progress tracking
from tqdm import tqdm

# Configure plotting parameters and styles
plt.style.use('default')  # Start with clean default style

# Apply the seaborn theme FIRST. sns.set_style() writes its own rcParams
# (for "whitegrid" that includes the axes.spines.* and axes.facecolor
# entries), so running it after the project overrides would silently turn
# the top/right spines back on.
sns.set_style("whitegrid")
sns.set_palette("husl")

# Project-specific matplotlib overrides, applied last so they take precedence
plt.rcParams.update({
    'figure.figsize': (12, 8),
    'figure.dpi': 100,
    'font.size': 11,
    'axes.titlesize': 14,
    'axes.labelsize': 12,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
    'legend.fontsize': 10,
    'legend.title_fontsize': 11,
    'axes.grid': True,
    'grid.alpha': 0.3,
    'lines.linewidth': 2,
    'axes.spines.top': False,
    'axes.spines.right': False,
    'figure.facecolor': 'white',
    'axes.facecolor': 'white'
})

# Custom color palette for consistent visualization
ANALYSIS_COLORS = {
    'LMM': '#2E86AB',        # Blue for LMM models
    'OCR': '#A23B72',        # Purple for OCR models
    'Pixtral': '#2E86AB',    # Blue for Pixtral
    'Llama': '#00A6D6',      # Light blue for Llama
    'DocTR': '#A23B72',      # Purple for DocTR
    'accuracy': '#28A745',    # Green for accuracy metrics
    'cer': '#DC3545',        # Red for error metrics
    'work_order': '#FD7E14',  # Orange for work order
    'total_cost': '#6F42C1',  # Purple for total cost
    'baseline': '#6C757D',    # Gray for baseline/reference
    'improvement': '#20C997'   # Teal for improvements
}

# Configure warnings
# NOTE(review): these filters hide ALL FutureWarning/UserWarning messages
# for the whole session, including ones raised by the analysis code itself.
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)

# Display configuration: show every column, no width limit, truncate long cells
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', 50)

print("✓ All libraries imported successfully")
print("✓ Plotting parameters configured")
print("✓ Custom color palette defined")
print("✓ Analysis environment ready")

# Show available color palette
print(f"\n📊 Available analysis colors: {list(ANALYSIS_COLORS.keys())}")
print("🎨 Visualization settings optimized for analysis reports")

In [4]:
import logging
import sys
from datetime import datetime

def setup_analysis_logging():
    """Configure logging for the analysis process with multiple output destinations.

    Installs exactly two handlers on the root logger: a file handler that
    records DEBUG and above in a timestamped file under
    ``ANALYSIS_DIR / "logs"``, and a console handler that writes INFO and
    above to stdout. Handlers from an earlier call are removed and closed.

    Returns:
        tuple: ``(root_logger, log_filename)`` where ``log_filename`` is the
        path of the log file created for this session.
    """
    # Create logs directory if it doesn't exist
    logs_dir = ANALYSIS_DIR / "logs"
    logs_dir.mkdir(parents=True, exist_ok=True)

    # Generate timestamped log filename
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    log_filename = logs_dir / f"analysis_{timestamp}.log"

    logger = logging.getLogger()

    # Clear any existing handlers, closing them so log files opened by a
    # previous run of this cell are released.
    for handler in logger.handlers[:]:
        logger.removeHandler(handler)
        handler.close()

    # No logging.basicConfig() here: on a handler-less root logger it
    # attaches an extra, unfiltered stderr handler, which made every record
    # (DEBUG included) appear on the console a second time.

    # Create file handler for detailed logging
    file_handler = logging.FileHandler(log_filename, mode='w', encoding='utf-8')
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s'
    ))

    # Create console handler for important messages
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(logging.Formatter('%(levelname)s: %(message)s'))

    # The logger itself passes everything; each handler applies its own level
    logger.setLevel(logging.DEBUG)
    logger.addHandler(file_handler)
    logger.addHandler(console_handler)

    return logger, log_filename

def create_section_logger(section_name: str):
    """Return the logger named ``analysis.<section_name>``."""
    qualified_name = "analysis.{}".format(section_name)
    return logging.getLogger(qualified_name)

# Set up logging system.
# analysis_logger is the logger returned by setup_analysis_logging();
# log_file_path is the file created for this session.
analysis_logger, log_file_path = setup_analysis_logging()

# Create loggers for different analysis sections
# (each is named "analysis.<section>")
setup_logger = create_section_logger("setup")
data_logger = create_section_logger("data_loading")
viz_logger = create_section_logger("visualization")
stats_logger = create_section_logger("statistics")
results_logger = create_section_logger("results")

# Log initial setup: record where this session logs to and which project
# directories it operates on
setup_logger.info("=== Analysis Framework v2.0 - Session Started ===")
setup_logger.info(f"Log file: {log_file_path}")
setup_logger.info(f"Project root: {ROOT_DIR}")
setup_logger.info(f"Analysis directory: {ANALYSIS_DIR}")

# Configure progress tracking
def log_progress(message: str, level: str = "info"):
    """Utility function to log progress with timestamp.

    The message is prefixed with the current ``HH:MM:SS`` time, sent to
    ``analysis_logger`` and also printed with an icon.

    Args:
        message: Text to report.
        level: One of ``"info"``, ``"warning"``, ``"error"`` or ``"success"``
            (case-insensitive). ``"success"`` is logged at INFO level. Any
            other value is treated as ``"info"`` instead of being dropped.
    """
    timestamp = datetime.now().strftime("%H:%M:%S")
    formatted_message = f"[{timestamp}] {message}"

    # level name -> (logger method, console prefix)
    channels = {
        "info": (analysis_logger.info, "ℹ️  "),
        "warning": (analysis_logger.warning, "⚠️  "),
        "error": (analysis_logger.error, "❌ "),
        "success": (analysis_logger.info, "✅ "),
    }

    # Unrecognised level names used to discard the message silently;
    # fall back to "info" so nothing is lost.
    emit, prefix = channels.get(str(level).lower(), channels["info"])
    emit(formatted_message)
    print(f"{prefix}{formatted_message}")

# Test logging system: one record per section logger
setup_logger.info("Logging system configured successfully")
data_logger.debug("Data logging ready")
viz_logger.debug("Visualization logging ready")
stats_logger.debug("Statistics logging ready")
results_logger.debug("Results logging ready")

print("✓ Logging system configured")
print(f"✓ Log file created: {log_file_path.name}")
print("✓ Section loggers initialized")
print("✓ Progress tracking ready")

# Display logging configuration
print("\n📝 Logging Details:")
print("   • File log level: DEBUG (detailed)")
print("   • Console log level: INFO (summary)")
# The logs directory is derived from the log file path: 'logs_dir' is a
# local variable of setup_analysis_logging() and does not exist at this
# level (referencing it here raised NameError).
print(f"   • Log location: {log_file_path.parent}")
print(f"   • Current session: {log_file_path.name}")

log_progress("Analysis logging system ready", "info")

In [5]:
def load_ground_truth_data(ground_truth_file: Optional[str] = None) -> pd.DataFrame:
    """Load and validate ground truth CSV data.

    Args:
        ground_truth_file: Path of the CSV file. When None, defaults to
            ``DATA_DIR / "images" / "metadata" / "ground_truth.csv"``.

    Returns:
        pd.DataFrame: The CSV contents with ``filename`` and
        ``work_order_number`` read as text and stripped of surrounding
        whitespace.

    Raises:
        FileNotFoundError: If the file does not exist.
        ValueError: If ``filename``, ``work_order_number`` or ``total`` is
            missing from the columns.
    """
    data_logger.info("Loading ground truth data")

    # Set default ground truth file path using ROOT_DIR-derived DATA_DIR
    if ground_truth_file is None:
        ground_truth_file = DATA_DIR / "images" / "metadata" / "ground_truth.csv"
    else:
        ground_truth_file = Path(ground_truth_file)

    if not ground_truth_file.exists():
        raise FileNotFoundError(f"Ground truth file not found: {ground_truth_file}")

    try:
        # Read both identifier columns as text. Letting pandas infer a
        # numeric dtype for work_order_number drops leading zeros and, when
        # the column has gaps, turns 123 into '123.0' once converted to str.
        ground_truth = pd.read_csv(
            ground_truth_file,
            dtype={'filename': str, 'work_order_number': str},
        )

        # Validate required columns
        required_columns = {'filename', 'work_order_number', 'total'}
        missing_columns = required_columns - set(ground_truth.columns)
        if missing_columns:
            raise ValueError(f"Missing required columns in ground truth: {missing_columns}")

        # Clean and validate data
        ground_truth['filename'] = ground_truth['filename'].str.strip()
        # astype(str) is kept so missing work orders still become the
        # string 'nan', as before
        ground_truth['work_order_number'] = ground_truth['work_order_number'].astype(str).str.strip()

        data_logger.info(f"Loaded ground truth data: {len(ground_truth)} records")
        return ground_truth

    except Exception as e:
        # Log for the session record, then let the caller handle it
        data_logger.error(f"Error loading ground truth data: {e}")
        raise

def discover_results_files() -> Dict[str, List[Path]]:
    """Find ``results-*.json`` files in RESULTS_DIR, grouped by model type.

    Returns:
        Mapping with the keys ``'pixtral'``, ``'llama'``, ``'doctr'`` and
        ``'all'``. Every list is ordered by modification time, newest first.
        A file is assigned to the first model tag found in its name; files
        matching no tag appear only under ``'all'``.
    """
    data_logger.info("Discovering results files")

    model_tags = ('pixtral', 'llama', 'doctr')
    results_files = {tag: [] for tag in model_tags}
    results_files['all'] = []

    for candidate in RESULTS_DIR.glob("results-*.json"):
        results_files['all'].append(candidate)

        # First matching tag wins, mirroring an if/elif chain
        matched_tag = next((tag for tag in model_tags if tag in candidate.name), None)
        if matched_tag is not None:
            results_files[matched_tag].append(candidate)

    # Newest files first in every bucket
    for bucket in results_files.values():
        bucket.sort(key=lambda path: path.stat().st_mtime, reverse=True)

    total_found = len(results_files['all'])
    data_logger.info(f"Found {total_found} total results files")
    for model_type in model_tags:
        files = results_files[model_type]
        if files:
            data_logger.info(f"  {model_type}: {len(files)} files")

    return results_files

def discover_analysis_files() -> Dict[str, List[Path]]:
    """Find ``analysis-*.json`` files in ANALYSIS_DIR, grouped by model type.

    Returns:
        Mapping with the keys ``'pixtral'``, ``'llama'``, ``'doctr'`` and
        ``'all'``. Every list is ordered by modification time, newest first.
        A file is assigned to the first model tag found in its name; files
        matching no tag appear only under ``'all'``.
    """
    data_logger.info("Discovering analysis files")

    model_tags = ('pixtral', 'llama', 'doctr')
    analysis_files = {tag: [] for tag in model_tags}
    analysis_files['all'] = []

    for candidate in ANALYSIS_DIR.glob("analysis-*.json"):
        analysis_files['all'].append(candidate)

        # First matching tag wins, mirroring an if/elif chain
        matched_tag = next((tag for tag in model_tags if tag in candidate.name), None)
        if matched_tag is not None:
            analysis_files[matched_tag].append(candidate)

    # Newest files first in every bucket
    for bucket in analysis_files.values():
        bucket.sort(key=lambda path: path.stat().st_mtime, reverse=True)

    total_found = len(analysis_files['all'])
    data_logger.info(f"Found {total_found} total analysis files")
    for model_type in model_tags:
        files = analysis_files[model_type]
        if files:
            data_logger.info(f"  {model_type}: {len(files)} files")

    return analysis_files

def load_results_file(file_path: Path) -> Dict[str, Any]:
    """Read one results JSON file, check its structure, and tag it with file info.

    Args:
        file_path: Location of the results JSON file.

    Returns:
        The parsed JSON content with an extra ``'file_info'`` entry holding
        the file name, path, size in MB (2 decimals) and modification time
        in ISO format.

    Raises:
        ValueError: If ``'metadata'`` or ``'results'`` is missing.
        Exception: Any other read/parse error is logged and re-raised.
    """
    data_logger.debug(f"Loading results file: {file_path.name}")

    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            data = json.load(handle)

        # Structural check: both top-level sections must be present
        missing_keys = {'metadata', 'results'} - set(data.keys())
        if missing_keys:
            raise ValueError(f"Missing required keys in results file: {missing_keys}")

        # Attach provenance so downstream code knows where the data came from
        stat_info = file_path.stat()
        size_mb = round(stat_info.st_size / (1024 * 1024), 2)
        modified = datetime.fromtimestamp(stat_info.st_mtime).isoformat()
        data['file_info'] = dict(
            filename=file_path.name,
            file_path=str(file_path),
            file_size_mb=size_mb,
            modification_time=modified,
        )

        result_count = len(data['results'])
        data_logger.debug(f"Loaded results file with {result_count} results")
        return data

    except Exception as e:
        data_logger.error(f"Error loading results file {file_path}: {e}")
        raise

def load_analysis_file(file_path: Path) -> Dict[str, Any]:
    """Read one analysis JSON file, check its structure, and tag it with file info.

    Args:
        file_path: Location of the analysis JSON file.

    Returns:
        The parsed JSON content with an extra ``'file_info'`` entry holding
        the file name, path, size in MB (2 decimals) and modification time
        in ISO format.

    Raises:
        ValueError: If ``'metadata'``, ``'summary'`` or ``'extracted_data'``
            is missing.
        Exception: Any other read/parse error is logged and re-raised.
    """
    data_logger.debug(f"Loading analysis file: {file_path.name}")

    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            data = json.load(handle)

        # Structural check: all three top-level sections must be present
        missing_keys = {'metadata', 'summary', 'extracted_data'} - set(data.keys())
        if missing_keys:
            raise ValueError(f"Missing required keys in analysis file: {missing_keys}")

        # Attach provenance so downstream code knows where the data came from
        stat_info = file_path.stat()
        size_mb = round(stat_info.st_size / (1024 * 1024), 2)
        modified = datetime.fromtimestamp(stat_info.st_mtime).isoformat()
        data['file_info'] = dict(
            filename=file_path.name,
            file_path=str(file_path),
            file_size_mb=size_mb,
            modification_time=modified,
        )

        analyzed_count = len(data['extracted_data'])
        data_logger.debug(f"Loaded analysis file with {analyzed_count} analyzed results")
        return data

    except Exception as e:
        data_logger.error(f"Error loading analysis file {file_path}: {e}")
        raise

def load_all_results(model_types: List[str] = None) -> Dict[str, List[Dict]]:
    """Load every discovered results file for the requested model types.

    Args:
        model_types: Model type keys to load. Defaults to
            ``['pixtral', 'llama', 'doctr']`` when ``None``.

    Returns:
        Mapping of model type to the list of successfully loaded results
        payloads. Files that fail to load are skipped with a warning.
    """
    data_logger.info("Loading all results files")

    requested_types = ['pixtral', 'llama', 'doctr'] if model_types is None else model_types
    discovered = discover_results_files()
    all_results = {}

    for model_type in requested_types:
        if model_type not in discovered:
            continue

        loaded = all_results[model_type] = []
        for file_path in discovered[model_type]:
            try:
                payload = load_results_file(file_path)
            except Exception as e:
                # One unreadable file must not abort the whole batch
                data_logger.warning(f"Skipping corrupted results file {file_path}: {e}")
            else:
                loaded.append(payload)

    total_loaded = sum(map(len, all_results.values()))
    data_logger.info(f"Loaded {total_loaded} results files across {len(all_results)} model types")

    return all_results

def load_all_analysis(model_types: List[str] = None) -> Dict[str, List[Dict]]:
    """Load every discovered analysis file for the requested model types.

    Args:
        model_types: Model type keys to load. Defaults to
            ``['pixtral', 'llama', 'doctr']`` when ``None``.

    Returns:
        Mapping of model type to the list of successfully loaded analysis
        payloads. Files that fail to load are skipped with a warning.
    """
    data_logger.info("Loading all analysis files")

    requested_types = ['pixtral', 'llama', 'doctr'] if model_types is None else model_types
    discovered = discover_analysis_files()
    all_analysis = {}

    for model_type in requested_types:
        if model_type not in discovered:
            continue

        loaded = all_analysis[model_type] = []
        for file_path in discovered[model_type]:
            try:
                payload = load_analysis_file(file_path)
            except Exception as e:
                # One unreadable file must not abort the whole batch
                data_logger.warning(f"Skipping corrupted analysis file {file_path}: {e}")
            else:
                loaded.append(payload)

    total_loaded = sum(map(len, all_analysis.values()))
    data_logger.info(f"Loaded {total_loaded} analysis files across {len(all_analysis)} model types")

    return all_analysis

def select_files_interactive(file_type: str = "results") -> List[Path]:
    """Prompt the user to choose which results or analysis files to use.

    Args:
        file_type: Either ``"results"`` or ``"analysis"``.

    Returns:
        The chosen file paths, or an empty list when no files of the given
        type were discovered.

    Raises:
        ValueError: If ``file_type`` is neither ``"results"`` nor ``"analysis"``.
    """
    if file_type not in ("results", "analysis"):
        raise ValueError("file_type must be 'results' or 'analysis'")

    if file_type == "results":
        files_dict = discover_results_files()
        title = "Available Results Files"
    else:
        files_dict = discover_analysis_files()
        title = "Available Analysis Files"

    all_files = files_dict['all']
    if not all_files:
        print(f"No {file_type} files found.")
        return []

    # Filename tag -> label shown next to the entry (first match wins)
    model_labels = (
        ('pixtral', " [Pixtral]"),
        ('llama', " [Llama]"),
        ('doctr', " [DocTR]"),
    )

    print(f"\n{title}:")
    print("-" * 50)
    for position, candidate in enumerate(all_files, 1):
        model_info = next(
            (label for tag, label in model_labels if tag in candidate.name),
            "",
        )
        mod_time = datetime.fromtimestamp(candidate.stat().st_mtime)
        print(f"{position:2d}. {candidate.name}{model_info}")
        print(f"     Modified: {mod_time.strftime('%Y-%m-%d %H:%M:%S')}")

    # The menu entry after the last file means "take everything"
    load_all_number = len(all_files) + 1
    print(f"\n{load_all_number}. Load all files")

    while True:
        try:
            choice = input(f"\nSelect files (comma-separated numbers, or {load_all_number} for all): ")

            if choice.strip() == str(load_all_number):
                return all_files

            # Convert every token up front so a malformed entry rejects the
            # whole line before any per-number feedback is printed
            numbers = [int(token.strip()) for token in choice.split(',')]

            selected_files = []
            for number in numbers:
                if number < 1 or number > len(all_files):
                    print(f"Invalid choice: {number}")
                else:
                    selected_files.append(all_files[number - 1])

            if not selected_files:
                print("No valid files selected.")
                continue

            print(f"\nSelected {len(selected_files)} file(s):")
            for picked in selected_files:
                print(f"  - {picked.name}")
            return selected_files

        except ValueError:
            print("Please enter valid numbers separated by commas.")

def create_comprehensive_dataset() -> Dict[str, Any]:
    """Bundle ground truth and all loaded analysis data into one structure.

    Returns:
        Dict with three entries: ``'ground_truth'`` (the ground truth
        DataFrame), ``'model_data'`` (model type -> list of analysis
        payloads, only for model types with at least one payload) and
        ``'metadata'`` (creation timestamp, model/experiment counts and the
        data source locations).
    """
    data_logger.info("Creating comprehensive dataset")

    ground_truth = load_ground_truth_data()
    # Analysis files already contain the processed results
    all_analysis = load_all_analysis()

    metadata = {
        'created_timestamp': datetime.now().isoformat(),
        'total_models': 0,
        'total_experiments': 0,
        'data_sources': {
            'ground_truth_file': str(DATA_DIR / "images" / "metadata" / "ground_truth.csv"),
            'results_directory': str(RESULTS_DIR),
            'analysis_directory': str(ANALYSIS_DIR),
        },
    }

    # Keep only model types that actually have experiments
    model_data = {}
    for model_type, analyses in all_analysis.items():
        if not analyses:
            continue
        model_data[model_type] = analyses
        data_logger.info(f"Added {len(analyses)} experiments for {model_type}")

    total_experiments = sum(len(analyses) for analyses in model_data.values())
    metadata['total_models'] = len(model_data)
    metadata['total_experiments'] = total_experiments

    data_logger.info(f"Comprehensive dataset created with {metadata['total_models']} models and {total_experiments} experiments")

    return {
        'ground_truth': ground_truth,
        'model_data': model_data,
        'metadata': metadata,
    }

# Bootstrap the data layer: ensure directories, discover files, load ground
# truth, build the combined dataset, then print a summary.
log_progress("Initializing data loading functions", "info")

# Make sure every directory the loaders read from is present
required_dirs = [RESULTS_DIR, ANALYSIS_DIR, DATA_DIR / "images" / "metadata"]
for dir_path in required_dirs:
    if dir_path.exists():
        continue
    log_progress(f"Creating missing directory: {dir_path}", "warning")
    dir_path.mkdir(parents=True, exist_ok=True)

# Inventory of what is on disk right now
available_results = discover_results_files()
available_analysis = discover_analysis_files()

# Ground truth is optional at this stage: fall back to None on any failure
try:
    GROUND_TRUTH_DATA = load_ground_truth_data()
    log_progress(f"Ground truth loaded: {len(GROUND_TRUTH_DATA)} records", "success")
except Exception as e:
    log_progress(f"Warning: Could not load ground truth data: {e}", "warning")
    GROUND_TRUTH_DATA = None

# Same best-effort policy for the combined dataset
try:
    COMPREHENSIVE_DATASET = create_comprehensive_dataset()
    log_progress("Comprehensive dataset created successfully", "success")
except Exception as e:
    log_progress(f"Warning: Could not create comprehensive dataset: {e}", "warning")
    COMPREHENSIVE_DATASET = None

# Summary of available data
ground_truth_count = 'Not available' if GROUND_TRUTH_DATA is None else len(GROUND_TRUTH_DATA)
results_count = len(available_results['all'])
analysis_count = len(available_analysis['all'])

print("\n📊 Data Loading Summary:")
print(f"   • Ground truth records: {ground_truth_count}")
print(f"   • Results files found: {results_count}")
print(f"   • Analysis files found: {analysis_count}")

# Per-model breakdown, printed only for file kinds that have any files
for section_heading, discovered in (
    ("\n   Results by model type:", available_results),
    ("\n   Analysis by model type:", available_analysis),
):
    if not discovered['all']:
        continue
    print(section_heading)
    for model_type, files in discovered.items():
        if model_type == 'all' or not files:
            continue
        print(f"     - {model_type.title()}: {len(files)} files")

print("\n✅ Data loading functions ready for analysis")

In [6]:
# Create Primary Performance Comparison Bar Chart
# Side-by-side comparison of total accuracy for all LMM trials vs all OCR trials
# Roll up across all prompts and queries


In [7]:
# Create Model Type Breakdown Bar Chart
# Break down into model types within each category
# (LMM-Pixtral, LMM-Llama, OCR with all 7 recognition models)
# Group by category and order by performance


In [8]:
# Create Complete Model Performance Bar Chart
# All models organized by performance, color coded by category (LMM vs OCR only)
# Include 85% accuracy reference line for industry automation standards


In [9]:
# Create LMM Models vs Prompts Heatmap (Accuracy)
# Pixtral/Llama (rows) × Prompt types (columns) with accuracy values


In [10]:
# Create LMM Models vs Prompts Heatmap (CER)
# Pixtral/Llama (rows) × Prompt types (columns) with CER values


In [11]:
# Create LMM Prompts vs Query Heatmap (Accuracy)
# Prompt types (rows) × Query types (Work Order/Total Cost) with accuracy values


In [12]:
# Create LMM Prompts vs Query Heatmap (CER)
# Prompt types (rows) × Query types (Work Order/Total Cost) with CER values


In [13]:
# Create All Models vs Query Heatmap (Accuracy)
# All models including OCR (rows) × Query types (columns) with accuracy values


In [14]:
# Create All Models vs Query Heatmap (CER)
# All models including OCR (rows) × Query types (columns) with CER values


In [15]:
# Create Coefficient of Variation Bar Chart
# Performance stability across prompts for each model


In [16]:
# Create Min-Max Range Visualization
# Performance ranges to identify most/least consistent models


In [17]:
# Create Error Pattern Examples visualization
# Visual examples of each error category with actual vs. expected results


In [18]:
# Create Post-Processing Opportunity Assessment
# Estimate potential accuracy improvements for each error type


In [19]:
# Create Error Type Distribution Pie Charts
# Separate charts for Work Order vs. Total Cost errors


In [20]:
# Create Error Frequency Heatmap
# Error types (rows) × Models (columns)


In [21]:
# Create Failure Severity Distribution
# Histogram of error magnitudes


In [22]:
# Create Model Robustness Comparison
# How models handle edge cases


In [23]:
# Create Prompt Performance Matrix
# Accuracy gains/losses by prompt type across models


In [24]:
# Create Prompt-Model Interaction Effects
# Line graphs showing how each model responds to different prompts


In [25]:
# Create Field Performance Comparison
# Side-by-side accuracy for each field across all models


In [26]:
# Create Performance Gap Analysis
# Difference between Total Cost and Work Order accuracy by model


In [27]:
# Create CER Distribution Histograms
# Separate for Work Order and Total Cost


In [28]:
# Create Model CER Comparison Box Plots
# Show ranges and outliers


In [29]:
# Create Efficiency Frontier Plot
# Accuracy vs. computational cost scatter plot


In [30]:
# Create Cost-Benefit Analysis
# ROI calculations for different model choices


In [31]:
# Create Performance Distribution Box Plots
# Accuracy ranges across all model/prompt combinations


In [32]:
# Create Statistical Significance Matrix
# P-values for key comparisons


In [33]:
# Create Multi-Criteria Decision Matrix
# Weighted scoring across accuracy, speed, cost


In [34]:
# Create Use Case Recommendations
# Different models for different deployment scenarios


In [35]:
# Create Improvement Opportunity Matrix
# Effort vs. Impact for different enhancement areas


In [36]:
# Create Implementation Timeline
# Suggested sequence for system improvements


In [37]:
# Create Unexpected Findings Highlight
# Key discoveries and their implications


In [38]:
# Create Future Research Opportunities
# Areas identified for continued investigation
