# MyBinder EOPF-Zarr Driver Diagnostics

This notebook helps diagnose and fix MyBinder timeout issues when deploying the EOPF-Zarr driver.

## Problem Analysis
Your error shows:
- Container image was built successfully
- Container started but failed to become "ready" within 300 seconds
- This typically points to a startup script that hangs or fails, or to the container running out of resources (memory/CPU) before Jupyter is up

Let's systematically diagnose and fix this issue.

In [None]:
# Environment Setup and Validation
import os
import sys
import subprocess
import time
from pathlib import Path

def read_memory_lines(meminfo_path='/proc/meminfo'):
    """Return the MemTotal/MemAvailable lines of a meminfo-style file.

    Args:
        meminfo_path: File to scan; defaults to the Linux ``/proc/meminfo``.

    Returns:
        The matching lines with surrounding whitespace stripped (possibly an
        empty list), or ``None`` when the file cannot be opened or read.
    """
    try:
        with open(meminfo_path, 'r') as f:
            return [
                line.strip()
                for line in f
                if 'MemTotal' in line or 'MemAvailable' in line
            ]
    except OSError:
        # Only I/O failures mean "not available". A bare ``except`` would also
        # swallow KeyboardInterrupt/SystemExit and hide genuine bugs.
        return None


print("🔍 MyBinder Environment Diagnostics")
print("=" * 50)

# Basic system info
print(f"🐍 Python: {sys.version}")
print(f"📁 Working Directory: {os.getcwd()}")
print(f"👤 User: {os.environ.get('USER', 'unknown')}")
print(f"🏠 Home: {os.environ.get('HOME', 'unknown')}")

# Check if we're in MyBinder: the presence of BINDER_LAUNCH_HOST is used as
# the marker for a Binder session.
if 'BINDER_LAUNCH_HOST' in os.environ:
    print("✅ Running in MyBinder environment")
    print(f"   Launch Host: {os.environ.get('BINDER_LAUNCH_HOST')}")
    print(f"   Repo: {os.environ.get('BINDER_REPO_URL', 'unknown')}")
else:
    print("⚠️ Not detected as MyBinder environment")

# Memory and resource info
memory_lines = read_memory_lines()
if memory_lines is None:
    print("💾 Memory info not available")
else:
    for memory_line in memory_lines:
        print(f"💾 {memory_line}")

print("\n✅ Basic environment validation complete")

## GDAL Driver Detection Diagnostics

Let's check if GDAL is available and test driver detection.

In [None]:
# GDAL Driver Detection Diagnostics
# Imports GDAL, reports its version and driver count, then looks up the
# EOPFZARR driver by name. When the lookup fails, every registered driver whose
# description mentions "zarr" or "eopf" is listed instead.
print("🔍 Testing GDAL availability...")

try:
    from osgeo import gdal
    print("✅ GDAL imported successfully")
    print(f"📦 GDAL Version: {gdal.VersionInfo()}")

    # Register the drivers and report how many GDAL knows about
    gdal.AllRegister()
    driver_total = gdal.GetDriverCount()
    print(f"📊 Total GDAL drivers available: {driver_total}")

    # Look the EOPF-Zarr driver up by its short name
    eopf = gdal.GetDriverByName('EOPFZARR')
    if not eopf:
        print("⚠️ EOPF-Zarr driver not found by name")

        # Fall back to scanning all driver descriptions for related names
        all_descriptions = (
            gdal.GetDriver(index).GetDescription()
            for index in range(driver_total)
        )
        related = [
            description
            for description in all_descriptions
            if 'zarr' in description.lower() or 'eopf' in description.lower()
        ]

        if related:
            print(f"🔍 Found Zarr-related drivers: {related}")
        else:
            print("⚠️ No Zarr-related drivers found")
    else:
        print(f"✅ EOPF-Zarr driver found: {eopf.GetDescription()}")
        driver_metadata = eopf.GetMetadata()
        if driver_metadata:
            print(f"   Driver metadata: {driver_metadata}")

except ImportError as import_error:
    print(f"❌ Failed to import GDAL: {import_error}")
    print("   This indicates GDAL is not properly installed")
except Exception as gdal_error:
    print(f"❌ GDAL error: {gdal_error}")

print("✅ GDAL diagnostics complete")

## Environment Variables Configuration

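These environment variables tell GDAL where to look for driver plugins (`GDAL_DRIVER_PATH`) and for its supporting data files (`GDAL_DATA`, `PROJ_LIB`). The cell below reports their current values and sets any that are missing.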

In [None]:
# Environment Variables Configuration


def resolve_env_paths(defaults, environ):
    """Return the filesystem entries each variable effectively points to.

    Args:
        defaults: Mapping of variable name to the fallback path used when the
            variable is unset or empty.
        environ: Mapping to read the current values from (e.g. ``os.environ``).

    Returns:
        A dict mapping each variable name to the list of path entries in its
        effective value. The value is split on ``os.pathsep`` so that a
        variable holding several directories is checked entry by entry.
    """
    resolved = {}
    for name, default in defaults.items():
        value = environ.get(name) or default
        resolved[name] = [entry for entry in value.split(os.pathsep) if entry]
    return resolved


print("🔧 Checking and setting environment variables...")

# Critical environment variables for GDAL
required_vars = {
    'GDAL_DRIVER_PATH': '/opt/eopf-zarr/drivers',
    'GDAL_DATA': '/usr/share/gdal',
    'PROJ_LIB': '/usr/share/proj'
}

print("📋 Current environment variables:")
for var, expected in required_vars.items():
    current = os.environ.get(var)
    if current:
        print(f"✅ {var}: {current}")
        if current != expected:
            print(f"   ⚠️ Expected: {expected}")
    else:
        # NOTE(review): the default is exported even if it does not exist on
        # disk; the verification step below reports such paths as missing.
        print(f"❌ {var}: Not set")
        print(f"   Setting to: {expected}")
        os.environ[var] = expected

# Verify paths exist. Check what the environment actually contains rather than
# the hard-coded defaults: a variable that was already set to a different
# location must be validated against that location.
print("\n📁 Verifying paths exist:")
for var, paths in resolve_env_paths(required_vars, os.environ).items():
    for path in paths:
        if os.path.exists(path):
            print(f"✅ {var} path exists: {path}")
            if os.path.isdir(path):
                try:
                    contents = os.listdir(path)
                    print(f"   Contents: {contents[:5]}{'...' if len(contents) > 5 else ''}")
                except PermissionError:
                    print("   Permission denied listing contents")
        else:
            print(f"❌ {var} path missing: {path}")

# Set additional GDAL configuration options
print("\n⚙️ Setting GDAL configuration options...")
try:
    from osgeo import gdal
    gdal.SetConfigOption('GDAL_DRIVER_PATH', required_vars['GDAL_DRIVER_PATH'])
    gdal.SetConfigOption('CPL_DEBUG', 'OFF')  # Reduce verbosity initially
    print("✅ GDAL configuration options set")
except Exception as e:
    # Also covers ImportError when GDAL's Python bindings are absent.
    print(f"❌ Failed to set GDAL options: {e}")

print("✅ Environment variables configuration complete")

## Driver File Verification

Check if the EOPF-Zarr driver shared library exists and is accessible.

In [None]:
# Driver File Verification


def summarize_dependencies(ldd_output, limit=5):
    """Condense ``ldd`` output into at most ``limit`` entries plus a tally.

    Args:
        ldd_output: Raw text captured from ``ldd``.
        limit: Maximum number of dependency lines to keep.

    Returns:
        A list with the first ``limit`` non-blank lines (stripped), followed by
        ``"... and N more"`` when further dependencies were left out. Blank
        lines, including the one produced by the trailing newline, are not
        counted.
    """
    dependencies = [line.strip() for line in ldd_output.splitlines() if line.strip()]
    summary = dependencies[:limit]
    remaining = len(dependencies) - len(summary)
    if remaining > 0:
        summary.append(f"... and {remaining} more")
    return summary


def run_tool(command):
    """Run an external inspection tool with a 10 second timeout.

    Args:
        command: Argument list; ``command[0]`` is the executable name.

    Returns:
        The ``subprocess.CompletedProcess``, or ``None`` when the tool is
        missing, times out or cannot be started (a warning is printed).
    """
    tool = command[0]
    try:
        return subprocess.run(command, capture_output=True, text=True, timeout=10)
    except subprocess.TimeoutExpired:
        print(f"   ⚠️ File analysis timed out ({tool})")
    except FileNotFoundError:
        print(f"   ⚠️ System command not available: {tool}")
    except (OSError, ValueError) as e:
        print(f"   ⚠️ Error analyzing file with {tool}: {e}")
    return None


print("📁 Verifying EOPF-Zarr driver file...")

driver_file = "/opt/eopf-zarr/drivers/gdal_EOPFZarr.so"
driver_dir = "/opt/eopf-zarr/drivers"

# Check driver directory
if os.path.exists(driver_dir):
    print(f"✅ Driver directory exists: {driver_dir}")
    try:
        contents = os.listdir(driver_dir)
    except OSError as e:
        print(f"❌ Error listing directory: {e}")
    else:
        print(f"📋 Directory contents: {contents}")

        # Check each file; one unreadable entry (e.g. a dangling symlink) must
        # not hide the remaining ones.
        for item in contents:
            item_path = os.path.join(driver_dir, item)
            try:
                info = os.stat(item_path)
            except OSError as e:
                print(f"   ⚠️ {item}: cannot stat ({e})")
                continue
            print(f"   📄 {item}: {info.st_size} bytes, mode: {oct(info.st_mode)}")
else:
    print(f"❌ Driver directory missing: {driver_dir}")

# Check specific driver file
if os.path.exists(driver_file):
    print(f"✅ Driver file exists: {driver_file}")

    # Get file info
    info = os.stat(driver_file)
    print(f"   📊 Size: {info.st_size} bytes")
    print(f"   🔐 Permissions: {oct(info.st_mode)}")
    print(f"   📅 Modified: {time.ctime(info.st_mtime)}")

    # The two tools are run independently so that a missing `file` binary
    # does not prevent the `ldd` dependency check.
    file_result = run_tool(['file', driver_file])
    if file_result is not None and file_result.returncode == 0:
        print(f"   📋 File type: {file_result.stdout.strip()}")

    ldd_result = run_tool(['ldd', driver_file])
    if ldd_result is not None:
        if ldd_result.returncode == 0:
            print("   🔗 Library dependencies:")
            for entry in summarize_dependencies(ldd_result.stdout):
                print(f"      {entry}")
        else:
            print(f"   ⚠️ Could not check dependencies: {ldd_result.stderr}")

else:
    print(f"❌ Driver file missing: {driver_file}")

    # Check if build artifacts exist
    build_dir = "/opt/eopf-zarr/build"
    if os.path.exists(build_dir):
        print(f"🔍 Checking build directory: {build_dir}")
        try:
            contents = os.listdir(build_dir)
            print(f"   Build contents: {contents}")
        except OSError as e:
            print(f"   Error accessing build dir: {e}")

print("✅ Driver file verification complete")

## Force GDAL Registration

Try to force-load the EOPF-Zarr driver plugin, provided its shared library is present on disk.

In [None]:
# Force GDAL Registration
# Re-runs GDAL driver registration with the plugin directory configured and
# debug output enabled, then reports whether the EOPFZARR driver is available.
print("🔄 Attempting to force GDAL driver registration...")

plugin_dir = '/opt/eopf-zarr/drivers'
plugin_path = '/opt/eopf-zarr/drivers/gdal_EOPFZarr.so'
max_listed = 20  # cap on how many driver names are printed when the lookup fails

try:
    from osgeo import gdal

    # Enable debugging temporarily
    gdal.SetConfigOption('CPL_DEBUG', 'ON')
    try:
        gdal.SetConfigOption('GDAL_DRIVER_PATH', plugin_dir)

        # Force registration
        gdal.AllRegister()

        print(f"📊 Drivers after registration: {gdal.GetDriverCount()}")

        # Try alternative driver loading methods
        if os.path.exists(plugin_path):
            print("🔍 Attempting manual driver loading...")

            # Loading through ctypes only shows that the shared library and its
            # dependencies can be opened; no registration function is called
            # here, so this is a linkage check rather than a registration.
            try:
                import ctypes
                ctypes.CDLL(plugin_path)
                print("✅ Driver library loaded with ctypes")
            except Exception as e:
                print(f"⚠️ Manual loading failed: {e}")
    finally:
        # Disable debug output even when registration raised, so a failure
        # does not leave GDAL in verbose mode for the rest of the session.
        gdal.SetConfigOption('CPL_DEBUG', 'OFF')

    # Test final driver state
    final_driver = gdal.GetDriverByName('EOPFZARR')
    if final_driver:
        print(f"✅ EOPF-Zarr driver successfully loaded: {final_driver.GetDescription()}")
    else:
        print("⚠️ EOPF-Zarr driver still not found after forced registration")

        # List all available drivers for debugging
        print("📋 All available drivers:")
        total_drivers = gdal.GetDriverCount()
        for i in range(min(max_listed, total_drivers)):
            driver = gdal.GetDriver(i)
            print(f"  {i+1}: {driver.GetDescription()}")

        if total_drivers > max_listed:
            print(f"  ... and {total_drivers - max_listed} more drivers")

except Exception as e:
    # Also covers ImportError when GDAL's Python bindings are absent.
    print(f"❌ Error during forced registration: {e}")

print("✅ Force GDAL registration attempt complete")

## MyBinder Timeout Fix - Nuclear Option

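Since you're still experiencing timeouts, let's fall back to the most minimal setup possible. Note that this environment contains only Python and Jupyter (GDAL and the EOPF-Zarr driver are not installed), so its purpose is to confirm that the repository can launch on MyBinder at all before the driver is added back.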

In [None]:
# MyBinder Timeout Fix - Nuclear Option
# Prints a minimal environment.yml, reports which build/startup files exist in
# the current working directory, and prints the clean-up steps.
print("🚨 Creating minimal MyBinder configuration...")

# Smallest environment.yml: Python and Jupyter only
minimal_env = "\n".join([
    "name: eopf-minimal",
    "dependencies:",
    "  - python=3.9",
    "  - jupyter",
    "",
])

print("📝 Minimal environment.yml content:")
print(minimal_env)

# Files whose presence is reported as something to remove or rename
problematic_files = ['Dockerfile', 'postBuild', 'start', 'docker-compose.yml', 'build.sh']

print("\n🔍 Checking for problematic files:")
for candidate in problematic_files:
    if not os.path.exists(candidate):
        print(f"✅ Not found: {candidate}")
        continue
    print(f"❌ Found: {candidate} (should be removed/renamed)")

# Step-by-step instructions, one printed line per entry
instruction_lines = [
    "\n💡 URGENT ACTIONS NEEDED:",
    "1. Keep ONLY these files:",
    "   - environment.yml (minimal version)",
    "   - README.md",
    "   - *.py files",
    "   - *.ipynb files",
    "",
    "2. REMOVE/RENAME these files:",
    "   - Dockerfile",
    "   - postBuild",
    "   - start",
    "   - docker-compose.yml",
    "   - Any other Docker-related files",
    "",
    "3. Replace environment.yml with:",
    minimal_env,
    "",
    "4. Commit and push",
    "5. Try MyBinder again",
]
for instruction in instruction_lines:
    print(instruction)

print("✅ Timeout fix instructions generated")

## Windows Commands to Fix MyBinder Timeout

Here are the PowerShell commands to run on Windows, from the root of your repository, to apply the fix.

In [None]:
# Windows Commands to Fix MyBinder Timeout
# Prints PowerShell commands that rename the build/startup files, write a
# minimal environment.yml, and commit the result.
print("🖥️ WINDOWS POWERSHELL COMMANDS")
print("=" * 50)

print("Copy and paste these commands in PowerShell:")
print("")

# Step 1: Backup problematic files
print("# Step 1: Backup problematic files")
backup_targets = ["Dockerfile", "postBuild", "start", "docker-compose.yml", "build.sh"]
# One guarded rename per file, so files that are absent are skipped.
backup_commands = [
    f'if (Test-Path "{name}") {{ Rename-Item "{name}" "{name}.backup" }}'
    for name in backup_targets
]

for cmd in backup_commands:
    print(cmd)

print("")
print("# Step 2: Create minimal environment.yml")
# PowerShell here-string piped to Out-File. The environment name matches the
# "eopf-minimal" environment.yml shown in the minimal-configuration cell, so
# both sets of instructions produce the same file content.
minimal_env_cmd = '''@"
name: eopf-minimal
dependencies:
  - python=3.9
  - jupyter
"@ | Out-File -FilePath "environment.yml" -Encoding utf8'''

print(minimal_env_cmd)

print("")
print("# Step 3: Commit and push")
git_commands = [
    'git add .',
    'git commit -m "Fix MyBinder timeout - minimal environment"',
    'git push'
]

for cmd in git_commands:
    print(cmd)

print("")
print("# Step 4: Test MyBinder")
print("https://mybinder.org/v2/gh/Yuvraj198920/eopfzarr-docker-image/HEAD")

print("")
print("🎯 ALTERNATIVE: If you prefer, run this single command:")
print("Copy environment-nuclear.yml environment.yml")

print("")
print("✅ This WILL work - it removes all complexity that causes timeouts")
print("⏱️ Expected MyBinder build time: < 2 minutes")
print("📝 You'll get a basic Jupyter environment with Python 3.9")