# In [None]:
import os
import subprocess
import sys
from pathlib import Path
import time
import json
from datetime import datetime

def _build_nbconvert_command(notebook_path, output_dir, timeout, kernel_name, allow_errors):
    """Assemble the ``jupyter nbconvert`` argument list for one notebook run."""
    # An argument list (rather than a shell string) keeps Windows paths
    # containing spaces intact.
    cmd = [
        "jupyter", "nbconvert",
        "--to", "notebook",
        "--execute",
        f"--ExecutePreprocessor.timeout={timeout}",
        # Honour the caller's kernel; fall back to the historical default.
        f"--ExecutePreprocessor.kernel_name={kernel_name or 'python3'}",
    ]

    if allow_errors:
        cmd.append("--ExecutePreprocessor.allow_errors=True")

    if output_dir:
        cmd.extend(["--output-dir", str(output_dir)])

    # str() so that pathlib.Path arguments are accepted as well.
    cmd.append(str(notebook_path))
    return cmd


def run_notebook(notebook_path, output_dir=None, timeout=600, kernel_name=None, allow_errors=False):
    """
    Execute a Jupyter notebook through ``jupyter nbconvert --execute``.

    Args:
        notebook_path: Path to the notebook file (str or Path).
        output_dir: Directory passed to nbconvert's ``--output-dir`` (optional).
        timeout: Value passed to ``ExecutePreprocessor.timeout`` in seconds
            (default: 600). The nbconvert subprocess itself is given
            ``timeout + 60`` seconds of wall-clock time before it is aborted.
        kernel_name: Kernel to execute with; ``"python3"`` is used when omitted.
        allow_errors: If True, pass ``ExecutePreprocessor.allow_errors=True``
            so nbconvert keeps going after a failing cell.

    Returns:
        Tuple of (notebook_name, success_status, error_message, execution_time).
        ``error_message`` is None on success; ``execution_time`` is the
        measured wall-clock duration in seconds.
    """
    notebook_name = Path(notebook_path).stem
    start_time = time.time()

    try:
        cmd = _build_nbconvert_command(
            notebook_path, output_dir, timeout, kernel_name, allow_errors
        )

        print(f"‚ñ∂Ô∏è  Running: {notebook_name}")
        start_time = time.time()
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            # Undecodable bytes in nbconvert's output must not abort the run.
            errors="replace",
            # Grace period on top of the nbconvert timeout for start-up/teardown.
            timeout=timeout + 60,
        )
        execution_time = time.time() - start_time

        if result.returncode == 0:
            print(f"‚úÖ Completed: {notebook_name}")
            return (notebook_name, True, None, execution_time)

        error_msg = result.stderr or result.stdout
        print(f"‚ùå Failed: {notebook_name}")
        print(f"Error: {error_msg[:500]}")  # Print first 500 chars of error
        return (notebook_name, False, error_msg, execution_time)

    except subprocess.TimeoutExpired:
        print(f"‚ùå Timeout: {notebook_name} exceeded {timeout} seconds")
        # Report the time actually spent, not the nominal timeout.
        return (
            notebook_name,
            False,
            f"Execution timeout after {timeout} seconds",
            time.time() - start_time,
        )

    except Exception as e:
        # Top-level boundary: e.g. the ``jupyter`` executable is missing.
        print(f"‚ùå Error executing {notebook_name}: {str(e)}")
        return (notebook_name, False, str(e), time.time() - start_time)


def profile_notebook(notebook_path):
    """
    Gather static statistics about a notebook without executing it.

    The notebook is read with nbformat (as version 4). Code cells are
    counted, their source lines are tallied, and each code cell's lowercased
    source is scanned for a fixed list of substrings as a rough
    "heavy operation" hint.

    Args:
        notebook_path: Path to the notebook file

    Returns:
        Dictionary with the keys 'total_cells', 'code_cells', 'total_lines'
        and 'has_heavy_ops', or {'error': <message>} if anything fails
        (including nbformat not being importable).
    """
    heavy_keywords = ['groupby', 'merge', 'apply', 'loop', 'for ', 'while ']

    try:
        import nbformat

        with open(notebook_path, 'r', encoding='utf-8') as handle:
            notebook = nbformat.read(handle, as_version=4)

        # Only code cells contribute to the line count and keyword scan.
        code_cells = []
        for cell in notebook.cells:
            if cell.cell_type == 'code':
                code_cells.append(cell)

        total_lines = 0
        for cell in code_cells:
            total_lines += len(cell.source.split('\n'))

        # Stop at the first cell that mentions any of the keywords.
        has_heavy_ops = False
        for cell in code_cells:
            lowered_source = str(cell.source).lower()
            if any(keyword in lowered_source for keyword in heavy_keywords):
                has_heavy_ops = True
                break

        profile = {}
        profile['total_cells'] = len(notebook.cells)
        profile['code_cells'] = len(code_cells)
        profile['total_lines'] = total_lines
        profile['has_heavy_ops'] = has_heavy_ops
        return profile
    except Exception as exc:
        return {'error': str(exc)}


def main():
    """
    Run the master notebook first, then every other notebook, and save a summary.

    Workflow:
        0. Optionally profile each notebook (static statistics only).
        1. Execute the master notebook; retry up to ``max_retries`` times with
           a growing timeout if it fails. The process exits with status 1 if
           no notebooks are found, the master notebook is missing, or the
           master notebook still fails after the retries.
        2. Execute the remaining notebooks in sorted order, retrying each
           failed one the same way; failures here do not stop the run.
        3. Print a summary and write it to ``execution_summary.json`` in the
           output folder.
    """
    # Configuration
    notebook_folder = r"D:\OneDrive - Tonik Financial Pte Ltd\MyStuff\Data Engineering\Model_Monitoring\Gini Monitoring\Gini_Monitoring_Modular_Approach\Notebooks"
    master_notebook = "CIC_SIL_Models.ipynb"
    output_folder = "./outputs"

    # PERFORMANCE SETTINGS - Adjust based on your needs
    config = {
        'master_timeout': 2200,         # Seconds (~37 minutes) for the master notebook
        'standard_timeout': 1900,       # Seconds (~32 minutes) for every other notebook
        'enable_profiling': True,       # Profile notebooks before execution
        'allow_errors': False,          # False: a failing cell fails the first attempt
        'retry_failed': True,           # Retry failed notebooks with longer timeout
        'max_retries': 2                # Retries per notebook (timeout grows 1.5x each time)
    }

    # Create output folder if it doesn't exist (no check-then-create race)
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    # Get all notebooks in the folder
    all_notebooks = sorted(Path(notebook_folder).glob("*.ipynb"))

    if not all_notebooks:
        print("‚ùå No Jupyter notebooks found in the folder!")
        sys.exit(1)

    # Separate master notebook from others
    master_path = Path(notebook_folder) / master_notebook
    other_notebooks = [nb for nb in all_notebooks if nb.name != master_notebook]

    if not master_path.exists():
        print(f"‚ùå Master notebook '{master_notebook}' not found!")
        sys.exit(1)

    print(f"\nüìã Found {len(all_notebooks)} notebooks total")
    print(f"üéØ Master notebook: {master_notebook}")
    print(f"üîó Other notebooks to run: {len(other_notebooks)}")
    print(f"‚è±Ô∏è  Master timeout: {config['master_timeout']}s | Standard timeout: {config['standard_timeout']}s\n")

    # Track all results
    all_results = []
    total_start_time = time.time()

    # Optional: Profile notebooks
    if config['enable_profiling']:
        print("=" * 60)
        print("STEP 0: Profiling Notebooks")
        print("=" * 60)
        for notebook in [master_path] + other_notebooks:
            profile_info = profile_notebook(str(notebook))
            if 'error' in profile_info:
                # Profiling is best-effort, but say why a notebook was skipped.
                print(f"‚ö†Ô∏è  Could not profile {notebook.name}: {profile_info['error']}")
                continue
            print(f"üìä {notebook.name}: {profile_info['code_cells']} code cells, "
                  f"{profile_info['total_lines']} lines, "
                  f"Heavy ops: {profile_info['has_heavy_ops']}")
        print()

    # Step 1: Run master notebook first
    print("=" * 60)
    print("STEP 1: Running Master Notebook")
    print("=" * 60)

    master_result = run_notebook(
        str(master_path),
        output_folder,
        timeout=config['master_timeout'],
        allow_errors=config['allow_errors']
    )
    all_results.append(master_result)

    print(f"‚è±Ô∏è  Master notebook took {master_result[3]:.2f} seconds\n")

    if not master_result[1]:
        if config['retry_failed']:
            # Honour max_retries, with the same 1.5x-per-attempt timeout
            # growth that the other notebooks use.
            # NOTE(review): retries run with allow_errors=True, so a
            # "successful" retry may still contain failed cells - confirm
            # this is intended.
            for attempt in range(1, config['max_retries'] + 1):
                new_timeout = int(config['master_timeout'] * (1.5 ** attempt))
                print(f"‚ö†Ô∏è  Master notebook failed. Retrying with increased timeout "
                      f"({new_timeout}s, attempt {attempt}/{config['max_retries']})...\n")
                master_result = run_notebook(
                    str(master_path),
                    output_folder,
                    timeout=new_timeout,
                    allow_errors=True
                )
                all_results[-1] = master_result
                print(f"‚è±Ô∏è  Master notebook retry took {master_result[3]:.2f} seconds\n")

                if master_result[1]:
                    break

        if not master_result[1]:
            print("‚ùå Master notebook failed after retries! Exiting...")
            sys.exit(1)

    # Step 2: Run other notebooks sequentially
    print("=" * 60)
    print("STEP 2: Running Other Notebooks Sequentially")
    print("=" * 60)

    if not other_notebooks:
        print("‚ÑπÔ∏è  No other notebooks to run.")
    else:
        for idx, notebook in enumerate(other_notebooks, 1):
            print(f"\n[{idx}/{len(other_notebooks)}]")

            result = run_notebook(
                str(notebook),
                output_folder,
                timeout=config['standard_timeout'],
                allow_errors=config['allow_errors']
            )
            all_results.append(result)

            print(f"‚è±Ô∏è  Took {result[3]:.2f} seconds")

            # Retry logic for failed notebooks
            if not result[1] and config['retry_failed']:
                for attempt in range(1, config['max_retries'] + 1):
                    new_timeout = int(config['standard_timeout'] * (1.5 ** attempt))
                    print(f"‚ö†Ô∏è  Retrying with timeout {new_timeout}s (Attempt {attempt}/{config['max_retries']})...")

                    result = run_notebook(
                        str(notebook),
                        output_folder,
                        timeout=new_timeout,
                        allow_errors=True
                    )
                    # Only the latest attempt is kept in the results.
                    all_results[-1] = result

                    if result[1]:
                        print(f"‚úÖ Retry successful! Took {result[3]:.2f} seconds")
                        break
                    print(f"‚è±Ô∏è  Retry took {result[3]:.2f} seconds")

                if not result[1]:
                    print(f"‚ö†Ô∏è  Notebook {notebook.name} failed after {config['max_retries']} retries. Continuing...\n")

    # Summary
    total_elapsed = time.time() - total_start_time
    successful = sum(1 for _, success, _, _ in all_results if success)
    failed = len(all_results) - successful

    print("\n" + "=" * 60)
    print("SUMMARY")
    print("=" * 60)
    print(f"‚úÖ Successful: {successful}/{len(all_results)}")
    print(f"‚ùå Failed: {failed}/{len(all_results)}")

    if failed > 0:
        print("\nFailed notebooks:")
        for name, success, error, _ in all_results:
            if not success:
                # Keep the console summary short; truncate long error text.
                error_preview = (error[:100] + "...") if error and len(error) > 100 else error
                print(f"  - {name}: {error_preview}")

    # Execution time breakdown
    print(f"\nüìä Total execution time: {total_elapsed:.2f} seconds ({total_elapsed/60:.1f} minutes)")
    print(f"üìÖ Completed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    # Save summary to file
    summary = {
        'timestamp': datetime.now().isoformat(),
        'total_notebooks': len(all_results),
        'successful': successful,
        'failed': failed,
        'total_time_seconds': total_elapsed,
        'results': [
            {'notebook': name, 'success': success, 'time': exec_time}
            for name, success, _, exec_time in all_results
        ]
    }

    summary_path = os.path.join(output_folder, 'execution_summary.json')
    with open(summary_path, 'w', encoding='utf-8') as f:
        json.dump(summary, f, indent=2)

    print(f"üìÅ Summary saved to: {summary_path}")

# Run the batch only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()