# Auto Pipeline Runner

Run the complete pipeline automatically after YOLO model training.

This notebook:
1. Finds the most recently trained YOLO model
2. Updates config with new model path
3. Runs the full pipeline with backups
4. Monitors progress and logs output

## Setup

In [None]:
import os
import sys
import subprocess
import yaml
from pathlib import Path
from datetime import datetime
import json

# Absolute locations shared by every cell in this notebook
PROJECT_ROOT = Path('/home/ramanlab/Documents/cole/VSCode/Ramanlab-Auto-Data-Analysis')
CONFIG_PATH = PROJECT_ROOT.joinpath('config', 'config.yaml')
LOGS_DIR = PROJECT_ROOT.joinpath('logs')

# Make sure the log directory (and any missing parents) exists before a run writes to it
LOGS_DIR.mkdir(exist_ok=True, parents=True)

# Echo the resolved locations so a wrong checkout path is obvious right away
print(f"‚úì Project root: {PROJECT_ROOT}")
print(f"‚úì Config path: {CONFIG_PATH}")
print(f"‚úì Logs directory: {LOGS_DIR}")

## Find Latest YOLO Model

In [None]:
def find_latest_yolo_model():
    """
    Find the weights of the most recent YOLO training run.

    Scans ``PROJECT_ROOT/model/YOLOProjectProboscisLegs/runs/obb`` for
    sub-directories whose name starts with ``train`` and selects the one with
    the highest run number. Digit runs in the folder name are compared as
    integers, so ``train10`` ranks after ``train9`` and an un-numbered
    ``train`` ranks first (a plain lexicographic sort would put ``train9``
    last and report a stale model).

    Returns:
        The path to ``weights/best.pt`` inside the selected run as a ``str``,
        or ``None`` when the runs directory is missing, holds no ``train*``
        folder, or the selected run has no ``best.pt``. A message describing
        the outcome is printed in every case.
    """
    import re  # local import: only needed for the natural-sort key below

    yolo_base = PROJECT_ROOT / 'model' / 'YOLOProjectProboscisLegs' / 'runs' / 'obb'

    if not yolo_base.is_dir():
        print(f"‚ùå YOLO models directory not found: {yolo_base}")
        return None

    # Find all training folders
    train_dirs = [d for d in yolo_base.iterdir() if d.is_dir() and d.name.startswith('train')]

    if not train_dirs:
        print(f"‚ùå No training folders found in {yolo_base}")
        return None

    def run_order(run_dir):
        # re.split with a capturing group alternates text / digits, so odd
        # positions are always digit runs: "train10" -> ['train', 10, ''].
        tokens = re.split(r'(\d+)', run_dir.name)
        natural = [int(tok) if idx % 2 else tok for idx, tok in enumerate(tokens)]
        # The raw name breaks ties such as "train01" vs "train1" deterministically.
        return natural, run_dir.name

    latest = max(train_dirs, key=run_order)
    model_path = latest / 'weights' / 'best.pt'

    if not model_path.exists():
        print(f"‚ùå Model file not found: {model_path}")
        return None

    info = model_path.stat()  # one stat call serves both size and mtime
    print(f"‚úì Found latest model: {latest.name}")
    print(f"  Path: {model_path}")
    print(f"  Size: {info.st_size / (1024*1024):.1f} MB")
    print(f"  Modified: {datetime.fromtimestamp(info.st_mtime)}")
    return str(model_path)

# Run the detection once when this cell executes; later cells read the
# `latest_model` global (it is None when no usable model was found)
latest_model = find_latest_yolo_model()
print(f"\nLatest model: {latest_model}")

## Update Config with New Model

In [None]:
def update_config_with_model(model_path):
    """
    Point the ``model_path`` entry of config.yaml at a new model file.

    Loads ``CONFIG_PATH``, sets the top-level ``model_path`` key and writes the
    document back. Because the file is round-tripped through the YAML parser,
    comments and hand formatting in it are not preserved; key order is kept.

    Args:
        model_path: Location of the model weights (``str`` or path-like). It is
            stored as a plain string so the file stays loadable with
            ``yaml.safe_load``.

    Returns:
        True when the config was rewritten; False when ``model_path`` is empty,
        the config is not a YAML mapping, or reading/writing failed. A message
        describing the outcome is printed in every case.
    """
    if not model_path:
        print("‚ùå Error updating config: no model path given")
        return False

    try:
        # Read current config
        with open(CONFIG_PATH, 'r', encoding='utf-8') as f:
            config = yaml.safe_load(f)

        if config is None:
            # safe_load returns None for an empty file; start from an empty mapping
            config = {}
        elif not isinstance(config, dict):
            print(f"‚ùå Error updating config: expected a mapping in {CONFIG_PATH}, "
                  f"found {type(config).__name__}")
            return False

        old_model = config.get('model_path', 'Unknown')

        # Update model path (str() keeps Path objects from being dumped as python tags)
        config['model_path'] = str(model_path)

        # Serialize BEFORE opening the file for writing: opening with 'w'
        # truncates immediately, so a dump failure would otherwise wipe the config.
        serialized = yaml.safe_dump(config, default_flow_style=False, sort_keys=False)

        # Write updated config
        with open(CONFIG_PATH, 'w', encoding='utf-8') as f:
            f.write(serialized)

        print("‚úì Config updated successfully!")
        print(f"  Old model: {old_model}")
        print(f"  New model: {model_path}")
        return True
    except Exception as e:
        # Notebook-level boundary: report the failure and signal it via the return value
        print(f"‚ùå Error updating config: {e}")
        return False

# Tell the user which path to take next: auto-update or manual entry
if not latest_model:
    print("‚ö† No model found - find your model path and enter it manually below.")
else:
    print("Ready to update config. Run next cell when ready.")

## Option A: Auto-Update Config (Recommended)

Update config with latest model automatically:

In [None]:
# Write the auto-detected model into the config, if detection succeeded
if not latest_model:
    print("‚ùå No model found. Please manually enter model path.")
else:
    update_config_with_model(latest_model)

## Option B: Manual Model Path

If auto-detect didn't work, paste your model path here:

In [None]:
# EDIT THIS with your model path
# NOTE(review): this example path has no 'VSCode/Ramanlab-Auto-Data-Analysis'
# component, so it is not under PROJECT_ROOT — confirm it points at the intended run.
manual_model_path = "/home/ramanlab/Documents/cole/model/YOLOProjectProboscisLegs/runs/obb/train10/weights/best.pt"

# Uncomment to use manual path (rewrites config.yaml with the value above)
# update_config_with_model(manual_model_path)

## Run Pipeline

In [None]:
def run_pipeline():
    """
    Run ``make run`` in the project root, streaming its output live.

    The combined stdout/stderr of the command is echoed line by line and
    mirrored to ``LOGS_DIR/pipeline_run_<timestamp>.log``. The command is
    started with ``cwd=PROJECT_ROOT``, so the notebook's own working directory
    is left untouched.

    The banner announces CSV backups before and after the analysis; those
    steps are expected to be part of the ``make run`` target and are not
    performed by this function itself.

    Returns:
        True when ``make run`` exits with status 0; False when it exits with a
        non-zero status or could not be run at all. If the run is interrupted
        (e.g. KeyboardInterrupt) the child process is stopped and the
        interruption propagates to the caller.
    """
    print("="*70)
    print("STARTING PIPELINE")
    print("="*70)
    print(f"Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"Config: {CONFIG_PATH}")
    print("\nThis will:")
    print("  1. Backup CSVs before processing")
    print("  2. Run full analysis pipeline")
    print("  3. Backup CSVs after processing")
    print("\n" + "="*70)

    # Create log file
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    log_file = LOGS_DIR / f"pipeline_run_{timestamp}.log"

    try:
        # errors='replace' on both ends: one undecodable/unencodable byte in the
        # tool output must not abort streaming halfway through a long run.
        with open(log_file, 'w', errors='replace') as log, subprocess.Popen(
            ['make', 'run'],
            cwd=PROJECT_ROOT,  # run in the project root without os.chdir()
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            errors='replace',
            bufsize=1,
        ) as process:
            try:
                # Stream output in real-time
                for line in process.stdout:
                    print(line, end='', flush=True)
                    log.write(line)
            except BaseException:
                # Interrupted or failed mid-stream: do not leave the pipeline
                # running unattended. Ask politely first (presumably lets make
                # stop its recipe), then force-kill if it does not exit.
                process.terminate()
                try:
                    process.wait(timeout=10)
                except subprocess.TimeoutExpired:
                    process.kill()
                raise

        # Leaving the Popen context waits for the child, so the status is final here
        returncode = process.returncode

        print("\n" + "="*70)
        if returncode == 0:
            print("‚úÖ PIPELINE COMPLETED SUCCESSFULLY!")
            print(f"Log saved to: {log_file}")
            return True
        else:
            print(f"‚ùå PIPELINE FAILED with return code {returncode}")
            print(f"Check log: {log_file}")
            return False
    except Exception as e:
        # Notebook-level boundary: report and signal failure via the return value
        print(f"‚ùå Error running pipeline: {e}")
        return False
    finally:
        print(f"\nFinished at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        print("="*70)

## 🚀 EXECUTE: Run Full Pipeline

**IMPORTANT:** This will take a while (15-60+ minutes depending on your data).

The pipeline will:
1. ✅ Backup CSVs (before)
2. 🔄 Run full analysis
3. ✅ Backup CSVs (after)

Output will appear in real-time below:

In [None]:
# RUN THE PIPELINE
success = run_pipeline()

# Summarize the outcome that run_pipeline() reported
if not success:
    print("\n‚ö†Ô∏è  Pipeline encountered an error. Check logs above.")
else:
    print("\n‚ú® Your analysis is complete and backed up!")

## Check Results

In [None]:
def check_pipeline_results():
    """
    Print a three-part summary of what is on disk under PROJECT_ROOT.

    Sections, in order: the ``Results`` tree (item/file totals plus an item
    count for each immediate sub-folder), the ``*.csv`` files in
    ``Data/Opto/Combined`` with their sizes, and up to five ``*.zip`` archives
    from ``backups_compressed`` (reverse name order). Nothing is returned.
    """
    rule = "=" * 70

    def size_line(path):
        # One indented "- name (size MB)" entry, shared by the CSV and archive lists
        return f"    - {path.name} ({path.stat().st_size / (1024*1024):.1f} MB)"

    # --- Results tree ---
    results_base = PROJECT_ROOT / 'Results'
    print("Pipeline Results:")
    print(rule)

    if not results_base.exists():
        print(f"‚ùå Results folder not found: {results_base}")
    else:
        entries = list(results_base.rglob('*'))
        n_files = sum(1 for entry in entries if entry.is_file())

        print(f"‚úì Results folder: {results_base}")
        print(f"  Total items: {len(entries)}")
        print(f"  Files: {n_files}")

        # Only immediate sub-folders are listed; each count is recursive
        if entries:
            print("\nContents:")
            for child in sorted(results_base.iterdir()):
                if not child.is_dir():
                    continue
                n_items = sum(1 for _ in child.rglob('*'))
                print(f"  üìÅ {child.name}/ ({n_items} items)")

    # --- Combined CSV outputs ---
    csv_base = PROJECT_ROOT / 'Data' / 'Opto' / 'Combined'
    print("\n\nCSV Outputs:")
    print(rule)

    if not csv_base.exists():
        print(f"‚ùå CSV folder not found: {csv_base}")
    else:
        csv_paths = sorted(csv_base.glob('*.csv'))
        print(f"‚úì CSV folder: {csv_base}")
        print(f"  CSV files: {len(csv_paths)}")
        for csv_path in csv_paths:
            print(size_line(csv_path))

    # --- Compressed backups ---
    print("\n\nBackup Status:")
    print(rule)

    backups_dir = PROJECT_ROOT / 'backups_compressed'
    if not backups_dir.exists():
        print("‚ÑπÔ∏è  No compressed archives yet (run make backup-compressed to create)")
    else:
        archives = sorted(backups_dir.glob('*.zip'), reverse=True)
        print(f"‚úì Compressed archives: {len(archives)}")
        for archive in archives[:5]:  # first five in reverse name order
            print(size_line(archive))

# Print the summary as soon as this cell runs
check_pipeline_results()

## View Logs

In [None]:
# Collect every pipeline log; the name that sorts last is treated as the latest
pipeline_logs = list(LOGS_DIR.glob('pipeline_run_*.log'))

if not pipeline_logs:
    print("No pipeline logs found yet.")
else:
    latest_log = max(pipeline_logs)
    print(f"Latest log: {latest_log.name}")
    print(f"Size: {latest_log.stat().st_size / (1024*1024):.1f} MB\n")

    # Read the whole file, then display only its tail
    with open(latest_log, 'r') as f:
        lines = f.readlines()

    print("Last 50 lines of log:")
    print("="*70)
    for line in lines[-50:]:
        # Lines keep their own newline, so suppress print's
        print(line, end='')

## Quick Commands

Run these in separate cells:

```python
# Create compressed backup after pipeline
os.chdir(PROJECT_ROOT)
subprocess.run(['make', 'backup-compressed'])
```

```python
# Check Box backups
subprocess.run(['rclone', 'ls', 'Box-Folder:Ramanlab-Backups/'])
```

```python
# Follow the backup log live (blocks the cell until you interrupt the kernel)
subprocess.run(['tail', '-f', str(LOGS_DIR / 'backup.log')])
```