In [1]:
# ============================================================
# 🚀 FULL PIPELINE ORCHESTRATOR (Dynamic Config Merge)
# ============================================================

!pip install papermill requests -q
import papermill as pm
import json, requests, time, csv, os, datetime

# ============================================================
# 1️⃣ Load base config from GitHub (for SAR extractor)
# ============================================================
CONFIG_URL = "https://raw.githubusercontent.com/JuanoS12/Space-Apps/main/scripts/automate/config.json"

try:
    response = requests.get(CONFIG_URL)
    response.raise_for_status()
    BASE_CONFIG = response.json()
    print(f"✅ Base config loaded from GitHub.")
except Exception as e:
    print(f"⚠️ Could not fetch remote config ({e}). Falling back to local 'config.json'")
    with open("config.json") as f:
        BASE_CONFIG = json.load(f)

# ============================================================
# 2️⃣ Extend config with orchestrator-specific parameters
# ============================================================

EXTRA_CONFIG = {
    "folders": {
        "exports": "SAR_Exports",
        "clean": "CLEAN/SAR_Exports",
        "mosaics": "CLEAN/SAR_Mosaics",
        "models": "models",
        "manifest": "manifest"
    },
    "cleaning": {
        "skip_existing": True,
        "dtype": "float32"
    },
    "mosaic": {
        "expected_aois": 12,
        "parallel_downloads": 4
    },
    "ml": {
        "enable_training": False,
        "model_type": "unet_resnet34",
        "model_path": "models/unet34_latest.pth"
    },
    "orchestrator": {
        "pipeline_log": "manifest/pipeline_log.csv",
        "stop_on_error": True
    }
}

# Merge both configurations
CONFIG = {**BASE_CONFIG, **EXTRA_CONFIG}

print("✅ Configuration merged successfully.")
print(f"→ Project: {CONFIG['project_id']}")
print(f"→ Bucket: {CONFIG['bucket']}")
print(f"→ AOI: {CONFIG['aoi_kmz']}")
print(f"→ Years: {CONFIG['start_year']}–{CONFIG['end_year']}")
print(f"→ Output folders: {CONFIG['folders']}")
print("────────────────────────────────────────────")

# ============================================================
# 3️⃣ Logging Setup
# ============================================================
LOG_PATH = CONFIG["orchestrator"]["pipeline_log"]
os.makedirs(os.path.dirname(LOG_PATH), exist_ok=True)

def log_stage(stage, status, duration_min, message=""):
    row = {
        "timestamp": datetime.datetime.utcnow().isoformat(),
        "stage": stage,
        "status": status,
        "duration_min": round(duration_min, 2),
        "message": message
    }
    write_header = not os.path.exists(LOG_PATH)
    with open(LOG_PATH, "a", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=row.keys())
        if write_header:
            writer.writeheader()
        writer.writerow(row)

# ============================================================
# 4️⃣ Define pipeline sequence
# ============================================================
PIPELINE_STEPS = [
    ("sar_time_lapse.ipynb", "01_extract_output.ipynb"),
    ("Limpia.ipynb", "02_clean_output.ipynb"),
    ("mosaic.ipynb", "03_mosaic_output.ipynb")
]

# ============================================================
# 5️⃣ Execute pipeline
# ============================================================
start_total = time.time()
for idx, (input_nb, output_nb) in enumerate(PIPELINE_STEPS, start=1):
    print(f"\n🚀 Step {idx}/{len(PIPELINE_STEPS)} → Running {input_nb} ...")
    stage_start = time.time()
    try:
        pm.execute_notebook(input_nb, output_nb, parameters=dict(CONFIG=CONFIG))
        duration = (time.time() - stage_start) / 60
        print(f"✅ {input_nb} completed in {duration:.2f} min.")
        log_stage(input_nb, "success", duration)
    except Exception as e:
        duration = (time.time() - stage_start) / 60
        print(f"❌ Error in {input_nb}: {e}")
        log_stage(input_nb, "failed", duration, str(e))
        if CONFIG["orchestrator"]["stop_on_error"]:
            break

total_duration = (time.time() - start_total) / 60
print("\n🎯 Full pipeline completed.")
print(f"⏱️ Total runtime: {total_duration:.2f} min")
log_stage("full_pipeline", "completed", total_duration)



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m
✅ Base config loaded from GitHub.
✅ Configuration merged successfully.
→ Project: nasa-space-apps-473722
→ Bucket: nasa_sar_spacetron
→ AOI: https://drive.google.com/uc?export=download&id=1oPVRvGny1y-pDE-5G-ayTUpSGHw0Rlh-
→ Years: 2020–2025
→ Output folders: {'exports': 'SAR_Exports', 'clean': 'CLEAN/SAR_Exports', 'mosaics': 'CLEAN/SAR_Mosaics', 'models': 'models', 'manifest': 'manifest'}
────────────────────────────────────────────


PermissionError: [Errno 13] Permission denied: 'manifest'