# s2pipe — local launcher (Step 1: download)

This notebook is intended for **local execution** (Jupyter) inside the repository directory.
It installs the package in editable mode, ensures the repo `src/` is importable in the *current kernel*, and runs **Step 1 (download)**.

Important note: a freshly created editable `.pth` file is normally processed only at interpreter startup. Therefore, this notebook also injects `<repo>/src` into `sys.path` so you can import without restarting the kernel.


## Step 1 — Locate repo root and install (editable)
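Run this from the repository root (where `pyproject.toml` is located) or from any subdirectory of it: the cell walks up the parent directories until it finds `pyproject.toml`, then changes the working directory to that folder.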


In [16]:
from pathlib import Path
import os, sys, subprocess, importlib

# Locate the repository root: the nearest directory, starting at the current
# working directory and walking upward, that contains a pyproject.toml.
start_dir = Path.cwd().resolve()
repo_root = next(
    (
        candidate
        for candidate in (start_dir, *start_dir.parents)
        if (candidate / 'pyproject.toml').exists()
    ),
    None,
)
if repo_root is None:
    raise RuntimeError('pyproject.toml not found. Please open this notebook from the repo root.')

# Switch to the repo root so that the relative pip target ('.') below refers to it.
os.chdir(repo_root)
print('repo_root =', repo_root)
print('python =', sys.executable)

# Editable install, run with the interpreter of the current kernel (quiet mode).
pip_cmd = [sys.executable, '-m', 'pip', 'install', '-e', '.', '-q']
subprocess.check_call(pip_cmd)

# The running kernel may not see a freshly installed editable package, so put
# <repo>/src on sys.path by hand (once) and invalidate the import-finder caches.
src_path = str(repo_root / 'src')
if src_path not in sys.path:
    sys.path.insert(0, src_path)
importlib.invalidate_caches()

# Smoke test: confirm which copy of the package is imported and its version.
import s2pipe
print('s2pipe imported from:', s2pipe.__file__)
print('s2pipe version is:', s2pipe.__version__)


repo_root = /home/jan/Work/copernicus-s2-pipeline
python = /home/jan/.miniforge3/envs/miltech-gpu/bin/python
s2pipe imported from: /home/jan/Work/copernicus-s2-pipeline/src/s2pipe/__init__.py
s2pipe version is: 0.1.0


## Step 2 — Configure parameters

Notes:
- `top` must be `<= 1000` (CDSE OData limit).
- `cloud_min`/`cloud_max` are optional.
- `min_coverage_ratio` is applied to L1C in the current pipeline.


In [17]:
from pathlib import Path

# --- User parameters ---
# Tile identifier and the UTC time window to query.
TILE_ID = '33UWQ'  # with or without 'T'
DATE_FROM_UTC = '2025-12-01T00:00:00.000Z'
DATE_TO_UTC   = '2025-12-15T00:00:00.000Z'

# Optional cloud-cover bounds and the minimum coverage ratio.
CLOUD_MIN = 10.0   # set None to disable
CLOUD_MAX = 80.0   # set None to disable
MIN_COVERAGE_RATIO = 0.8

# Query page size and cap on the number of pairs processed.
TOP = 50           # must be <= 1000
MAX_PAIRS = 3      # set None for no limit

OUT_DIR = Path('./out_local')  # adjust as needed
DRY_RUN = True                 # True = plan only; False = actually download

# --- Fail fast on invalid parameters ---
# Checking here reports a bad value immediately, before the download cell
# prompts for credentials or contacts the service.
if int(TOP) > 1000:
    raise ValueError(f'TOP must be <= 1000 (CDSE OData limit), got {TOP!r}')
if CLOUD_MIN is not None and CLOUD_MAX is not None and CLOUD_MIN > CLOUD_MAX:
    raise ValueError(
        f'CLOUD_MIN ({CLOUD_MIN!r}) must not be greater than CLOUD_MAX ({CLOUD_MAX!r})'
    )


## Step 3 — Run download
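On the first run you will be prompted for CDSE credentials (email + password, and TOTP if enabled). If an `auth` object already exists in the kernel (for example, after re-running this cell), it is reused and no prompt appears; restart the kernel or run `del auth` to be prompted again.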


In [18]:
from s2pipe.cfg import (
    PipelineConfig, QueryConfig, SelectionConfig, NodesIndexConfig,
    DownloadConfig, RunControlConfig, ManifestConfig,
)
from s2pipe.cdse.auth import prompt_auth
from s2pipe.pipeline import run_download

# Build each configuration section on its own, then assemble the pipeline config.
query_cfg = QueryConfig(
    tile_id=TILE_ID,
    date_from_utc=DATE_FROM_UTC,
    date_to_utc=DATE_TO_UTC,
    cloud_min=CLOUD_MIN,
    cloud_max=CLOUD_MAX,
    min_coverage_ratio=float(MIN_COVERAGE_RATIO),
    top=int(TOP),
    include_attributes_in_hits=True,
    tile_area_m2=1.21e10,
)
selection_cfg = SelectionConfig(
    l1c_tile_metadata=True,
    l2a_scl_20m=True,
    l2a_tile_metadata=True,
)
nodes_index_cfg = NodesIndexConfig(
    skip_dir_names=frozenset({'HTML', 'rep_info', 'DATASTRIP', 'AUX_DATA', 'QI_DATA'}),
    max_dirs_to_visit=50_000,
    enable_cache=True,
)
download_cfg = DownloadConfig(
    out_dir=OUT_DIR,
    overwrite=False,
    dry_run=bool(DRY_RUN),
)
control_cfg = RunControlConfig(max_pairs=MAX_PAIRS)
manifest_cfg = ManifestConfig(
    write_json=True,
    export_table=True,
    store_geofootprint=True,
    runs_dir='runs',
    index_name='index.json',
)

cfg = PipelineConfig(
    query=query_cfg,
    selection=selection_cfg,
    nodes_index=nodes_index_cfg,
    download=download_cfg,
    control=control_cfg,
    manifest=manifest_cfg,
)

# Prompt for credentials only when the kernel has no `auth` object yet;
# re-running this cell reuses the existing one.
if "auth" not in globals():
    auth = prompt_auth()
res = run_download(cfg, auth=auth)

print('Pairs:', len(res.pairs))
# Output paths are read defensively: an attribute missing on `res` prints as None.
for label, attr_name in (
    ('Run manifest:', 'manifest_path'),
    ('Run table CSV:', 'table_csv_path'),
    ('Run table XLSX:', 'table_xlsx_path'),
):
    print(label, getattr(res, attr_name, None))


Pairs: 1
Run manifest: out_local/meta/manifest/runs/20251230T181303Z/manifest.json
Run table CSV: out_local/meta/manifest/runs/20251230T181303Z/manifest_table.csv
Run table XLSX: out_local/meta/manifest/runs/20251230T181303Z/manifest_table.xlsx


## Step 4 — Inspect outputs
Variant A layout (recommended):
- `meta/manifest/runs/<RUN_ID>/...`
- `meta/manifest/index.json`


In [19]:
from pathlib import Path

# Summarise what the run wrote under <OUT_DIR>/meta/manifest.
manifest_root = OUT_DIR / 'meta' / 'manifest'
print('manifest_root:', manifest_root)

if not manifest_root.exists():
    print('No outputs yet (did the run complete?)')
else:
    runs_dir = manifest_root / 'runs'
    if runs_dir.exists():
        # Run directories are sorted by path; the last one is reported as the latest.
        run_dirs = sorted(entry for entry in runs_dir.iterdir() if entry.is_dir())
        print('runs:', len(run_dirs))
        if run_dirs:
            latest_run = run_dirs[-1]
            print('latest run:', latest_run)
            for item in sorted(latest_run.glob('*')):
                print('  -', item.name)
    # The index file is checked whether or not a runs/ directory exists.
    idx = manifest_root / 'index.json'
    print('index.json exists:', idx.exists())


manifest_root: out_local/meta/manifest
runs: 1
latest run: out_local/meta/manifest/runs/20251230T181303Z
  - manifest.json
  - manifest_table.csv
  - manifest_table.xlsx
index.json exists: False
