In [None]:
# Cell 1 - Setup: mount Google Drive so the data directory persists
import os

from google.colab import drive

drive.mount('/content/drive')

# Persistent data root on the mounted Drive, plus the sub-folders used for
# ERA5 data, shapefiles and models
TELNET_DATADIR = '/content/drive/MyDrive/telnet_data'
os.makedirs(TELNET_DATADIR, exist_ok=True)
for subdir in ('era5', 'shapefiles', 'models'):
    os.makedirs(f'{TELNET_DATADIR}/{subdir}', exist_ok=True)

# Publish the location through the environment so child processes inherit it
os.environ['TELNET_DATADIR'] = TELNET_DATADIR
print(f"Data directory: {TELNET_DATADIR}")

In [None]:
# Cell 2 - Clone repository and install dependencies
# Starts in /content, deletes any existing ./telnet checkout, clones the
# repository again and installs Python packages from inside the checkout.
# The order of the lines matters: the requirements path below is relative to
# the directory entered by the second %cd.
%cd /content
!rm -rf telnet  # Clean previous install if exists
!git clone https://github.com/gscerveira/telnet.git
# Enter the fresh checkout (/content/telnet); later cells %cd to the same path
%cd telnet

# Install uv for faster package management
!pip install -q uv

# Install dependencies with uv (much faster than pip)
# --system installs into the system Python environment rather than a virtualenv
!uv pip install --system -q -r docker/requirements.txt
# Extra packages installed on top of the requirements file
# NOTE(review): presumably needed by the virtual-ERA5 and shapefile steps - confirm
!uv pip install --system -q s3fs geopandas rioxarray icechunk virtualizarr

# Printed unconditionally: `!` commands do not raise when they exit non-zero
print("Dependencies installed!")

In [None]:
# Cell 3 - Verify GPU
!nvidia-smi
import torch
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")

In [None]:
# Cell 4a - Build Virtual ERA5 Stores (FAST - only creates references)
# This replaces the slow download step - no data is actually downloaded!
# NOTE(review): build_virtual_era5.py is not visible from this notebook; the
# "references only / no download" behaviour is this cell's own description -
# confirm against the script.
%cd /content/telnet
import os
# Set again here (same value as Cell 1); the `!` subprocess below inherits it
os.environ['TELNET_DATADIR'] = '/content/drive/MyDrive/telnet_data'

# Build virtual stores (~5-10 minutes to index files, NO download)
# -idate / -fdate: presumably first and last month as YYYYMM - confirm in the script
!python build_virtual_era5.py -idate 194001 -fdate 202512

# Printed unconditionally: `!` commands do not raise when they exit non-zero
print("Virtual stores created! Data will stream on-demand from S3.")

In [None]:
# Cell 4b - Verify virtual stores work
%cd /content/telnet
import os
os.environ['TELNET_DATADIR'] = '/content/drive/MyDrive/telnet_data'

from load_virtual_era5 import open_virtual_era5, load_era5_region

# Open the virtual precipitation store
ds = open_virtual_era5('precipitation')
print("Virtual dataset:")
print(ds)

# Load just Maranhao region for January 2020 (streams only needed data)
lats = (-10.25, -1.0)
lons = (-48.75, -41.5)
data = load_era5_region('precipitation', lats, lons, time_slice=slice('2020-01', '2020-01'))
print(f"\nLoaded Maranhao data shape: {data.shape}")
print(f"Data size in memory: {data.nbytes / 1e6:.2f} MB")

In [None]:
# Cell 4 - Download ERA5 data from AWS
# NOTE(review): Cell 4a states that the virtual stores replace this slow
# download step. Whether later cells still need the downloaded files cannot be
# told from this notebook - confirm before running both under "Run All".
%cd /content/telnet
import os
# Set again here (same value as Cell 1); the `!` subprocess below inherits it
os.environ['TELNET_DATADIR'] = '/content/drive/MyDrive/telnet_data'

# This downloads precipitation, winds, geopotential, and land-sea mask
# Takes ~30-60 minutes depending on date range
# -idate / -fdate: presumably first and last month as YYYYMM - confirm in the script
!python download_era5_aws.py -idate 194001 -fdate 202512

In [None]:
# Cell 5 - Download ERSSTv5 (sea surface temperature)
%cd /content/telnet
import os
os.environ['TELNET_DATADIR'] = '/content/drive/MyDrive/telnet_data'

from download_preprocess_data import download_ersstv5
download_ersstv5('1940-01-01', '2025-12-01')
print("ERSSTv5 downloaded!")

In [None]:
# Cell 6 - Download Maranhao shapefile
# Runs download_maranhao_shapefile.py from the repository checkout.
# NOTE(review): Cells 12 and 13 read shapefiles from <TELNET_DATADIR>/shapefiles;
# presumably this script writes there - confirm in the script.
%cd /content/telnet
import os
# Set again here (same value as Cell 1); the `!` subprocess below inherits it
os.environ['TELNET_DATADIR'] = '/content/drive/MyDrive/telnet_data'

!python download_maranhao_shapefile.py
# Printed unconditionally: `!` commands do not raise when they exit non-zero
print("Shapefile downloaded!")

In [None]:
# Cell 7 - Compute climate indices
# Runs compute_climate_indices.py from the repository checkout.
%cd /content/telnet
import os
# Set again here (same value as Cell 1); the `!` subprocess below inherits it
os.environ['TELNET_DATADIR'] = '/content/drive/MyDrive/telnet_data'

# -fdate: presumably the last month to process as YYYYMM (same value as the
# -fdate used in the ERA5 cells) - confirm in the script
!python compute_climate_indices.py -fdate 202512

In [None]:
# Cell 8 - Feature pre-selection
# Adjust n_samples based on available time (100 is faster, 1000 is more robust)
%cd /content/telnet
import os
# Set again here (same value as Cell 1); the `!` subprocess below inherits it
os.environ['TELNET_DATADIR'] = '/content/drive/MyDrive/telnet_data'

# -n is the n_samples value mentioned above.
# NOTE(review): Cells 9 and 10 also pass 100; presumably the values must be
# kept in sync when this one is changed - confirm.
!python feature_pre_selection.py -n 100

In [None]:
# Cell 9 - Model selection (GPU intensive)
# This is the longest step - can take several hours
%cd /content/telnet
import os
# Set again here (same value as Cell 1); the `!` subprocess below inherits it
os.environ['TELNET_DATADIR'] = '/content/drive/MyDrive/telnet_data'

# Make the script executable so it can be invoked directly as ./model_selection.sh
!chmod +x model_selection.sh
# NOTE(review): the two positional arguments are not documented in this
# notebook; 100 and 1 presumably correspond to the -n 100 and -c 1 passed to
# model_testing.py in Cell 10 - confirm in model_selection.sh
!./model_selection.sh 100 1

In [None]:
# Cell 10 - Model testing
# Runs model_testing.py from the repository checkout.
%cd /content/telnet
import os
# Set again here (same value as Cell 1); the `!` subprocess below inherits it
os.environ['TELNET_DATADIR'] = '/content/drive/MyDrive/telnet_data'

# NOTE(review): -n 100 matches the value used in Cells 8 and 9 and -c 1 the
# second argument of Cell 9; presumably they must agree - confirm in the script
!python model_testing.py -n 100 -c 1

In [None]:
# Cell 11 - Generate forecasts for 2025
# Run for each initialization month
%cd /content/telnet
import os
os.environ['TELNET_DATADIR'] = '/content/drive/MyDrive/telnet_data'

!chmod +x generate_forecast.sh

# January 2025 initialization
!./generate_forecast.sh 202501 1

# April 2025 initialization
!./generate_forecast.sh 202504 1

# July 2025 initialization
!./generate_forecast.sh 202507 1

# October 2025 initialization
!./generate_forecast.sh 202510 1

In [None]:
# Cell 12 - Extract Maranhao region from forecasts
%cd /content/telnet
import os
os.environ['TELNET_DATADIR'] = '/content/drive/MyDrive/telnet_data'
DATADIR = os.environ['TELNET_DATADIR']

import glob

# Find all forecast files and extract Maranhao region
for init_date in ['202501', '202504', '202507', '202510']:
    results_dir = f'{DATADIR}/results/{init_date}'
    if os.path.exists(results_dir):
        for f in glob.glob(f'{results_dir}/*.nc'):
            if not f.startswith('maranhao_'):
                output = f'{results_dir}/maranhao_{os.path.basename(f)}'
                !python extract_maranhao.py "{f}" "{output}" --shapefile-dir {DATADIR}/shapefiles
                print(f"Extracted: {output}")

In [None]:
# Cell 13 - Verify forecasts against observations
%cd /content/telnet
import os
os.environ['TELNET_DATADIR'] = '/content/drive/MyDrive/telnet_data'
DATADIR = os.environ['TELNET_DATADIR']

for init_date in ['202501', '202504', '202507', '202510']:
    results_dir = f'{DATADIR}/results/{init_date}'
    forecast_file = f'{results_dir}/maranhao_ensemble.nc'
    obs_file = f'{DATADIR}/era5/era5_pr_2025-2025_preprocessed.nc'
    output_dir = f'{results_dir}/verification'

    if os.path.exists(forecast_file):
        !python verify_forecasts.py "{forecast_file}" "{obs_file}" -o "{output_dir}" --shapefile-dir {DATADIR}/shapefiles
        print(f"Verification complete for {init_date}")

In [None]:
# Cell 14 - View results
%cd /content/telnet
import os
os.environ['TELNET_DATADIR'] = '/content/drive/MyDrive/telnet_data'
DATADIR = os.environ['TELNET_DATADIR']

import pandas as pd
from IPython.display import display, Image

# Show skill scores
for init_date in ['202501', '202504', '202507', '202510']:
    skills_file = f'{DATADIR}/results/{init_date}/verification/skill_scores.csv'
    if os.path.exists(skills_file):
        print(f"\n=== {init_date} Skill Scores ===")
        display(pd.read_csv(skills_file))

    # Show reliability diagram if exists
    diagram = f'{DATADIR}/results/{init_date}/verification/reliability_diagrams.png'
    if os.path.exists(diagram):
        display(Image(diagram))