# Load and Inspect Online Forecast Dataset

This notebook demonstrates how to load and inspect a forecast dataset using the NeuralHydrology config.

In [1]:
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

# Add local neuralhydrology path for debugging
sys.path.insert(0, '/home/sngrj0hn/GitHub/neuralhydrology')

# Change to the neuralhydrology root directory (required for config file paths)
os.chdir('/home/sngrj0hn/GitHub/neuralhydrology')

from neuralhydrology.utils.config import Config
from neuralhydrology.datasetzoo.onlineforecastdataset import OnlineForecastDataset

# Verify we're using the local development version
print(f"\nModule locations after reload:")
print(f"  Config: {Config.__module__}")
print(f"  ForecastDataset: {OnlineForecastDataset.__module__}")
print(f"  ForecastDataset file: {OnlineForecastDataset.__file__ if hasattr(OnlineForecastDataset, '__file__') else 'No __file__ attribute'}")

# Check if we can access the source file
forecast_source = Path('/home/sngrj0hn/GitHub/neuralhydrology/neuralhydrology/datasetzoo/onlineforecastdataset.py')
print(f"  Source file exists: {forecast_source.exists()}")
if forecast_source.exists():
    print(f"  Source file path: {forecast_source.absolute()}")
    
# Additional check - try to get the actual source file path
try:
    import inspect
    source_file = inspect.getfile(OnlineForecastDataset)
    print(f"  Actual source file: {source_file}")
except Exception as e:
    print(f"  Could not get source file: {e}")

print(f"\n✓ Ready for debugging - breakpoints should now work in:")
print(f"  {forecast_source.absolute()}")


Module locations after reload:
  Config: neuralhydrology.utils.config
  ForecastDataset: neuralhydrology.datasetzoo.onlineforecastdataset
  ForecastDataset file: No __file__ attribute
  Source file exists: True
  Source file path: /home/sngrj0hn/GitHub/neuralhydrology/neuralhydrology/datasetzoo/onlineforecastdataset.py
  Actual source file: /home/sngrj0hn/GitHub/neuralhydrology/neuralhydrology/datasetzoo/onlineforecastdataset.py

✓ Ready for debugging - breakpoints should now work in:
  /home/sngrj0hn/GitHub/neuralhydrology/neuralhydrology/datasetzoo/onlineforecastdataset.py


## Load Configuration

First, let's load the configuration file and examine the key settings.

In [2]:
# Load the config file
config_path = Path('/home/sngrj0hn/GitHub/neuralhydrology/operational_harz/gefs_10d_sample/config.yml')
config = Config(config_path)

# Display key forecast settings
print("Forecast Configuration:")
print(f"  Forecast sequence length: {config.forecast_seq_length}")
print(f"  Forecast offset: {config.forecast_offset}")
print(f"  Sequence length: {config.seq_length}")
print(f"  Predict last n: {config.predict_last_n}")
print(f"  Target variables: {config.target_variables}")
print(f"  Dynamic inputs: {config.dynamic_inputs}")
print(f"  Forecast inputs: {config.forecast_inputs}")
print(f"  Hindcast inputs: {config.hindcast_inputs}")

Forecast Configuration:
  Forecast sequence length: 240
  Forecast offset: 0
  Sequence length: 720
  Predict last n: 240
  Target variables: ['discharge_vol']
  Dynamic inputs: ['maximum_temperature_2m_q25', 'maximum_temperature_2m_q50', 'maximum_temperature_2m_q75', 'minimum_temperature_2m_q25', 'minimum_temperature_2m_q50', 'minimum_temperature_2m_q75', 'precipitation_surface_q25', 'precipitation_surface_q50', 'precipitation_surface_q75', 'relative_humidity_2m_q25', 'relative_humidity_2m_q50', 'relative_humidity_2m_q75', 'temperature_2m_q25', 'temperature_2m_q50', 'temperature_2m_q75', 'temperature_2m', 'relative_humidity_2m', 'dew_point_2m', 'precipitation', 'rain', 'snowfall', 'surface_pressure', 'et0_fao_evapotranspiration', 'wind_direction_10m', 'soil_moisture_0_to_7cm', 'soil_moisture_7_to_28cm', 'soil_moisture_28_to_100cm', 'shortwave_radiation']
  Forecast inputs: ['maximum_temperature_2m_q25', 'maximum_temperature_2m_q50', 'maximum_temperature_2m_q75', 'minimum_temperature_2

## Create Forecast Dataset

Now let's create a ForecastDataset instance and examine its properties.

In [None]:
# Create the forecast dataset
dataset = OnlineForecastDataset(
    cfg=config,
    is_train=True, 
    period='train'
)

print(f"Dataset created with {len(dataset)} samples")
print(f"Basins: {dataset.basins}")
print(f"Period: {dataset.period}")
print(f"Frequencies: {dataset.frequencies}")
print(f"Sequence length: {dataset.seq_len}")
print(f"Forecast sequence length: {dataset._forecast_seq_len}")