In [1]:
import sys
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

Add the `src` directory to `sys.path` so that the project's modules can be imported.

In [2]:
# Make the project's src directory importable. The membership check keeps
# sys.path free of duplicate entries when this cell is re-run in the same kernel.
src_dir = os.path.abspath('../src')
if src_dir not in sys.path:
    sys.path.append(src_dir)

from data_utils import load_usgs_data, load_nwm_forecasts

Set the path to the data folder and list the stations to process

In [3]:
# Root data folder, relative to this notebook; each station's folder is
# resolved beneath it.
data_path = "../data"

# Station folder names to process.
stations = [
    "station1",
    "station2",
]

Dictionary to store data for each station

In [4]:
# Maps each station name to the datasets loaded for it; starts out empty.
station_data = dict()

Load data for each station

In [5]:
# Load the USGS data and the NWM forecasts for every configured station.
# A station is stored in `station_data` only when BOTH loads succeed; any
# failure is reported, recorded in `failed_stations`, and the station is skipped
# so that one bad station does not abort the whole run.
failed_stations = {}  # station name -> short description of what failed

for station in stations:
    station_path = os.path.join(data_path, station)

    print(f"Loading data for {station}...")

    # Remove any entry left over from an earlier run of this cell, so a station
    # that fails now cannot keep stale data in the dictionary.
    station_data.pop(station, None)

    # Load USGS data
    try:
        usgs_data = load_usgs_data(station_path)
        print(f"  USGS data loaded: {len(usgs_data)} observations")
    except Exception as e:
        # Deliberately broad: best-effort loading, the error is reported below.
        print(f"  Error loading USGS data: {e}")
        failed_stations[station] = f"USGS data: {e}"
        continue

    # Load NWM forecasts
    try:
        nwm_data = load_nwm_forecasts(station_path)
        print(f"  NWM data loaded: {len(nwm_data)} forecast points")
    except Exception as e:
        # Deliberately broad: best-effort loading, the error is reported below.
        print(f"  Error loading NWM forecasts: {e}")
        failed_stations[station] = f"NWM forecasts: {e}"
        continue

    # Store data for this station
    station_data[station] = {
        'usgs': usgs_data,
        'nwm': nwm_data
    }

# Surface skipped stations explicitly so they are not overlooked in the log.
if failed_stations:
    skipped = ", ".join(failed_stations)
    print(f"Warning: {len(failed_stations)} station(s) skipped: {skipped}")

print("Data loading complete!")

Loading data for station1...
  USGS data loaded: 70089 observations
  NWM data loaded: 326160 forecast points
Loading data for station2...
  USGS data loaded: 65498 observations
  NWM data loaded: 326160 forecast points
Data loading complete!


Save processed data

In [6]:
# Persist the loaded station data as a pickle file.
# The output location is derived once from `data_path`, so it follows the
# configured data folder instead of repeating a hard-coded path.
processed_dir = os.path.join(data_path, 'processed')
output_file = os.path.join(processed_dir, 'station_data.pkl')

# An empty dictionary means no station loaded successfully; say so rather than
# silently writing a file that contains no stations.
if not station_data:
    print("Warning: station_data is empty; the saved file will contain no stations.")

os.makedirs(processed_dir, exist_ok=True)

with open(output_file, 'wb') as f:
    pickle.dump(station_data, f)

print(f"Data saved to '{output_file}'")

Data saved to '../data/processed/station_data.pkl'
