In [1]:
%load_ext autoreload
%autoreload 2

In [66]:
# Standard library imports
from typing import Union, Optional, Dict, Tuple
from pathlib import Path
import pickle
import pandas as pd
import xarray as xr
from helpers import cached_eval_run


def combine_datasets(results: Dict[int, Dict[str, Dict[str, Union[str, pd.DataFrame]]]]) -> pd.DataFrame:
    """
    Combine datasets from multiple folds.

    Parameters
    ----------
    results : Dict[int, Dict[str, Dict[str, Union[str, pd.DataFrame]]]]
        Dictionary containing results for different folds.

    Returns
    -------
    pd.DataFrame
        Combined DataFrame.
    """

    folds = results.keys()
    for fold in folds:
        for k,v in results[fold].items():
            basins.append(k)
            data_values.append(v['1D']['xr'])

    basins = [basin for fold in results.values() for basin in fold]
    data_values = [entry['1D']['xr'] for fold in results.values() for entry in fold.values()]
    combined_dataset = xr.concat(data_values, dim='basin').assign_coords(basin=basins)
    return combined_dataset.to_dataframe()

def create_pivot_tables(df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Create pivot tables for simulated and observed streamflow.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame containing the streamflow data.

    Returns
    -------
    Tuple[pd.DataFrame, pd.DataFrame]
        Tuple containing pivot tables for simulated and observed streamflow.
    """
    pivot_table_simulated = df.pivot_table(values='streamflow_mmd_sim', index='date', columns='basin')
    pivot_table_observed = df.pivot_table(values='streamflow_mmd_obs', index='date', columns='basin')
    return pivot_table_simulated, pivot_table_observed


In [67]:
# Set parameters
predictors = 'pe'
period = "test"
folds = [0, 1]

base_path = Path('/Users/sho108/Desktop/r/work/sho108/neuralhydrology_workflow/runs')

result = {
    fold: cached_eval_run(
        run_dir=base_path/f'spatial_twofold_{fold}_{predictors}_2501_181026',
        period=period,
        data_dir='/Users/sho108/Desktop/z/Data/CAMELS_AUS',
        recalculate=True
    )
    for fold in folds
}


# Extract basins and corresponding data values from the result dictionary
df = combine_datasets(result)

# Create pivot tables for simulated and observed streamflow
df_sim, df_obs = create_pivot_tables(df)

Cached data for test not found or recalculation forced. Generating and saving...
# Evaluation: 100%|██████████| 111/111 [10:05<00:00,  5.45s/it]
Cached data for test not found or recalculation forced. Generating and saving...
# Evaluation: 100%|██████████| 111/111 [10:08<00:00,  5.48s/it]


UnboundLocalError: local variable 'basins' referenced before assignment

In [None]:
/datasets/work/d61-coastal-forecasting-wp3/work/sho108/neuralhydrology/workflows/camelaus_lstm/runs/spatial_twofold_0_e_2501_181026/train_data
/Users/sho108/projects/neuralhydrology/workflows/camelaus_lstm/petrichore/runs/spatial_twofold_0_e_2501_181026/train_data