In [None]:
import xarray as xr
import numpy as np 
import pandas as pd
from hython.metrics import compute_fdc_fms, compute_fdc_fhv, compute_fdc_flv, compute_hr, compute_far, compute_csi

from hython.utils import write_to_zarr, read_from_zarr, set_seed



#### Read Data and some pre-processing

In [None]:
# Remote Zarr store holding the surrogate-model inputs/targets.
SURROGATE_INPUT = "https://eurac-eo.s3.amazonaws.com/INTERTWIN/SURROGATE_INPUT/adg1km_eobs_original.zarr/"

# Time window (inclusive label slice) and the target variables to keep.
train_temporal_range = slice("2012-01-01", "2022-12-31")
target_names = ["q_river", "vwc"]

# Open group "y", restrict it to the time window, keep only the targets.
Y = read_from_zarr(url=SURROGATE_INPUT, group="y").sel(time=train_temporal_range)[target_names]

# Simulated river discharge at the grid cell nearest to (lat=45.4, lon=11).
simulated = Y["q_river"].sel(lat=45.4, lon=11, method="nearest")
simulated

In [None]:
# Location of the observed-discharge CSV and the station to extract from it.
DISCHARGE_CSV = "/mnt/CEPH_PROJECTS/InterTwin/hydrologic_data/ADO/ADO_discharge.csv"
STATION_ID = 'ADO_DSC_ITH3_0002'

# Load the CSV file using numpy: the header row supplies the field names and
# column dtypes are inferred. `atleast_1d` keeps a one-row file iterable
# (genfromtxt squeezes a single record to a 0-d array).
data = np.atleast_1d(
    np.genfromtxt(DISCHARGE_CSV, delimiter=',', names=True, dtype=None, encoding=None)
)

# Select the station's rows once, then extract both columns from that subset
# (instead of scanning the whole table separately for each column).
station_rows = [row for row in data if row['id_station'] == STATION_ID]
if not station_rows:
    # Fail early with a clear message; otherwise the date comparison below
    # fails on an empty float array with an obscure dtype error.
    raise ValueError(f"No rows found for station {STATION_ID!r} in {DISCHARGE_CSV}")

time = np.array([np.datetime64(row['date']) for row in station_rows])
discharge = np.array([row['discharge_m3_s'] for row in station_rows])

# Filter based on the desired time range (both bounds inclusive)
start_date = np.datetime64('2012-01-01')
end_date = np.datetime64('2022-12-31')
mask = (time >= start_date) & (time <= end_date)

# Create xarray DataArray indexed by time
observed = xr.DataArray(
    discharge[mask],
    dims="time",
    coords={"time": time[mask]},
    name="discharge_m3_s"
)

# Display the xarray object
observed

#### Check FDC biases

In [None]:
# Flow-duration-curve bias metrics of the simulated discharge against the
# observed series (presumably mid-segment slope, high-flow volume and
# low-flow volume biases - confirm against hython.metrics).
biasFMS, biasFHV, biasFLV = (
    compute_fdc_fms(observed, simulated),
    compute_fdc_fhv(observed, simulated),
    compute_fdc_flv(observed, simulated),
)

#### Now for the soil moisture metrics

In [None]:
import xarray as xr
import numpy as np

def create_random_dataset(original_ds: xr.Dataset, seed=None) -> xr.Dataset:
    """
    Create a random xarray.Dataset based on the structure of an existing xarray.Dataset.

    Every data variable of ``original_ds`` is replaced by an array of the same
    shape filled with uniform random floats in [0, 1); the dimensions and
    coordinates of each variable and of the dataset are passed through.

    Parameters:
    original_ds (xr.Dataset): The original dataset to copy structure from.
    seed (optional): Seed (or anything accepted by ``np.random.default_rng``)
        used to make the random values reproducible. When ``None`` (default)
        values are drawn from NumPy's global random state, exactly as before
        this parameter existed.

    Returns:
    xr.Dataset: A new dataset with the same dimensions and coordinates but with random data.
    """
    # Keep the legacy global-state behaviour unless the caller asks for a
    # reproducible, independent stream.
    draw = np.random.random if seed is None else np.random.default_rng(seed).random

    # One random DataArray per original variable, same shape/dims/coords.
    random_data_vars = {
        var_name: xr.DataArray(
            data=draw(var_data.shape),
            dims=var_data.dims,
            coords=var_data.coords,
        )
        for var_name, var_data in original_ds.data_vars.items()
    }

    # Assemble the dataset, re-attaching the dataset-level coordinates.
    return xr.Dataset(data_vars=random_data_vars, coords=original_ds.coords)


# Soil-moisture variable ("vwc") from the real targets, and a random
# counterpart with the same structure to compare it against.
Y_data_array = Y["vwc"]
randomised_data_array = create_random_dataset(Y)["vwc"]

In [None]:
# Categorical scores of the random field against the real soil-moisture
# field: hit rate, plus (presumably) false-alarm ratio and critical success
# index - confirm definitions against hython.metrics.
hit_rate, far, csi = (
    compute_hr(Y_data_array, randomised_data_array),
    compute_far(Y_data_array, randomised_data_array),
    compute_csi(Y_data_array, randomised_data_array),
)

In [None]:
import numpy as np
import xarray as xr

# NOTE(review): this cell repeats the earlier observed-discharge loading cell
# (same file, station and date range); `observed_xr` holds the same data as
# `observed`. Consider keeping only one of the two.

# Location of the observed-discharge CSV and the station to extract from it.
DISCHARGE_CSV = "/mnt/CEPH_PROJECTS/InterTwin/hydrologic_data/ADO/ADO_discharge.csv"
STATION_ID = 'ADO_DSC_ITH3_0002'

# Load the CSV file using numpy: the header row supplies the field names and
# column dtypes are inferred. `atleast_1d` keeps a one-row file iterable
# (genfromtxt squeezes a single record to a 0-d array).
data = np.atleast_1d(
    np.genfromtxt(DISCHARGE_CSV, delimiter=',', names=True, dtype=None, encoding=None)
)

# Select the station's rows once, then extract both columns from that subset
# (instead of scanning the whole table separately for each column).
station_rows = [row for row in data if row['id_station'] == STATION_ID]
if not station_rows:
    # Fail early with a clear message; otherwise the date comparison below
    # fails on an empty float array with an obscure dtype error.
    raise ValueError(f"No rows found for station {STATION_ID!r} in {DISCHARGE_CSV}")

time = np.array([np.datetime64(row['date']) for row in station_rows])
discharge = np.array([row['discharge_m3_s'] for row in station_rows])

# Filter based on the desired time range (both bounds inclusive)
start_date = np.datetime64('2012-01-01')
end_date = np.datetime64('2022-12-31')
mask = (time >= start_date) & (time <= end_date)

# Create xarray DataArray indexed by time
observed_xr = xr.DataArray(
    discharge[mask],
    dims="time",
    coords={"time": time[mask]},
    name="discharge_m3_s"
)

# Display the xarray object
observed_xr


In [None]:
# Simulated river discharge at the grid cell nearest to the query point,
# shown as a raw NumPy array.
nearest_cell = dict(lat=45.4, lon=11, method="nearest")
simulated_data = Y["q_river"].sel(**nearest_cell)
simulated_data.values

In [None]:
# Show the observed discharge series as a raw NumPy array.
observed_xr.values