In [None]:
import os
os.environ['USE_PYGEOS'] = '0'
import numpy as np
import pandas as pd
import geopandas as gpd
import xarray as xr
from pathlib import Path

In [None]:
# Root folder of the reservoir-filling (rsfil) data; every other file in the notebook is read
# from or written to a location relative to it.
# The default is a machine-specific drive: set the RSFIL_DIR environment variable to run the
# notebook against a copy of the data stored elsewhere without editing the code.
path = Path(os.environ.get('RSFIL_DIR', 'Z:/nahaUsers/casadje/GloFASv4/US/rsfil/'))

## Reservoirs

In [None]:
# read the reservoir shapefile and use the reservoir ID as the row label
reservoirs = (
    gpd.read_file(path / 'GIS/reservoirs.shp')
    .set_index('ResID', drop=True)
)

## Extraction by Chus

In [None]:
# one extraction file per year; they are opened together as a single dataset
files = [
    path / f'extraction/rsfil_{year}_extract121.nc'
    for year in [2019, 2020]
]
# keep only the `rsfil` variable and convert it to pandas
rsfil_chs = (
    xr.open_mfdataset(files)
    ['rsfil']
    .to_pandas()
)

In [None]:
year = 2019

# open the raw file of that year and crop it to a lon/lat window
# (the latitude bounds are given from north to south — presumably `lat` is stored in
# descending order; TODO confirm)
da = (
    xr.open_dataset(path / f'raw/rsfil_{year}.nc')
    .sel(lon=slice(-114.125, -103.475), lat=slice(50.875, 41.925))
)

# write the cropped subset as a compressed NetCDF
encoding = {
    'rsfil': {
        'dtype': 'float64',
        'zlib': True,
        'complevel': 4,
    },
}
da.to_netcdf(path / f'giuseppe/test/test_{year}.nc', encoding=encoding)

In [None]:
# re-open from disk the test file of the same `year`
da_ = xr.open_dataset(path / f'giuseppe/test/test_{year}.nc')

In [None]:
# display the re-opened dataset
da_

In [None]:
# quick-look plot of `rsfil` at position 100 along the time dimension
da['rsfil'].isel(time=100).plot()

## Extraction by Giuseppe

In [None]:
# read the CSV and round the coordinates to 3 decimals
raw = (
    pd.read_csv(path / 'giuseppe/output_1920.csv')
    .assign(
        lon=lambda table: table['lon'].round(3),
        lat=lambda table: table['lat'].round(3),
    )
)

In [None]:
# Reshape the CSV extraction into one column per reservoir, on the same rows as `rsfil_chs`.
# The CSV coordinates were rounded to 3 decimals but the shapefile coordinates were not, so an
# exact float `==` can miss the point; match within half the rounding step instead.
coord_tol = 5e-4
rsfil_gsp = pd.DataFrame(index=rsfil_chs.index, columns=reservoirs.index, dtype=float)
for ResID in reservoirs.index:
    point = reservoirs.loc[ResID].geometry
    lon, lat = point.x, point.y
    mask = (
        np.isclose(raw.lon, lon, rtol=0, atol=coord_tol)
        & np.isclose(raw.lat, lat, rtol=0, atol=coord_tol)
    )
    values = raw.loc[mask, 'rsfil'].to_numpy()
    # fail with an explicit message instead of pandas' generic length-mismatch error
    if len(values) != len(rsfil_gsp.index):
        raise ValueError(
            f'Reservoir {ResID} at ({lon}, {lat}): found {len(values)} values in the CSV, '
            f'expected {len(rsfil_gsp.index)}'
        )
    # assumes the CSV rows of each point follow the order of `rsfil_chs.index` — TODO confirm
    rsfil_gsp[ResID] = values
# order the columns by reservoir ID
rsfil_gsp = rsfil_gsp.sort_index(axis=1)

In [None]:
# sanity check: every value in the reshaped table must lie within [0, 1]
assert 0 <= rsfil_gsp.min().min(), 'There are erroneous values below 0'
assert 1 >= rsfil_gsp.max().max(), 'There are erroneous values above 1'

## Comparison 

In [None]:
# dimensions of the two tables, side by side
rsfil_chs.shape, rsfil_gsp.shape

In [None]:
# first rows of the table built from the NetCDF extraction
rsfil_chs.head()

In [None]:
# element-wise exact equality between the two tables
# NOTE(review): `==` between DataFrames expects identical row and column labels — confirm
# both tables share them
(rsfil_gsp == rsfil_chs)

In [None]:
# np.isclose compares element by element, by position and ignoring labels: align both tables
# on their shared row/column labels first, so each column is compared with its namesake
chs_aligned, gsp_aligned = rsfil_chs.align(rsfil_gsp, join='inner')
assert not chs_aligned.empty, 'The two extractions share no common rows/columns'
# True for a column only if all its values agree within tolerance
# (besides `rtol`, np.isclose also applies its default absolute tolerance `atol=1e-8`)
pd.Series(
    np.isclose(chs_aligned.to_numpy(), gsp_aligned.to_numpy(), rtol=1e-10).all(axis=0),
    index=chs_aligned.columns,
)

In [None]:
# column labels present in both tables
rsfil_gsp.columns.intersection(rsfil_chs.columns)