In [None]:
import intake
import xarray as xr
import warnings
import fsspec
import pandas as pd
import os
from cmip6_downscaling.methods.common import utils
from dask.distributed import Client

# Silence FutureWarning messages for the rest of the notebook session.
warnings.simplefilter('ignore', FutureWarning)

# Resample frequency string -> label used in the zarr store names.
freq_dict = dict([('1AS', 'yearly'), ('1MS', 'monthly')])
# Reverse mapping: store-name label -> resample frequency string.
freq_dict_lookup = {label: alias for alias, label in freq_dict.items()}

In [None]:
# Start a Dask distributed client. No scheduler address is passed, so per the
# dask.distributed documentation this sets up a local cluster.
client = Client()
# Bare expression so the notebook renders the client summary as the cell output.
client

In [None]:
def get_fsspec_fs(account_name: str = "cpdataeuwest", sas_token: str = ''):
    """Return an fsspec filesystem for an Azure blob storage account.

    Parameters
    ----------
    account_name : str, optional
        Name of the Azure storage account. Defaults to ``"cpdataeuwest"``.
    sas_token : str, optional
        SAS token passed to the filesystem. Defaults to an empty string.
        Pass the token in (e.g. read from the environment or ``getpass``)
        rather than editing it into this function.

    Returns
    -------
    The object returned by ``fsspec.filesystem('az', ...)``.

    Notes
    -----
    Side effect: a non-empty ``AZURE_STORAGE_CONNECTION_STRING`` environment
    variable is removed from ``os.environ`` for the rest of the process.
    """
    # NOTE(review): presumably removed so the Azure backend uses the explicit
    # account options below instead of the connection string -- confirm.
    if os.environ.get('AZURE_STORAGE_CONNECTION_STRING'):
        del os.environ['AZURE_STORAGE_CONNECTION_STRING']
    return fsspec.filesystem('az', account_name=account_name, sas_token=sas_token)


def store_exists(fs: "fsspec.AbstractFileSystem", store_path: str) -> bool:
    """Check whether a store exists on the given filesystem.

    Parameters
    ----------
    fs : fsspec.AbstractFileSystem
        Filesystem object whose ``exists`` method is queried.
    store_path : str
        Full URL of the store. It must contain ``'windows.net/'``; everything
        after the first occurrence is the path handed to ``fs.exists``.

    Returns
    -------
    bool
        Whatever ``fs.exists`` reports for the extracted path.

    Raises
    ------
    ValueError
        If ``store_path`` does not contain ``'windows.net/'``.
    """
    # partition keeps the whole remainder after the first marker and tells us
    # (via an empty separator) when the marker is missing.
    _, marker, blob_path = store_path.partition('windows.net/')
    if not marker:
        raise ValueError(
            f"store_path must contain 'windows.net/' to locate the blob path: {store_path!r}"
        )
    return fs.exists(blob_path)


def return_intake_cat_df():
    """Open the global downscaled CMIP6 intake-esm catalog and return its ``df`` attribute."""
    catalog_url = 'https://cpdataeuwest.blob.core.windows.net/cp-cmip/version1/catalogs/global-downscaled-cmip6.json'
    catalog = intake.open_esm_datastore(catalog_url)
    return catalog.df


def create_time_summary_options(df: pd.DataFrame) -> dict:
    """Build a mapping of frequency label -> list of zarr store URIs.

    For every label in ``freq_dict`` (e.g. 'yearly', 'monthly'), the URIs in
    the ``downscaled_daily_data_uri`` column are copied with each ``'day'``
    substring replaced by that label.
    """
    return {
        label: list(df['downscaled_daily_data_uri'].str.replace('day', label))
        for label in freq_dict.values()
    }


def resample_dataset(resampled_path: str, freq: str, output_path: str = None):
    """Resample a daily zarr store to ``freq`` ('monthly' or 'yearly') and write it out.

    The daily store location is derived from ``resampled_path`` by replacing
    ``freq`` with ``'day'``. The result is written to ``output_path`` when one
    is given (truthy), otherwise to ``resampled_path``. Existing output is
    overwritten (``mode='w'``).
    """
    source_path = resampled_path.replace(freq, 'day')
    daily_ds = xr.open_zarr(source_path)
    resampled_ds = utils.resample_wrapper(daily_ds, freq=freq_dict_lookup[freq])
    # Fall back to the resampled store's own path when no override is given.
    target_path = output_path if output_path else resampled_path
    resampled_ds.to_zarr(target_path, mode='w', consolidated=True)


def resample_time_summaries(time_summary_dict: dict, fs: fsspec.filesystem):
    """Create every resampled store that ``store_exists`` reports as missing.

    Walks each frequency label (e.g. 'monthly', 'yearly') and its list of
    store URIs. For each URI not found on ``fs``, ``resample_dataset`` is run
    and the result is written under the ``s3://carbonplan-scratch`` bucket,
    reusing the part of the URI that follows ``'.net'``.
    """
    for freq, store_uris in time_summary_dict.items():
        for store_uri in store_uris:
            if store_exists(fs, store_uri):
                # Store already present on fs; nothing to do.
                continue
            scratch_path = 's3://carbonplan-scratch' + store_uri.split('.net')[1]
            resample_dataset(store_uri, freq, output_path=scratch_path)


def update_catalog(intake_cat_df: pd.DataFrame, fs: "fsspec.AbstractFileSystem") -> pd.DataFrame:
    """Return the catalog extended with rows for resampled stores that exist.

    For each frequency label in ``freq_dict`` (e.g. 'yearly', 'monthly'), the
    resampled URI of every catalog row is derived by replacing ``'day'`` in
    ``downscaled_daily_data_uri`` with that label. When ``store_exists``
    reports the derived store as present, a copy of the row is added with
    ``timescale`` set to the label and the URI column set to the derived URI.

    Parameters
    ----------
    intake_cat_df : pd.DataFrame
        Catalog frame with a ``downscaled_daily_data_uri`` column. It is not
        modified.
    fs : fsspec.AbstractFileSystem
        Filesystem passed through to ``store_exists``.

    Returns
    -------
    pd.DataFrame
        Original rows followed by the new rows, de-duplicated on every column
        except ``downscaled_daily_data_uri`` (first occurrence kept).
    """
    uri_col = 'downscaled_daily_data_uri'
    source_uris = intake_cat_df[uri_col]
    frames = [intake_cat_df]

    for label in freq_dict.values():
        derived_uris = source_uris.str.replace('day', label, regex=False)
        # Rows whose URI is unchanged contain no 'day' and so are not daily
        # stores; relabelling them would pair this label with another
        # frequency's store.
        keep = [
            bool(derived != source and store_exists(fs, derived))
            for source, derived in zip(source_uris, derived_uris)
        ]
        if not any(keep):
            continue
        frames.append(
            intake_cat_df.loc[keep].assign(
                **{'timescale': label, uri_col: derived_uris[keep].to_numpy()}
            )
        )

    # Build the result in one concat instead of growing the input frame while
    # iterating over it (DataFrame.append was removed in pandas 2.0).
    combined = pd.concat(frames, ignore_index=True)
    return combined.drop_duplicates(
        subset=combined.columns.difference([uri_col]), keep='first'
    )


# check catalog, then save and replace once QA'd.

In [None]:
# Run the workflow end to end: connect to Azure blob storage, load the catalog,
# derive the monthly/yearly store URIs, create any stores that are missing,
# then build the catalog that includes the resampled stores.
fs = get_fsspec_fs()
intake_cat_df = return_intake_cat_df()
time_summary_dict = create_time_summary_options(intake_cat_df)
# Missing stores are written under the s3://carbonplan-scratch bucket.
resample_time_summaries(time_summary_dict, fs)
updated_catalog = update_catalog(intake_cat_df, fs)

In [None]:
# Once the updated catalog has been QA'd, uncomment to write it to the scratch bucket:
# updated_catalog.to_csv('s3://carbonplan-scratch/updated_catalog.csv', index=False)

In [None]:
# Spot-check: open one monthly and one yearly store from the scratch bucket
# (same model/scenario/variable) for visual inspection in the cells below.
ds_month = xr.open_zarr(
    's3://carbonplan-scratch/cp-cmip/version1/data/DeepSD-BC/CMIP.CCCma.CanESM5.historical.r1i1p1f1.monthly.DeepSD-BC.pr.zarr'
)
ds_year = xr.open_zarr(
    's3://carbonplan-scratch/cp-cmip/version1/data/DeepSD-BC/CMIP.CCCma.CanESM5.historical.r1i1p1f1.yearly.DeepSD-BC.pr.zarr'
)

In [None]:
# Bare expression: render the monthly dataset as the cell output.
ds_month

In [None]:
# Bare expression: render the yearly dataset as the cell output.
ds_year