In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import glob
from pathlib import Path
from tqdm.notebook import tqdm
import pandas as pd
import xarray as xr
import geopandas as gpd
from datetime import timedelta

In [4]:
# Read the GeoPackage layer and key its rows by the 'gauge_id' column.
ws_file = (
    gpd.read_file('../data/great_db/geometry/russia_ws.gpkg')
    .set_index('gauge_id')
)

In [5]:
def read_with_date_index(file_path: str):
    """Load a CSV file and index it by its parsed ``date`` column.

    Parameters
    ----------
    file_path : str
        Path of the CSV file; it must contain a column named ``date``.

    Returns
    -------
    pandas.DataFrame
        The file's contents with ``date`` converted through
        ``pd.to_datetime`` and moved from the columns into the index.
    """
    raw = pd.read_csv(file_path)
    parsed_dates = pd.to_datetime(raw['date'])
    return raw.assign(date=parsed_dates).set_index('date')

In [6]:
# --- Configuration ---------------------------------------------------------
cmip_dir = '../data/cmip_result_585/'
res_folder = Path('../data/cmip_concat_21_09/cmip_585/')
res_folder.mkdir(exist_ok=True, parents=True)

# Subtracted from temperature columns ('t2m' / '*tas*').
KELVIN_OFFSET = 273.15
# NOTE(review): scale factor applied to 'pr' columns, kept exactly as in the
# original notebook; the intended target unit is not visible here -- confirm.
PR_SCALE = 1e5

# --- Discover inputs -------------------------------------------------------
# Every entry of `cmip_dir` is treated as one dataset, except entries whose
# path contains 'weights'. Sorting makes the processing order (and therefore
# the variable order in the written files) independent of filesystem order.
datasets = sorted(meteo_path
                  for meteo_path in glob.glob(f'{cmip_dir}/*')
                  if 'weights' not in meteo_path)

# {dataset name: {variable name: path of that variable's directory}}
variables = {Path(ds).name: {Path(var_dir).name: var_dir
                             for var_dir in sorted(glob.glob(f'{ds}/*'))}
             for ds in datasets}

# Fail fast with a clear message instead of an opaque error inside the loop.
if not variables:
    raise FileNotFoundError(f'no CMIP datasets found under {cmip_dir!r}')

# --- Build and write one merged dataset per gauge --------------------------
# `by_gauge` maps gauge id -> merged xarray Dataset (also written to disk).
by_gauge = dict()
for gauge in tqdm(ws_file.index):
    ds_res = dict()
    for ds_name, var_paths in variables.items():
        var_frames = list()
        for var_name, var_path in var_paths.items():
            df = read_with_date_index(f'{var_path}/{gauge}.csv')

            # Unit conversion is chosen from the raw column names, so it
            # must happen before the columns are renamed below.
            is_temperature = ('t2m' in df.columns
                              or any('tas' in col for col in df.columns))
            if is_temperature:
                df = df - KELVIN_OFFSET
            elif 'pr' in df.columns:
                df = df * PR_SCALE

            # Each per-variable file is expected to hold exactly one data
            # column; this assignment raises otherwise.
            df.columns = [f'{var_name}_{ds_name}']

            # Only the first timestamp is inspected; the whole index is
            # shifted when it falls on 12:00.
            # NOTE(review): +12h moves a 12:00 stamp to 00:00 of the
            # *following* day -- confirm next-day alignment is intended
            # (rather than normalising to the same day).
            if df.index[0].hour == 12:
                df.index += timedelta(hours=12)

            # Gaps are filled with pandas' default interpolation rather
            # than dropped.
            df = df.interpolate()
            var_frames.append(df)

        # Align this dataset's variables on the date index, then tag the
        # result with the gauge id as a length-1 coordinate.
        res_xr = pd.concat(var_frames, axis=1).to_xarray()
        res_xr = res_xr.assign_coords(gauge_id=('gauge_id', [gauge]))
        ds_res[ds_name] = res_xr

    # Combine all datasets for this gauge and persist the result.
    by_gauge[gauge] = xr.merge(list(ds_res.values()))
    by_gauge[gauge].to_netcdf(res_folder / f'{gauge}.nc')

A Jupyter Widget