In [26]:
from pathlib import Path
import numpy as np
import pandas as pd
import xarray as xr
import functools
import multiprocessing as mp
import os, sys
from tqdm import *
import pickle
import warnings
warnings.filterwarnings("ignore")

# Root of the working tree; every input and output path below is resolved
# against it. NOTE(review): this is an absolute, machine-specific path.
project_root = Path('/home/glen/works/forcing_generate_test/')
basin_id = '04040000'

# Directory that receives one CSV per subbasin. It is created up front because
# the CSV writer further down calls DataFrame.to_csv() into it and would fail
# with a missing-directory error on a fresh checkout.
forcingcsv_dir = project_root / 'forcing_csvs'
forcingcsv_dir.mkdir(parents=True, exist_ok=True)

# Extract the time coordinate of the RDRS file as a 0..n-1 indexed Series and
# persist it, so later steps can rebuild the timestamp column without xarray.
rdrs = xr.open_dataset(project_root / 'generated' / 'RDRSv2.1_precip_UTC5_1h.nc')
time = rdrs['time'].to_series().reset_index(drop=True)
with open(project_root / 'datetime.pkl', 'wb') as f:
    pickle.dump(time, f)


In [27]:
# Module-level lookup tables read by the functions defined further down:
#   all_ids     -> list of subbasin ids to process
#   all_cells   -> subbasin id -> list of grid-cell indices (int)
#   all_weights -> subbasin id -> list of matching cell weights (float)
all_ids = []
all_cells = {}
all_weights = {}

weights_path = project_root / 'generated' / f'grid_weights_{basin_id}.txt'
with open(weights_path) as file:
    # Keep only the data rows: the first seven lines and the final line are
    # dropped (presumably header/footer of the weights file -- TODO confirm).
    lines = [raw.rstrip() for raw in file][7:-1]

all_ids.append(basin_id)

# Each remaining row is whitespace-separated; field 1 is the cell index and
# field 2 is its weight (field 0 is not used here).
rows = [entry.split() for entry in lines]
all_cells[basin_id] = [int(fields[1]) for fields in rows]
all_weights[basin_id] = [float(fields[2]) for fields in rows]

In [28]:
# Manual spot check: weighted sum of one variable over time steps 16..39
# for the configured basin.
precip_path = project_root / 'forcing_variables' / 'RDRS_v2.1_A_PR0_SFC.npy'
flat_variable_array = np.load(precip_path)
subbasin_cells = tuple(all_cells[basin_id])
subbasin_weights = tuple(all_weights[basin_id])
# Pick the columns (axis 1) belonging to this basin's grid cells.
subset = np.take(flat_variable_array, subbasin_cells, 1)

a = np.array(subbasin_weights)
day_sum = 0
for i in range(16, 40):
    # Weighted total across the basin's cells for time step i.
    day_sum += np.sum(a * subset[i])

In [53]:
# NOTE(review): the cache is kept only because clear1() calls .cache_clear() on
# this function. The function is executed for its side effect (writing a CSV),
# so a cached repeat call with identical arguments would silently do nothing.
@functools.lru_cache(maxsize = None)
def load_variable_to_csv(variable_name : str, sub_cells: tuple, sub_weights: tuple, subbasin_id: str):
    """Add the weighted series of one forcing variable to a subbasin's CSV.

    The CSV ``forcingcsv_dir/<subbasin_id>.csv`` is created on first use with a
    ``Datetime`` column taken from ``project_root/datetime.pkl``. The variable
    array ``project_root/forcing_variables/<variable_name>.npy`` is then loaded,
    the columns listed in ``sub_cells`` are selected along axis 1, and for every
    row of the CSV the values are multiplied by ``sub_weights`` and summed.
    NaN products contribute zero to the sum. The result is stored in a column
    named ``variable_name`` and the CSV is rewritten.

    Parameters
    ----------
    variable_name : str
        File stem of the ``.npy`` array and name of the column to write.
    sub_cells : tuple
        Indices into axis 1 of the variable array.
    sub_weights : tuple
        One weight per entry of ``sub_cells``, in the same order.
    subbasin_id : str
        Identifier used as the CSV file name.

    Returns
    -------
    None

    Raises
    ------
    IndexError
        If the variable array has fewer rows than the CSV has time steps.
    """
    csv_path = Path(forcingcsv_dir / (subbasin_id + '.csv'))

    if not csv_path.is_file():
        # First variable for this subbasin: seed the CSV with the timestamps.
        # The pickle is only needed here, so it is not read on later calls.
        with open(project_root / 'datetime.pkl', 'rb') as file:
            datetime_series = pickle.load(file)
        seed_df = pd.DataFrame(datetime_series).rename(columns={'time': 'Datetime'})
        seed_df.to_csv(csv_path)
    # Always go through the CSV so the frame has the same dtypes on every call.
    subbasin_df = pd.read_csv(csv_path, index_col=[0])

    flat_variable_array = np.load(Path(project_root / f'forcing_variables/{variable_name}.npy')) # all values of a given variable
    subset = np.take(flat_variable_array, sub_cells, 1) # columns of the cells that overlap the target subbasin

    n_steps = len(subbasin_df)
    if subset.shape[0] < n_steps:
        raise IndexError(
            f'{variable_name}: array has {subset.shape[0]} time steps but the CSV has {n_steps}'
        )

    # One vectorised pass instead of a Python loop over every (time step, cell)
    # pair. The float64 weights promote the product to float64, and nansum
    # treats NaN products as zero, as the per-cell isnan check did.
    weights = np.asarray(sub_weights, dtype=float)
    subbasin_df[variable_name] = np.nansum(subset[:n_steps] * weights, axis=1)

    subbasin_df.to_csv(csv_path)
    

def clear1():
    """Empty the ``lru_cache`` attached to ``load_variable_to_csv``."""
    cached_loader = load_variable_to_csv
    cached_loader.cache_clear()


@functools.lru_cache(maxsize = None)
def create_subbasin_forcings(sub_id: str, variables: tuple):
    """Write every variable in ``variables`` into the CSV of subbasin ``sub_id``.

    The subbasin's cell indices and weights are looked up in the module-level
    ``all_cells`` / ``all_weights`` tables and frozen into tuples so they can
    be passed to the cached ``load_variable_to_csv``. That function's cache is
    cleared after each variable.
    """
    cells = tuple(all_cells[sub_id])
    weights = tuple(all_weights[sub_id])

    for name in variables:
        load_variable_to_csv(name, cells, weights, sub_id)
        clear1()


def clear2():
    """Empty the ``lru_cache`` attached to ``create_subbasin_forcings``."""
    cached_builder = create_subbasin_forcings
    cached_builder.cache_clear()


def wrapper(subid: str):
    """Build all forcing columns for one subbasin; entry point for pool workers.

    The variable names are the stems of the ``*.npy`` files found in
    ``project_root/forcing_variables``. Only regular files with that suffix are
    used, so stray entries (hidden files, checkpoint folders, other formats) do
    not turn into bogus variable names. The names are sorted so the order in
    which variables are processed does not depend on directory listing order.

    Parameters
    ----------
    subid : str
        Subbasin identifier passed through to ``create_subbasin_forcings``.

    Returns
    -------
    None
    """
    variables_dir = Path(project_root) / 'forcing_variables'
    all_variables = tuple(sorted(
        entry.stem for entry in variables_dir.glob('*.npy') if entry.is_file()
    ))

    create_subbasin_forcings(subid, all_variables)
    clear2()

In [54]:
# Fan the subbasins out over three worker processes; the bar advances once per
# finished subbasin, in completion order.
with mp.Pool(processes=3) as p, tqdm(total=len(all_ids)) as pbar:
    for _ in p.imap_unordered(wrapper, all_ids):
        pbar.update()

100%|██████████| 1/1 [00:38<00:00, 38.68s/it]
