In [4]:
import xarray as xr
import numpy as np

import pandas as pd
import geopandas as gpd

# from ipywidgets import IntProgress
from IPython.display import clear_output

from tqdm import tqdm

import os

> ## <span style="font-size: 25px; line-height: 1; color: #00b8d4; margin-right: 100px;">&#9998;</span> <strong style="color: #00b8d4;">Note</strong>
>This notebook extracts climate timeseries from SILO NetCDF files and uses them to create daily timeseries for rainfall-runoff models. This includes PET and rainfall data. The data will be written to a CSV file and will be constructed according to the data in the file reference_table.csv. This table is built using the build_reference_table.ipynb notebook. 
> <br/><br/>



In [2]:
# --- User settings ----------------------------------------------------------
region = 'MW'         # one of ['BM', 'FI', 'MW', 'BU', 'WT', 'CY']
component = 'rain'    # should be rain or pet

# First and last calendar year to extract (both inclusive).
#start_year = 1970
start_year = 2024
finish_year = 2025


# The only thing that may be changed below is the path to the SILO NetCDF files

# Table of grid cells and weights per subcatchment (see
# build_reference_table.ipynb); read relative to the working directory.
reference_table = pd.read_csv('reference_table.csv')

years = range(start_year, finish_year + 1)

# One NetCDF file per year is expected, named <prefix><year><suffix>.
if component == 'rain':
    prefix = 'J:/TS/ClimateInputs/NetCDFdumps/Rainfall_Daily/'
    suffix = '.daily_rain.nc'
elif component == 'pet':
    prefix = 'J:/TS/ClimateInputs/NetCDFdumps/ET_Morton_Wet/'
    suffix = '.et_morton_wet.nc'
else:
    # Fail here rather than with a NameError on `prefix` in a later cell.
    raise ValueError(f"component must be 'rain' or 'pet', got {component!r}")





> ## <span style="font-size: 25px; line-height: 1.0; color: #00b8d4; margin-right: 100px;">&#9998;</span> <strong style="color: #00b8d4;">Note</strong>
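>If everything is set up correctly, running the cells below will build the timeseries CSV file for each requested region and component. 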
> <br/><br/>


In [6]:
def build_timeseries(region, year_range, component,
                     netcdf_root='F:/TS/ClimateInputs/NetCDFdumps'):
    """Extract a weighted daily timeseries per subcatchment and save it as CSV.

    For every year in ``year_range`` one NetCDF file
    ``<netcdf_root>/<folder>/<year><suffix>`` is opened. The series of a
    subcatchment is the sum, over its rows in the reference table, of the
    values at the grid cell nearest to (x, y) multiplied by ``grid_weight``.
    The yearly tables are stacked in the order given and written to
    ``<region>_<component>_timeseries.csv`` in the working directory.

    Relies on the notebook-level ``reference_table`` DataFrame (columns used:
    region, subcatchment, x, y, grid_weight).

    Parameters
    ----------
    region : str
        Value matched against the ``region`` column of the reference table.
    year_range : iterable of int
        Years to process; each needs its own NetCDF file.
    component : str
        'rain' or 'pet'.
    netcdf_root : str, optional
        Folder containing the ``Rainfall_Daily`` and ``ET_Morton_Wet``
        folders. The default is the location this function has always read
        from; the settings cell at the top of the notebook points at ``J:/``
        instead, so pass the root explicitly if that is the right drive.

    Raises
    ------
    ValueError
        If ``component`` is neither 'rain' nor 'pet'.
    """
    # (folder, file suffix, NetCDF variable name) per supported component.
    sources = {
        'rain': ('Rainfall_Daily', '.daily_rain.nc', 'daily_rain'),
        'pet': ('ET_Morton_Wet', '.et_morton_wet.nc', 'et_morton_wet'),
    }
    if component not in sources:
        raise ValueError(
            f"component must be one of {sorted(sources)}, got {component!r}"
        )
    folder, suffix, variable = sources[component]
    prefix = netcdf_root.rstrip('/\\') + '/' + folder + '/'

    sub_reference_table = reference_table.query('region == @region')
    subcatchment_list = sub_reference_table.subcatchment.unique().tolist()

    # The cells and weights of a subcatchment do not depend on the year, so
    # look them up once instead of once per year.
    cells_by_subcatchment = []
    for subcatchment in subcatchment_list:
        fragments = sub_reference_table[
            sub_reference_table.subcatchment == subcatchment
        ]
        cells_by_subcatchment.append(
            list(zip(fragments.x, fragments.y, fragments.grid_weight))
        )

    yearly_frames = []
    for year in year_range:
        print('running year ', year)

        filename = prefix + str(year) + suffix
        # Load the one variable into memory, then release the file handle.
        with xr.open_dataset(filename) as ds:
            grid = ds[variable].load()

        datetimeindex = grid.indexes['time']
        data = np.zeros((len(subcatchment_list), len(datetimeindex)))

        for row, cells in enumerate(tqdm(cells_by_subcatchment)):
            for xx, yy, fraction in cells:
                data[row, :] += (
                    grid.sel(lon=xx, lat=yy, method='nearest').data * fraction
                )

        yearly_frames.append(
            pd.DataFrame(data.T, index=datetimeindex, columns=subcatchment_list)
        )
        clear_output(wait=True)

    # Concatenate once; this also avoids depending on how pandas treats an
    # empty seed frame when combining index names.
    timeseries = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()
    timeseries.to_csv(region + '_' + component + '_timeseries.csv')
    

In [7]:
# Regions and components to extract in this run.
#region_list = ['BM', 'FI', 'MW', 'BU', 'WT', 'CY']
region_list = ['CY']
component_list = ['rain', 'pet']

# The loop variables are deliberately not named `region` / `component`: those
# names hold the notebook-level settings (which `prefix` / `suffix` were
# derived from) and other cells read them, so they must not be overwritten.
for run_region in region_list :
    for run_component in component_list :
        print(' Running ', run_component, 'for ', run_region)
        build_timeseries(run_region, years, run_component)

running year  2025


100%|██████████| 551/551 [00:04<00:00, 120.78it/s]


In [11]:
# Inline version of the extraction for the notebook-level settings.
# Reads: region, component, years, prefix, suffix, reference_table.
# Leaves `timeseries` (one column per subcatchment, indexed by time) in
# memory and also writes it to '<region>_<component>_timeseries.csv'.

variable_by_component = {'rain': 'daily_rain', 'pet': 'et_morton_wet'}
if component not in variable_by_component:
    raise ValueError(f"component must be 'rain' or 'pet', got {component!r}")
variable = variable_by_component[component]

# `prefix` / `suffix` are only set in the settings cell. If `component` was
# changed afterwards they point at the other dataset, so stop with a clear
# message instead of failing on a missing variable inside the loop.
if not suffix.startswith('.' + variable):
    raise RuntimeError(
        f"suffix {suffix!r} does not match component {component!r}; "
        "re-run the settings cell before this one"
    )

sub_reference_table = reference_table.query('region == @region')
subcatchment_list = sub_reference_table.subcatchment.unique().tolist()

yearly_frames = []
for year in years:
    print('running year ', year)

    filename = prefix + str(year) + suffix
    # Load the one variable into memory, then release the file handle.
    with xr.open_dataset(filename) as ds:
        grid = ds[variable].load()

    datetimeindex = grid.indexes['time']
    data = np.zeros((len(subcatchment_list), len(datetimeindex)))

    for row, subcatchment in enumerate(tqdm(subcatchment_list)):
        fragments = sub_reference_table.query('subcatchment == @subcatchment')
        # Weighted sum of the nearest grid cell for every fragment.
        for xx, yy, fraction in zip(fragments.x, fragments.y, fragments.grid_weight):
            data[row, :] += grid.sel(lon=xx, lat=yy, method='nearest').data * fraction

    yearly_frames.append(
        pd.DataFrame(data.T, index=datetimeindex, columns=subcatchment_list)
    )
    clear_output(wait=True)

# Concatenate once instead of growing the frame inside the loop.
timeseries = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()
timeseries.to_csv(region + '_' + component + '_timeseries.csv')
    

    

running year  2025


100%|██████████| 203/203 [00:01<00:00, 151.42it/s]


<div class="admonition note" style="background: rgba(0,123,255,.1); padding-top: 0px; padding-bottom: 6px; border-radius: 8px; border-left: 8px solid #007bff; border-color: #007bff; padding-left: 10px; padding-right: 10px">
    <p class="title">
        <i style="font-size: 18px; color:#007bff;"></i>
        <b style="color: #007bff;"> <span style="color: #007bff;">&#8505;</span> Note</b>
    </p>
    <p>Running the following cell will run the climate collation step to prepare a set of files for importing into Source.
    The resulting files will be written to the output directory.</p>
</div>

In [12]:
# Split the in-memory `timeseries` into one two-column CSV (Date, value) per
# subcatchment, written to '<region>_collation/'.
# Reads the notebook-level `region`, `component` and `timeseries`.
output_directory = region + '_collation/' 
directory_path = region + '_collation'

# Text placed in front of the subcatchment name in column and file names.
label_prefix_by_component = {'rain': 'rainfall for ', 'pet': 'pet for '}
if component not in label_prefix_by_component:
    # Previously an unknown component created the folder and wrote nothing.
    raise ValueError(f"component must be 'rain' or 'pet', got {component!r}")
label_prefix = label_prefix_by_component[component]

if not os.path.exists(directory_path):
    # Create the directory
    os.makedirs(directory_path)
    print(f"Directory '{directory_path}' was created.")
else:
    print(f"Directory '{directory_path}' already exists.")

input_data = timeseries.copy()
date_series = input_data.index

for column in input_data.columns:
    # str() because in-memory column labels are not guaranteed to be strings.
    label = label_prefix + str(column)
    output_data = pd.DataFrame(
        {'Date': date_series, label: input_data[column].values}
    )
    output_data.to_csv(output_directory + label + '.csv', index=False)

Directory 'MW_collation' already exists.


In [8]:
def do_collation(region, component):
    """Split a saved timeseries CSV into one CSV per subcatchment.

    Reads ``<region>_<component>_timeseries.csv`` from the working directory.
    Its ``time`` column supplies the dates; every other column is written to
    ``<region>_collation/<label>.csv`` with the two columns ``Date`` and
    ``<label>``, where ``<label>`` is 'rainfall for <column>' for rain and
    'pet for <column>' for pet. The output folder is created when missing.

    Parameters
    ----------
    region : str
        Region code used in the input file name and the output folder name.
    component : str
        'rain' or 'pet'.

    Raises
    ------
    ValueError
        If ``component`` is neither 'rain' nor 'pet' (checked before any file
        is read or any folder is created).
    KeyError
        If the input file has no ``time`` column.
    """
    # Text placed in front of the column name in column and file names.
    label_prefixes = {'rain': 'rainfall for ', 'pet': 'pet for '}
    if component not in label_prefixes:
        raise ValueError(
            f"component must be one of {sorted(label_prefixes)}, got {component!r}"
        )
    label_prefix = label_prefixes[component]

    timeseries = pd.read_csv(region + '_' + component + '_timeseries.csv')
    output_directory = region + '_collation/'
    directory_path = region + '_collation'

    if not os.path.exists(directory_path):
        # Create the directory
        os.makedirs(directory_path)
        print(f"Directory '{directory_path}' was created.")
    else:
        print(f"Directory '{directory_path}' already exists.")

    # Take the dates out so that only data columns remain in the frame.
    date_series = timeseries.pop('time')

    for column in timeseries.columns:
        label = label_prefix + str(column)
        output_data = pd.DataFrame(
            {'Date': date_series, label: timeseries[column].values}
        )
        output_data.to_csv(output_directory + label + '.csv', index=False)

In [9]:


# Regions and components to collate in this run.
# region_list = ['BM', 'FI', 'MW', 'BU', 'WT', 'CY']
region_list = ['CY']
component_list = ['rain', 'pet']

# The loop variables are deliberately not named `region` / `component`, so
# the notebook-level settings read by other cells are left untouched.
for run_region in region_list :
    for run_component in component_list :
        print(' Running ', run_component, 'for ', run_region)
        do_collation(run_region, run_component)

 Running  rain for  CY
Directory 'CY_collation' was created.
 Running  pet for  CY
Directory 'CY_collation' already exists.
