In [1]:
import os
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Root directory under which calculate_characteristics() looks up the
# "meteo/forcing/..." and "hydro/states/..." CSV files.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not run elsewhere until it is edited — consider making it configurable.
path_to_dataset = "/home/georgy/Documents/PEAK/results/tier2"

In [3]:
def meteo_characteristics(meteo_instance, states_instance, year):
    """Summarise one hydrological year of forcing and snowpack data.

    The hydrological year ``year`` is taken as 1 November of ``year - 1``
    through 31 October of ``year`` (both ends inclusive).

    Parameters
    ----------
    meteo_instance : pandas.DataFrame
        Forcing data indexed by a ``DatetimeIndex``. The columns ``"T"``,
        ``"P"`` and ``"PET"`` are read. Every row is counted as one day.
    states_instance : pandas.DataFrame
        Model states indexed by a ``DatetimeIndex``. The column
        ``"snowpack"`` is read. The snowpack series is combined element-wise
        with ``T`` and ``P``, so it is assumed to share the same index as
        ``meteo_instance`` (not verified here).
    year : int
        Hydrological year to summarise; also used as the index label of the
        returned row.

    Returns
    -------
    pandas.DataFrame
        A single-row frame indexed by ``[year]`` holding the annual
        characteristics (``T_*``, ``P_*``, ``PET_sum``, ``Snowpack_*``)
        followed by the per-calendar-month columns ``T_mean_<m>``,
        ``T_sum_<m>``, ``P_sum_<m>`` and ``PET_sum_<m>`` for ``m`` in 1..12.
        A calendar month with no rows in the period yields NaN in its
        monthly columns.
    """
    start, end = f"{year-1}-11-01", f"{year}-10-31"

    # Slice each input once and pull the individual series out of the slice.
    meteo_year = meteo_instance.loc[start:end]
    t = meteo_year["T"].copy()
    p = meteo_year["P"].copy()
    pet = meteo_year["PET"].copy()
    swe = states_instance.loc[start:end]["snowpack"].copy()

    calendar_months = list(range(1, 13))

    def by_calendar_month(aggregated):
        # Align the grouped result on months 1..12 so that list position
        # (month - 1) always refers to the right calendar month, even when a
        # month is absent from the data (it then becomes NaN instead of
        # shifting every later month one slot to the left).
        return aggregated.reindex(calendar_months).to_numpy().flatten().tolist()

    ####################################################################

    # Temperature characteristics
    t_mean = t.mean()
    t_max = t.max()
    t_min = t.min()

    t_mon_means = by_calendar_month(t.groupby(t.index.month).mean())
    t_mon_sums = by_calendar_month(t.groupby(t.index.month).sum())

    # Number of days with temperature below zero
    t_numdays_belowzero = len(t[t < 0])

    # Thaw: above-zero temperature while snow is on the ground (SWE > 0)
    snow_present = swe > 0
    thaw = snow_present & (t > 0)
    t_numdays_thaw = len(t[thaw])
    t_sum_thaw = t[thaw].sum()

    ####################################################################

    # Precipitation characteristics
    p_sum = p.sum()
    p_max = p.max()
    p_mon_sums = by_calendar_month(p.groupby(p.index.month).sum())

    # Precipitation is classed as rain when T >= 0 and as snow when T < 0.
    wet = p > 0
    rain = wet & (t >= 0)
    snowfall = wet & (t < 0)

    p_numdays_rain = len(p[rain])
    p_numdays_snow = len(p[snowfall])
    p_sum_rain = p[rain].sum()
    p_sum_snow = p[snowfall].sum()

    # Rain on snow: rain falling while snow is on the ground
    rain_on_snow = wet & snow_present & (t >= 0)
    p_rain_on_snow_numdays = len(p[rain_on_snow])
    p_rain_on_snow_sum = p[rain_on_snow].sum()

    # Ratios are undefined for a year without precipitation; report NaN
    # explicitly instead of relying on a zero division.
    if p_sum != 0:
        p_rain_ratio = p_sum_rain / p_sum
        p_rain_on_snow_ratio = p_rain_on_snow_sum / p_sum
    else:
        p_rain_ratio = float("nan")
        p_rain_on_snow_ratio = float("nan")

    ####################################################################

    # PET characteristics
    pet_sum = pet.sum()
    pet_mon_sums = by_calendar_month(pet.groupby(pet.index.month).sum())

    ####################################################################

    # Snowpack characteristics
    swe_mean = swe.mean()
    swe_max = swe.max()

    # Duration: number of days with snow on the ground
    swe_duration = len(swe[snow_present])

    ####################################################################

    # Annual characteristics (insertion order defines the column order)
    meteo_output = {
        "T_mean": t_mean,
        "T_min": t_min,
        "T_max": t_max,
        "T_numdays_belowzero": t_numdays_belowzero,
        "T_numdays_thaw": t_numdays_thaw,
        "T_sum_thaw": t_sum_thaw,
        "P_sum": p_sum,
        "P_max": p_max,
        "P_numdays_rain": p_numdays_rain,
        "P_numdays_snow": p_numdays_snow,
        "P_sum_rain": p_sum_rain,
        "P_sum_snow": p_sum_snow,
        "P_ratio_rain": p_rain_ratio,
        "P_numdays_rainonsnow": p_rain_on_snow_numdays,
        "P_sum_rainonsnow": p_rain_on_snow_sum,
        "P_ratio_rainonsnow": p_rain_on_snow_ratio,
        "PET_sum": pet_sum,
        "Snowpack_mean": swe_mean,
        "Snowpack_max": swe_max,
        "Snowpack_numdays": swe_duration,
    }

    # Monthly characteristics, keyed by calendar month number
    for month in calendar_months:
        meteo_output[f"T_mean_{month}"] = t_mon_means[month - 1]
        meteo_output[f"T_sum_{month}"] = t_mon_sums[month - 1]
        meteo_output[f"P_sum_{month}"] = p_mon_sums[month - 1]
        meteo_output[f"PET_sum_{month}"] = pet_mon_sums[month - 1]

    return pd.DataFrame(meteo_output, index=[year])

In [4]:
def calculate_characteristics(basin_id, mode="HST", model="MIROC5", scenario="rcp26"):
    """Compute yearly meteorological characteristics for one basin.

    Reads the basin's forcing and state CSV files from below the module-level
    ``path_to_dataset`` directory, clips them to the period of the selected
    mode and calls ``meteo_characteristics`` once per hydrological year.

    Parameters
    ----------
    basin_id : int or str
        Basin identifier; the files read are named ``"<basin_id>.csv"``.
    mode : {"HST", "PRJ"}, default "HST"
        ``"HST"`` reads from the ``HST`` sub-directories and covers
        1979-2016; ``"PRJ"`` reads from ``PRJ/<model>/<scenario>`` and covers
        2016-2099.
    model : str, default "MIROC5"
        Sub-directory name used only when ``mode == "PRJ"``.
    scenario : str, default "rcp26"
        Sub-directory name used only when ``mode == "PRJ"``.

    Returns
    -------
    pandas.DataFrame
        One row per hydrological year, indexed by year. The first calendar
        year of the period is skipped because its hydrological year would
        start on 1 November of the year before the period.

    Raises
    ------
    ValueError
        If ``mode`` is neither ``"HST"`` nor ``"PRJ"``.
    """
    # Validate the mode up front so an unknown value fails with a clear
    # message instead of an unbound-variable error further down.
    if mode == "HST":
        first_year, last_year = 1979, 2016
        subdirs = ("HST",)
    elif mode == "PRJ":
        first_year, last_year = 2016, 2099
        subdirs = ("PRJ", model, scenario)
    else:
        raise ValueError(f"Unknown mode {mode!r}; expected 'HST' or 'PRJ'")

    filename = f"{basin_id}.csv"
    meteo_path = os.path.join(path_to_dataset, "meteo", "forcing", *subdirs, filename)
    states_path = os.path.join(path_to_dataset, "hydro", "states", *subdirs, filename)

    # The first CSV column is parsed as dates and used as the index.
    meteo = pd.read_csv(meteo_path, index_col=0, parse_dates=True)
    states = pd.read_csv(states_path, index_col=0, parse_dates=True)

    # Clip to the period; year-only strings select whole calendar years.
    period = slice(str(first_year), str(last_year))
    meteo = meteo.loc[period].copy()
    states = states.loc[period].copy()

    # Start from the second year: hydrological year N needs Nov-Dec of N-1.
    yearly = [
        meteo_characteristics(meteo, states, year)
        for year in range(first_year + 1, last_year + 1)
    ]

    return pd.concat(yearly)

In [5]:
%%time
# Yearly characteristics for basin 10240 with the default mode="HST".
nvkzh = calculate_characteristics(10240)

CPU times: user 805 ms, sys: 7.51 ms, total: 813 ms
Wall time: 812 ms


In [6]:
%%time
# Yearly characteristics for basin 10240 with mode="PRJ", using the default
# model ("MIROC5") and scenario ("rcp26").
nvkzf = calculate_characteristics(10240, mode="PRJ")

CPU times: user 1.81 s, sys: 3.56 ms, total: 1.81 s
Wall time: 1.81 s
