In [5]:
from dask_jobqueue import PBSCluster
from dask.distributed import Client,progress
from dask import delayed
from dask import compute
from dask.diagnostics import*
import dask
import numpy as np
import pandas as pd
from tqdm import tqdm
# import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# from mpl_toolkits.basemap import Basemap
import seaborn as sns
import matplotlib.dates as mdates
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
from reportlab.lib.pagesizes import portrait
from reportlab.platypus import Image
from reportlab.platypus import Table
from reportlab.lib import colors
import os, sys, glob
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
import config 
import warnings
warnings.filterwarnings('ignore')

# from objectives import pbias,nashsutcliffe,kge,rmse,correlationcoefficient,mae

In [6]:
# Load pre-requisite data
stations_list = pd.read_csv('../LIS_Data/EXP002/valid_stations_list.csv')
meta_file = pd.read_csv('../LIS_Data/EXP002/gaugemetadata.csv')
meta_file.reset_index(drop = True,inplace=True)
meta_file.set_index('GaugeID',inplace=True)
obj_fn = [config.pbias,config.nashsutcliffe,config.kge,
          config.rmse,config.correlationcoefficient,config.mae,config.rrmse]
obj_fn_names = ['PBIAS','NSE','KGE','RMSE','CORR','MAE','MAPE']
error_columns = ['GaugeID','Latitude','Longitude','PBIAS','NSE','KGE','RMSE','CORR','MAE','RRMSE','obs_avg','sim_avg','MAPE']
error_df = pd.DataFrame(columns = error_columns).set_index('GaugeID')

In [7]:
#Create list of all gauge stations with NaNs dropped and export to CSVs(if needed).
stations = []
monsoon_months = [10,11,12]
for i in tqdm(range(0,len(stations_list))):
    key = stations_list.loc[i,'stations']
    station = pd.read_csv('../LIS_Data/EXP002/'+key+'-merged.csv')
    station.set_index('Date',inplace=True)
    station.index = pd.to_datetime(station.index)
    station.dropna(inplace=True)
    station = station[station.index.map(lambda t: t.month in monsoon_months)]
    stations.append(station)
# for k in tqdm(range(0,len(stations_list))):
#     key = stations_list.loc[k,'stations']
#     stations[k].to_csv('../../final_results/IWM-Validation-EXP002/rawdata/gaugestreamflow/merged/ne_monsoon/'+key+'_ne_monsoon.csv')

100%|██████████| 284/284 [00:34<00:00,  8.12it/s]


In [4]:
obs_flow = stations[0]['Streamflow (cumecs)']
sim_flow = stations[0]['Streamflow_tavg']
obs, sim = np.array(obs_flow), np.array(sim_flow)
nmae = config.nmae(obs,sim)
nmae

168.76398244917473

In [8]:
new_row = [None]*len(stations)
for i in tqdm (range(0,len(stations))):
    try:
        obs_flow = stations[i]['Streamflow (cumecs)']
        sim_flow = stations[i]['Streamflow_tavg']
        meta_key = stations_list.loc[i,'stations']
        s_info= {'lat':meta_file.at[meta_key,'Latitude'],
                'lon':meta_file.at[meta_key,'Longitude'],
                'name':str(meta_file.at[meta_key,'Name']),
                'basin':str(meta_file.at[meta_key,'Basin']),
                'state':str(meta_file.at[meta_key,'State'])}
        error_columns = ['GaugeID','Latitude','Longitude','PBIAS','NSE','KGE','CORR','Alpha','Beta','RMSE','MAE','RRMSE','obs_avg','sim_avg','NMAE']
        error_df = pd.DataFrame(columns = error_columns).set_index('GaugeID')
        new_row[i]= pd.DataFrame ([[meta_key,s_info['lat'],s_info['lon'],
                                    round(config.pbias(obs_flow,sim_flow),3),
                                    round(config.nashsutcliffe(obs_flow,sim_flow),3),
                                    round(config.kge(obs_flow,sim_flow,return_all="True")[0],3),
                                    round(config.kge(obs_flow,sim_flow,return_all="True")[1],3),
                                    round(config.kge(obs_flow,sim_flow,return_all="True")[2],3),
                                    round(config.kge(obs_flow,sim_flow,return_all="True")[3],3),
                                    round(config.rmse(obs_flow,sim_flow),3),
                                    round(config.mae(obs_flow,sim_flow),3),
                                    round(config.rrmse(obs_flow,sim_flow),3),
                                    round(obs_flow.mean(),3),
                                    round(sim_flow.mean(),3),
                                    round(config.nmae(obs_flow,sim_flow),3)]],
                                    columns=(['GaugeID','Latitude','Longitude','PBIAS','NSE','KGE','CORR','Alpha','Beta','RMSE','MAE','RRMSE','obs_avg','sim_avg','NMAE'])).set_index('GaugeID')
        #Create the main plot ##
#         fig, axes = plt.subplots(ncols=1, nrows=6, figsize=(9,14), tight_layout=True)
#         myFmt = mdates.DateFormatter('%b-%y')
#         stations[i]['TotalPrecip_tavg'].plot(ax=axes[0])
#         axes[0].set_title('')
#         axes[0].xaxis.set_major_formatter(mdates.DateFormatter('%b-%y'))
#         axes[0].set_ylabel('Precipitation \n(kgm-2s-1)',rotation='vertical')
#         axes[0].yaxis.set_label_coords(-0.11, 0.5)   
#         axes[0].tick_params('x', labelrotation=0)
#         stations[i]['Evap_tavg'].plot(ax=axes[1])
#         axes[1].set_title('')
#         axes[1].xaxis.set_major_formatter(mdates.DateFormatter('%b-%y'))
#         axes[1].set_ylabel('Evapotranspiration \n(kgm-2s-1)',rotation='vertical')
#         axes[1].yaxis.set_label_coords(-0.11, 0.5)   
#         axes[1].tick_params('x', labelrotation=0)
#         stations[i]['SM_L1'].plot(ax=axes[2],label='Layer 1')
#         stations[i]['SM_L2'].plot(ax=axes[2],label='Layer 2')
#         stations[i]['SM_L3'].plot(ax=axes[2],label='Layer 3')
#         stations[i]['SM_L4'].plot(ax=axes[2],label='Layer 4')
#         axes[2].set_title('')
#         axes[2].legend(loc="upper right")
#         axes[2].xaxis.set_major_formatter(mdates.DateFormatter('%b-%y'))
#         axes[2].set_ylabel('Soil Moisture \n(kg m-2)',rotation='vertical')
#         axes[2].yaxis.set_label_coords(-0.11, 0.5)   
#         axes[2].tick_params('x', labelrotation=0)
#         stations[i]['RiverDepth_tavg'].plot(ax=axes[3])
#         axes[3].set_title('')
#         axes[3].xaxis.set_major_formatter(mdates.DateFormatter('%b-%y'))
#         axes[3].set_ylabel('River Depth \n (m)',rotation='vertical')
#         axes[3].yaxis.set_label_coords(-0.11, 0.5)   
#         axes[3].tick_params('x', labelrotation=0)
#         stations[i]['SWS_tavg'].plot(ax=axes[4])
#         axes[4].set_title('')
#         axes[4].xaxis.set_major_formatter(mdates.DateFormatter('%b-%y'))
#         axes[4].set_ylabel('Surface Water Storage \n(mm))',rotation='vertical')
#         axes[4].yaxis.set_label_coords(-0.11, 0.5)   
#         axes[4].tick_params('x', labelrotation=0)
#         stations[i]['Streamflow_tavg'].plot(ax=axes[5],label='Simulated')
#         stations[i]['Streamflow (cumecs)'].plot(ax=axes[5],label='Observed')
#         axes[5].set_title('')
#         axes[5].xaxis.set_major_formatter(mdates.DateFormatter('%b-%y'))
#         axes[5].legend(loc="upper right")
#         axes[5].set_ylabel('Streamflow \n (m3s-1)',rotation='vertical')
#         axes[5].yaxis.set_label_coords(-0.11, 0.5)   
#         axes[5].tick_params('x', labelrotation=0)
#         plt.savefig('../figures/gaugestreamflow/'+meta_key+'_main.png',dpi = 300)
#         plt.close()
#         ##Create the scatter plot ##
#         plt.figure(figsize=(6,5))
#         ax = sns.regplot(x=stations[i]['Streamflow (cumecs)'], y=stations[i]['Streamflow_tavg'])
#         ax.set_title("Streamflow (m3s-1)")
#         plt.xlabel("Observed")
#         plt.ylabel("Simulated")
#         fig_s = ax.get_figure()
#         fig_s.savefig('../figures/gaugestreamflow/'+meta_key+'_scatter.png',dpi=300)
#         plt.close()
#         # Create png for error metrics ##
#         fig_tab, ax_tab = plt.subplots()
#         fig_tab.patch.set_visible(False)
#         ax_tab.axis('off')
#         ax_tab.axis('tight')
#         table = ax_tab.table(cellText=obj_pd.values, colLabels=obj_pd.columns, loc='center',cellLoc='center')
#         table.scale(1,2)
#         table.set_fontsize(14)
#         fig_tab.tight_layout()
#         plt.savefig('../figures/gaugestreamflow/'+meta_key+'_error.png',dpi=300)
#         plt.close()
#         ## Create station wise pdf composite report(merging to be done externally using PDFSam) ## 
#         c = canvas.Canvas('../reports/gaugestreamflow/'+meta_key+'.pdf',pagesize=portrait(A4))
#         c.drawCentredString(300,750,"Validation of LIS simulations in " + meta_key + " (" + str(s_info['name']) + ", " + str(s_info['state'])+ ")")
#         mainfig = '../figures/gaugestreamflow/'+ meta_key + '_main.png'
#         tablefig = '../figures/gaugestreamflow/'+ meta_key + '_error.png'
#         scatterfig='../figures/gaugestreamflow/'+ meta_key + '_scatter.png'
#         c.drawImage(mainfig,50,100,width=300,height=600)
#         c.drawImage(scatterfig,350,505,width = 210, height = 210)
#         c.drawImage(tablefig,375,280,width = 175 , height=173)
#         c.drawCentredString(460,440,"Error Metrics")
#         c.line(50,800,550,800)
#         c.line(50,50,550,50)
#         c.showPage()
#         c.save()
    except Exception as e:
        print(str(e))
        continue

 35%|███▍      | 98/284 [00:01<00:02, 92.11it/s]

'Streamflow (cumecs)'


 49%|████▊     | 138/284 [00:01<00:01, 93.42it/s]

float division by zero


100%|██████████| 284/284 [00:03<00:00, 92.02it/s]


In [10]:
for i in range(0,len(new_row)):
    error_df = error_df.append(new_row[i]) 
error_df.to_csv('../../final_results/IWM-Validation-EXP002/rawdata/gaugestreamflow/merged/ne_monsoon/error_metrics.csv')