In [1]:
#Read main data

import numpy as np
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile
# Main input table; later cells read columns such as `province`, `precipitation`
# and `population` from it.
data = pd.read_excel(
    r'D:\Personal\University\Master\Thesis\Aggregation-Weighting\data_values.xlsx',
    sheet_name='data_transpose',
)
# Rainfall records; later cells use the `ostan`, `syear`, `precnew`, `Thontwaite`
# and `tmean` columns.
rainfall_data = pd.read_excel(
    r'D:\Personal\University\Master\Thesis\Aggregation-Weighting\rainfall_data.xlsx',
    sheet_name='Sheet1',
)

In [2]:
# Rainfall pre-processing.
# Keep only the rows whose `syear` lies strictly between 1366 and 1399
# (both bounds excluded).
rainfall_modified = rainfall_data[
    rainfall_data['syear'].gt(1366) & rainfall_data['syear'].lt(1399)
]

# Province codes (`ostan`) in order of first appearance, and the filtered rows
# belonging to each of them.
pr = list(rainfall_modified['ostan'].unique())
prdict = {
    province: rainfall_modified[rainfall_modified['ostan'] == province]
    for province in pr
}

# Per province: `precnew` summed within each `syear`.
annualrain_dict = {
    province: rows.groupby('syear')['precnew'].sum()
    for province, rows in prdict.items()
}



In [25]:
# Per-province rainfall rows with an extra `estimated_evap` column, and per-province
# annual summaries built from them.
#
# NOTE(review): the rows come from the unfiltered `rainfall_data`, not from
# `rainfall_modified`, so every year present in the sheet is included - confirm
# that this is intended.
#
# `.assign` builds a new frame, so the extra column is never written onto a
# filtered slice of `rainfall_data` (which triggers SettingWithCopyWarning).
data_dict = {
    province: rainfall_data.loc[rainfall_data.ostan == province].assign(
        # row-wise minimum of `precnew` and `Thontwaite`
        estimated_evap=lambda rows: rows[['precnew', 'Thontwaite']].min(axis=1)
    )
    for province in pr
}

# One groupby per province: sums of precnew / Thontwaite / estimated_evap and the
# mean of tmean for each `syear`; columns renamed to the published names.
annual_dict = {
    province: (
        rows.groupby('syear')
        .agg({
            'precnew': 'sum',
            'Thontwaite': 'sum',
            'estimated_evap': 'sum',
            'tmean': 'mean',
        })
        .rename(columns={'precnew': 'precipitation', 'Thontwaite': 'Thornthwaite'})
    )
    for province, rows in data_dict.items()
}

In [27]:
# writing annual data to excel    
# writer = pd.ExcelWriter('annual_data.xlsx', engine='xlsxwriter')
# for sheet, frame in  annual_dict.items():
#     frame.to_excel(writer, sheet_name = sheet)
# writer.save()

In [4]:
# Coefficient of variation (standard deviation divided by mean) per province.
# Inter-annual: computed on the annual `precnew` totals.
interannual_varicoef = {
    province: annualrain_dict[province].std() / annualrain_dict[province].mean()
    for province in pr
}
# Monthly: computed on the individual `precnew` records.
monthly_varicoef = {
    province: prdict[province].precnew.std() / prdict[province].precnew.mean()
    for province in pr
}


In [5]:
# Monthly coefficient of variation as a frame: drop the row labelled 31, then
# reorder the rows so they follow `data['province']`.
monthlyvaricoeff_df = (
    pd.DataFrame(monthly_varicoef.items(), columns=['province', 'monthly_varicoeff'])
    .drop([31])
    .set_index('province')
    .reindex(index=data['province'])
    .reset_index()
)

# Same treatment for the inter-annual coefficient of variation.
intanvaricoeff_df = (
    pd.DataFrame(interannual_varicoef.items(), columns=['province', 'rain_coeff_variability'])
    .drop([31])
    .set_index('province')
    .reindex(index=data['province'])
    .reset_index()
)

# Attach both coefficients to the main table (aligned on the row index).
data['rain_coeff_variation'] = intanvaricoeff_df['rain_coeff_variability']
data['monthly_varicoeff'] = monthlyvaricoeff_df['monthly_varicoeff']

In [6]:
# Intermediate quantities from which the sub-indicators are computed.
variables = pd.DataFrame({
    'province': data.province,
    'irwr': data.precipitation - data.evaporation,
    'surf_irwr': (data.precipitation - data.evaporation) * data.runoff_coeff,
    # row sum over the columns at positions 5, 6 and 7 of `data`
    'withdraw_surf': data.iloc[:, 5:8].sum(axis=1),
    'gw_irwr': (data.precipitation - data.evaporation) * (1 - data.runoff_coeff),
    # row sum over the columns at positions 8, 9 and 10 of `data`
    'withdraw_gw': data.iloc[:, 8:11].sum(axis=1),
    'withdraw_agr': data.withdraw_gw_agr + data.withdraw_surf_agr,
    'withdraw_ind': data.withdraw_gw_ind + data.withdraw_surf_ind,
    # urban/rural access weighted by the urban population ratio
    'access_sanitation_total': (
        data.access_sanitation_urban * data.urban_pop_ratio
        + data.access_sanitation_rural * (1 - data.urban_pop_ratio)
    ),
    # deficits are the sign-flipped groundwater variations
    'deficit_gw_annual': data.annual_gw_variation * (-1),
    'deficit_gw_aggregate': data.aggregate_gw_variation * (-1),
})

# Negative deficits are replaced by zero.
variables['deficit_gw_annual'] = variables['deficit_gw_annual'].mask(lambda s: s < 0, 0)
variables['deficit_gw_aggregate'] = variables['deficit_gw_aggregate'].mask(lambda s: s < 0, 0)

# Allowable groundwater withdrawal: groundwater resources minus the aggregate
# deficit divided by 17 (presumably the number of years it spans - confirm).
variables['withdraw_gw_allowable'] = variables.gw_irwr - (variables.deficit_gw_aggregate / 17)
# Withdrawal in excess of the allowable amount, never below zero.
variables['withdraw_agr_decrease'] = (
    variables.withdraw_gw - variables.withdraw_gw_allowable
).mask(lambda s: s < 0, 0)
variables['agrwat_lost_ratio'] = variables.withdraw_agr_decrease / variables.withdraw_agr

In [7]:
# Sub-indicator table, one column per sub-indicator, grouped by dimension.
# Column order is kept exactly as listed.
sub_indicators = pd.DataFrame({
    'province': data['province'],

    # Dimension 1: Resources
    'irwr_percap': variables.irwr * (10**6) / data.population,
    'withdraw_surf_ratio': variables.withdraw_surf / variables.surf_irwr,
    'withdraw_gw_ratio': variables.withdraw_gw / variables.gw_irwr,
    'gw_andef_withdraw': variables.deficit_gw_annual / variables.withdraw_gw,
    'gw_agdef_gwrwr': variables.deficit_gw_aggregate / variables.gw_irwr,
    'agr_dependency_gw': data.withdraw_gw_agr / variables.withdraw_agr,
    'urbanwat_dependency_gw': data.withdraw_urban_gw / data.produced_urban_wat,
    'ruralwat_dependency_gw': data.withdraw_rural_gw / data.produced_rural_wat,
    'ind_dependency_gw': data.withdraw_gw_ind / variables.withdraw_ind,
    'pop_growth': data['pop_growth'],

    # Dimension 2: Access
    'quality_proxy': data.urban_fam_treatwat / data.total_urban_fam,
    'access_wat_urban': data['access_wat_urban'],
    'access_wat_rural': data['access_wat_rural'],
    'access_sanitation_urban': data['access_sanitation_urban'],
    'access_sanitation_rural': data['access_sanitation_rural'],
    'under_stress_pop': data['under_stress_pop'],

    # Dimension 3: Economy
    'efficiency_agr': (
        data.agr_added_value * (data.land_irrig_tot / data.land_agr_tot) / variables.withdraw_agr
    ),
    'modern_irrig': data.land_irrig_modern / data.land_irrig_tot,
    'efficiency_ind': data.ind_added_value / variables.withdraw_ind,
    'unaccounted_wat_urban': data['unaccounted_wat_urban'],
    'unaccounted_wat_rural': data['unaccounted_wat_rural'],
    'employment_lost_agr': variables.agrwat_lost_ratio * data.agr_employment,

    # Dimension 4: Environment
    'treated_municipal_wastewater': data.waste_facility_cap / data.daily_produced_waste,

    # Dimension 5: Flood & Drought
    'damcap_rwr_ratio': data.dam_cap / variables.surf_irwr,
    'rain_coeff_variation': data['rain_coeff_variation'],
    'monthly_varicoeff': data['monthly_varicoeff'],
})

In [170]:
#defining normalization functions

def normalize(ser):
    """Min-max scale `ser`: its minimum maps to 0 and its maximum to 1.

    Returns a new object; `ser` itself is not modified.
    """
    lowest = ser.min()
    span = ser.max() - lowest
    return (ser - lowest) / span
def inv_normalize(ser):
    """Inverted min-max scale of `ser`: its maximum maps to 0 and its minimum to 1.

    Returns a new object; `ser` itself is not modified.
    """
    highest = ser.max()
    span = highest - ser.min()
    return (highest - ser) / span
def damcap_normalize(ser):
    """Score a series with a peak at 0.7 and a floor of 0.01.

    Values at or above 0.7 fall linearly from 1 (at 0.7) to 0 (at the series
    maximum); values below 0.7 rise linearly from 0 (at the series minimum) to
    1 (at 0.7). Every score below 0.01 is raised to 0.01.

    The computation is vectorised, so it works for any index (the previous
    element loop used ``ser[i]``, a label lookup, and therefore only worked for
    an index of 0..n-1). When the series maximum is exactly 0.7 the falling limb
    would be 0/0; those values sit on the peak and score 1.

    Parameters
    ----------
    ser : pandas.Series
        Numeric values to score. Not modified.

    Returns
    -------
    pandas.Series
        Float scores with the same index and name as `ser`; missing inputs stay
        missing.
    """
    optimum = 0.7  # value that earns the top score
    floor = 0.01   # smallest score ever returned

    values = ser.astype(float)
    highest = values.max()
    lowest = values.min()
    at_or_above = values >= optimum

    # Falling limb (used at or above the optimum) and rising limb (used below it).
    falling = (highest - values) / (highest - optimum)
    rising = (values - lowest) / (optimum - lowest)

    score = rising.mask(at_or_above, falling)
    # Peak coincides with the maximum: falling limb is 0/0, the score is 1.
    score = score.mask(at_or_above & (highest == optimum), 1.0)
    return score.clip(lower=floor)


In [193]:
# Inputs for the normalization steps, all from the same workbook.
# Sheet2: column 0 holds sub-indicator names, column 1 their type code.
indicator_type = pd.read_excel(
    r'D:\Personal\University\Master\Thesis\Aggregation-Weighting\sub_indicators.xlsx',
    sheet_name='Sheet2',
)
# Sheet3: rows 0 and 1 hold reference values, row 2 the type code; its columns
# are addressed by the same positions as the columns of `new_sub`.
ind_type = pd.read_excel(
    r'D:\Personal\University\Master\Thesis\Aggregation-Weighting\sub_indicators.xlsx',
    sheet_name='Sheet3',
)
# Sheet1: the sub-indicator table that the final normalization works on.
new_sub = pd.read_excel(
    r'D:\Personal\University\Master\Thesis\Aggregation-Weighting\sub_indicators.xlsx',
    sheet_name='Sheet1',
)

In [194]:
# Preliminary normalization: rescale each sub-indicator column according to the
# type code stored next to its name in `indicator_type`
# (column 0 = sub-indicator name, column 1 = code).
#   'b'            -> normalize      (largest value scores 1)
#   'l'            -> inv_normalize  (smallest value scores 1)
#   anything else  -> damcap_normalize
norm_indicators = sub_indicators.copy()

# The first 26 rows of `indicator_type` are processed.
for row in range(26):
    column, kind = indicator_type.iloc[row, 0], indicator_type.iloc[row, 1]
    if kind == 'b':
        scaler = normalize
    elif kind == 'l':
        scaler = inv_normalize
    else:
        scaler = damcap_normalize
    norm_indicators[column] = scaler(norm_indicators[column])

In [195]:
# Final normalization: fill `normalized_final` (same shape and labels as `new_sub`)
# cell by cell, using the reference rows of `ind_type`:
#   row 0 and row 1 -> reference values compared against each cell
#   row 2           -> type code selecting the branch ('b', 'l', 'diff', 'd')
# Columns of `ind_type` are addressed with the same position `i` as `new_sub`.
# NOTE(review): the column count (27) and row count (31) are hard-coded - confirm
# they match the sheets.
normalized_final=pd.DataFrame().reindex_like(new_sub)
normalized_final['province']=new_sub['province']
# Allowable groundwater withdrawal as a fraction of groundwater resources;
# used as the per-row limit in the 'diff' branch.
max_allowable_withdraw=variables['withdraw_gw_allowable']/variables['gw_irwr']

for i in range(27):
    # Name of the sub-indicator taken from row i-1 of `indicator_type`.
    # NOTE(review): for i == 0 this is row -1, i.e. the LAST row - presumably
    # harmless because column 0 is `province`, but confirm.
    col=indicator_type.iloc[i-1,0]
    if ind_type.iloc[2,i]=='b':        # The bigger the better indicators
        for j in range(31):
            # above row 0 -> 1; below row 1 -> 0.01; otherwise linear between
            # row 1 (score 0) and row 0 (score 1).
            # NOTE(review): a value exactly equal to row 1 scores 0, not 0.01.
            if new_sub.iloc[j,i]>ind_type.iloc[0,i]:
                normalized_final.iloc[j,i]=1
            elif  new_sub.iloc[j,i]<ind_type.iloc[1,i]:
                normalized_final.iloc[j,i]=0.01
            else:
                normalized_final.iloc[j,i]= (new_sub.iloc[j,i]-ind_type.iloc[1,i])\
                /(ind_type.iloc[0,i]-ind_type.iloc[1,i])
    elif ind_type.iloc[2,i]=='l':      # The lower the better indicators
        for j in range(31):
            # below row 0 -> 1; above row 1 -> 0.01; otherwise linear between
            # row 0 (score 1) and row 1 (score 0).
            if new_sub.iloc[j,i]<ind_type.iloc[0,i]:
                normalized_final.iloc[j,i]=1
            elif  new_sub.iloc[j,i]>ind_type.iloc[1,i]:
                normalized_final.iloc[j,i]=0.01
            else:
                normalized_final.iloc[j,i]= (ind_type.iloc[1,i]-new_sub.iloc[j,i])\
                /(ind_type.iloc[1,i]-ind_type.iloc[0,i])
    elif ind_type.iloc[2,i]=='diff':    # GW withdrawal to rwr ratio normalization
        for j in range(31):
            # above the row's allowable limit -> 0.01; otherwise one minus the
            # share of the limit that is used.  `max_allowable_withdraw[j]` is a
            # label lookup, so it assumes the index of `variables` is 0..30.
            if new_sub.iloc[j,i]>max_allowable_withdraw[j]:
                normalized_final.iloc[j,i]=0.01
            else:
                normalized_final.iloc[j,i]=1-new_sub.iloc[j,i]/max_allowable_withdraw[j]
    elif ind_type.iloc[2,i]=='d':      # Dam Capacity normalization
         # whole column at once, addressed by name rather than by position
         normalized_final[col]=damcap_normalize(new_sub[col])
# Disabled alternative for the groundwater-dependency indicators:
#     elif "dependency_gw" in indicator_type.iloc[i-1,0]:      # Dependency on gw indicators normalization
#         for j in range(31):
#             if new_sub.iloc[j,5]<1:
#                 normalized_final.iloc[j,i]=(1-new_sub.iloc[j,i])
#             elif new_sub.iloc[j,5]>1 and new_sub.iloc[j,5]<5:
#                 normalized_final.iloc[j,i]=(1-new_sub.iloc[j,i])*0.9
#             elif new_sub.iloc[j,5]>5 and new_sub.iloc[j,5]<10:
#                 normalized_final.iloc[j,i]=(1-new_sub.iloc[j,i])*0.8
#             else:
#                 normalized_final.iloc[j,i]=(1-new_sub.iloc[j,i])*0.7
    # Reached only when row 2 matched none of the codes above.
    # NOTE(review): 1 + pop_growth/100 is a growth factor, not a 0..1 score like
    # the other branches produce - confirm this is intended.
    elif indicator_type.iloc[i-1,0]=="pop_growth":
        normalized_final['pop_growth']=1+new_sub['pop_growth']/100
# normalized_final.iloc[24,12]=0.5 

In [196]:
#defining aggregation function
def aggregation(df,a=0.5):
    size=len(df.columns)
    result=(df.product(axis=1)**(1/size))*a+df.mean(axis=1)*(1-a)
    return result

In [197]:
# Aggregating sub-indicators to obtain indicators.
# `a` is the weight of the geometric mean inside `aggregation`; indicators made
# of a single sub-indicator are copied over unchanged.
a = 0.5
indicators = pd.DataFrame({
    'province': normalized_final['province'],
    'irwr_percap': normalized_final['irwr_percap'],
    'irwr_stability': aggregation(
        normalized_final[['rain_coeff_variation', 'monthly_varicoeff']], a),
    'water_stress': aggregation(
        normalized_final[['withdraw_surf_ratio', 'withdraw_gw_ratio']], a),
    'deficit_gw': aggregation(
        normalized_final[['gw_andef_withdraw', 'gw_agdef_gwrwr']], a),
    'dependency_gw': aggregation(
        normalized_final[['agr_dependency_gw', 'ruralwat_dependency_gw',
                          'urbanwat_dependency_gw', 'ind_dependency_gw']], a),
    'quality_proxy': normalized_final['quality_proxy'],
    'access_drinking_water': aggregation(
        normalized_final[['access_wat_urban', 'access_wat_rural']], a),
    'under_stress_pop': normalized_final['under_stress_pop'],
    'sanitation': aggregation(
        normalized_final[['access_sanitation_urban', 'access_sanitation_rural',
                          'treated_municipal_wastewater']], a),
    'efficiency_agr': normalized_final['efficiency_agr'],
    'modern_irrig': normalized_final['modern_irrig'],
    'efficiency_ind': normalized_final['efficiency_ind'],
    'unaccounted_water': aggregation(
        normalized_final[['unaccounted_wat_urban', 'unaccounted_wat_rural']], a),
    'employment_lost': normalized_final['employment_lost_agr'],
    'dam_capacity': normalized_final['damcap_rwr_ratio'],
})

In [207]:
# Aggregate all indicators into the water security index.
# `b` is the weight of the geometric mean inside `aggregation`.
b = 0.5
# Indicator columns only (the province label is not part of the aggregation).
temp = indicators.drop(columns='province')
water_security = pd.DataFrame({
    'province': normalized_final['province'],
    'wsi': aggregation(temp, b),
    'wsi_mean': temp.mean(axis=1),
})
# Blended index relative to the plain arithmetic mean.
water_security['ratio'] = water_security['wsi'] / water_security['wsi_mean']

In [208]:
# Optional export of the aggregated indicators to Excel (currently disabled)
# indicators.to_excel('aggregated indicators.xlsx')

In [209]:
# Display the result table: province, wsi, wsi_mean and ratio (wsi / wsi_mean).
water_security

Unnamed: 0,province,wsi,wsi_mean,ratio
0,azsh,0.444264,0.484287,0.917357
1,azgh,0.442642,0.501036,0.883454
2,arda,0.402384,0.459207,0.876257
3,esfe,0.473309,0.515142,0.918792
4,albo,0.449802,0.5014,0.897093
5,ilam,0.434164,0.549968,0.789436
6,bush,0.363205,0.457887,0.793219
7,tehr,0.419145,0.502134,0.834727
8,chba,0.48836,0.583398,0.837095
9,khoj,0.311192,0.380505,0.817839
