In [19]:
import numpy as np
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile

In [20]:
# Folder that holds every input workbook; change this one line to run elsewhere.
DATA_DIR = r'D:\Personal\University\Master\Thesis\Aggregation-Weighting'

# Read main data
data = pd.read_excel(DATA_DIR + '\\data_values.xlsx', sheet_name='data_transpose')
rainfall_data = pd.read_excel(DATA_DIR + '\\rainfall_data.xlsx', sheet_name='Sheet1')

# Read data necessary for normalization.
# The normalization cell reads rows 0, 1 and 2 of each column of this sheet:
# the value scored 1, the value scored 0.01, and the indicator type code.
ind_type = pd.read_excel(DATA_DIR + '\\indicator_type.xlsx', sheet_name='Sheet1')

In [21]:
#defining normalization function for (dam capacity)/(surface rwr)
def damcap_normalize(ser1,ser2):
    """Score each value of ``ser1`` by how close it is to its ideal in ``ser2``.

    Values and ideals are paired by position. For a value at or above its
    ideal the score is ``(max - value) / (max - ideal)``; for a value below
    its ideal it is ``(value - min) / (ideal - min)``, where ``max`` and
    ``min`` are taken over ``ser1``. Scores below 0.01 are raised to 0.01.

    A value exactly equal to its ideal scores 1, including the case where the
    ideal also equals ``ser1.max()`` (the plain formula would give 0/0 there).

    Parameters
    ----------
    ser1 : pandas.Series
        Observed ratios. Not modified.
    ser2 : pandas.Series or array-like
        Ideal ratio for each element of ``ser1``, in the same order.

    Returns
    -------
    pandas.Series
        Float scores carrying the index and name of ``ser1``.
    """
    values = ser1.to_numpy(dtype='float64')
    ideals = np.asarray(ser2, dtype='float64')
    if values.size == 0:
        return pd.Series(values, index=ser1.index, name=ser1.name)

    # Series.max/min skip NaN, exactly as the element-wise version did.
    upper = ser1.max()
    lower = ser1.min()

    # Both branches are evaluated for every element; the unused branch may
    # divide by zero, so silence those warnings and pick per element below.
    with np.errstate(divide='ignore', invalid='ignore'):
        score_above = (upper - values) / (upper - ideals)
        score_below = (values - lower) / (ideals - lower)
    scores = np.where(values >= ideals, score_above, score_below)

    # Sitting exactly on the ideal is the best possible outcome.
    scores[values == ideals] = 1.0
    # Floor very small scores; NaN compares False and therefore stays NaN.
    scores[scores < 0.01] = 0.01
    return pd.Series(scores, index=ser1.index, name=ser1.name)

#defining aggregation functions
def aggregate(df,a=0.5):
    """Blend the row-wise geometric and arithmetic means of ``df``.

    The geometric part is the row product raised to ``1 / number_of_columns``
    and is weighted by ``a``; the arithmetic row mean is weighted by ``1 - a``.
    Returns one value per row.
    """
    n_columns = df.shape[1]
    geometric_part = df.prod(axis=1).pow(1 / n_columns)
    arithmetic_part = df.mean(axis=1)
    return geometric_part * a + arithmetic_part * (1 - a)

#defining weighted aggregation functions
def weighted_addminagg(dfi,dfw,a=0.5):
    """Blend the row minimum of ``dfi`` with its weighted row sum.

    Weights are read from the first row of ``dfw`` and applied column by
    column. The result is ``a * row_min + (1 - a) * weighted_row_sum``.
    """
    weights = np.array(dfw)[0]
    row_minimum = dfi.min(axis=1)
    weighted_total = dfi.mul(weights, axis='columns').sum(axis=1)
    return a * row_minimum + (1 - a) * weighted_total

def weighted_sum(dfi,dfw):
    """Row-wise weighted sum of ``dfi`` using the first row of ``dfw`` as column weights."""
    weights = np.array(dfw)[0]
    scaled = dfi.mul(weights, axis='columns')
    return scaled.sum(axis=1)

def geomean(dfi):
    """Row-wise geometric mean: each row's product raised to ``1 / number_of_columns``."""
    n_columns = dfi.shape[1]
    row_product = dfi.prod(axis=1)
    return row_product.pow(1 / n_columns)

def weighted_geomean(dfi,dfw):
    """Row-wise weighted geometric mean.

    Each column of ``dfi`` is raised to its weight (taken from the first row
    of ``dfw``) and the powered columns are multiplied together per row.
    """
    exponents = np.array(dfw)[0]
    powered = dfi.pow(exponents, axis='columns')
    return powered.prod(axis='columns')

def weighted_addgeo(dfi,dfw,a=0.5):
    """Blend the weighted geometric mean and the weighted sum of each row.

    Column weights come from the first row of ``dfw``. The result is
    ``a * weighted_geometric_mean + (1 - a) * weighted_sum``.
    """
    weights = np.array(dfw)[0]
    geometric_part = dfi.pow(weights, axis='columns').prod(axis='columns')
    additive_part = dfi.mul(weights, axis='columns').sum(axis=1)
    return a * geometric_part + (1 - a) * additive_part

In [22]:
# Keep only the rainfall records with 1366 < syear < 1399
# (the original note describes this window as nearly 30 years).
rainfall_modified = rainfall_data.loc[
    rainfall_data['syear'].gt(1366) & rainfall_data['syear'].lt(1399)
]

# Province codes in order of first appearance, and each province's own records.
pr = list(rainfall_modified['ostan'].unique())
prdict = {name: rainfall_modified[rainfall_modified['ostan'] == name] for name in pr}

# Annual precipitation per province: 'precnew' summed within each 'syear'.
annualrain_dict = {
    name: records.groupby('syear')['precnew'].sum()
    for name, records in prdict.items()
}

# Coefficients of variation (standard deviation divided by mean):
# across the annual totals, and across the individual 'precnew' records.
interannual_varicoef = {
    name: totals.std() / totals.mean() for name, totals in annualrain_dict.items()
}
monthly_varicoef = {
    name: records['precnew'].std() / records['precnew'].mean()
    for name, records in prdict.items()
}
# Placeholder dictionary of empty frames; not filled in this cell.
annualevap_varicoef = {name: pd.DataFrame() for name in pr}


def _varicoef_frame(coeffs, value_column):
    """Turn a {province: coefficient} dict into a frame ordered like ``data['province']``.

    Row label 31 is dropped before re-ordering.
    NOTE(review): presumably that entry is not a province present in ``data`` - confirm.
    """
    frame = pd.DataFrame(coeffs.items(), columns=['province', value_column])
    frame = frame.drop([31])
    frame = frame.set_index('province')
    frame = frame.reindex(index=data['province'])
    return frame.reset_index()


# Convert both coefficient dictionaries to frames sorted like the original data.
monthlyvaricoeff_df = _varicoef_frame(monthly_varicoef, 'monthly_varicoeff')
intanvaricoeff_df = _varicoef_frame(interannual_varicoef, 'rain_coeff_variability')

# Add the calculated coefficients of variation to the main data (aligned on the row index).
data['rain_coeff_variation'] = intanvaricoeff_df['rain_coeff_variability']
data['monthly_varicoeff'] = monthlyvaricoeff_df['monthly_varicoeff']

In [23]:
#Creating a dataframe consisting of necessary variables for the calculation of sub-indicators
variables=pd.DataFrame()
variables['province']=data.province
# irwr = precipitation - evaporation, split into a surface and a groundwater
# share by the runoff coefficient.
variables['irwr']=data.precipitation-data.evaporation
variables['surf_irwr']=(data.precipitation-data.evaporation)*data.runoff_coeff
# NOTE(review): positional slice - presumably the three withdraw_surf_* columns; confirm against the workbook layout.
variables['withdraw_surf']=data.iloc[:,5:8].sum(axis=1)
variables['gw_irwr']=(data.precipitation-data.evaporation)*(1-data.runoff_coeff)
# NOTE(review): positional slice - presumably the three withdraw_gw_* columns; confirm against the workbook layout.
variables['withdraw_gw']=data.iloc[:,8:11].sum(axis=1)
variables['withdraw_agr']=data.withdraw_gw_agr+data.withdraw_surf_agr
variables['withdraw_ind']=data.withdraw_gw_ind+data.withdraw_surf_ind
# Urban and rural sanitation access combined, weighted by the urban population ratio.
variables['access_sanitation_total']=(data.access_sanitation_urban*data.urban_pop_ratio+data.access_sanitation_rural*(1-data.urban_pop_ratio))
# Deficits are the sign-flipped groundwater variations.
variables['deficit_gw_annual']=data.annual_gw_variation*(-1)
variables['deficit_gw_aggregate']=data.aggregate_gw_variation*(-1)
variables['withdraw_total']=data[['withdraw_surf_agr','withdraw_surf_ind','withdraw_surf_dom','withdraw_gw_agr','withdraw_gw_ind','withdraw_gw_dom']].sum(axis=1)
variables['agr_withdraw_ratio']=variables['withdraw_agr']/variables['withdraw_total']

#setting negative deficit values equal to zero
variables.loc[variables['deficit_gw_annual'] < 0 ,'deficit_gw_annual']=0
variables.loc[variables['deficit_gw_aggregate'] < 0 ,'deficit_gw_aggregate']=0
# NOTE(review): 17 is presumably the number of years covered by the aggregate deficit - confirm.
variables['withdraw_gw_allowable']=variables.gw_irwr-(variables.deficit_gw_aggregate/17)
# Groundwater withdrawal in excess of the allowable amount, floored at zero.
variables['withdraw_agr_decrease']=variables.withdraw_gw-variables.withdraw_gw_allowable
variables.loc[variables['withdraw_agr_decrease']<0,'withdraw_agr_decrease']=0
variables['agrwat_lost_ratio']=variables.withdraw_agr_decrease/variables.withdraw_agr

In [24]:
#Creating a dataframe for the sub-indicators
#Dimension 1: Resources
_resource_columns = {
    'rain_coeff_variation': data['rain_coeff_variation'],
    'monthly_varicoeff': data['monthly_varicoeff'],
    'gw_agdef_gwrwr': variables['deficit_gw_aggregate'] / variables['gw_irwr'],
    'gw_andef_withdraw': variables['deficit_gw_annual'] / variables['withdraw_gw'],
    'agr_dependency_gw': data['withdraw_gw_agr'] / variables['withdraw_agr'],
    'ind_dependency_gw': data['withdraw_gw_ind'] / variables['withdraw_ind'],
    'urbanwat_dependency_gw': data['withdraw_urban_gw'] / data['produced_urban_wat'],
    'ruralwat_dependency_gw': data['withdraw_rural_gw'] / data['produced_rural_wat'],
    'withdraw_surf_ratio': variables['withdraw_surf'] / variables['surf_irwr'],
    'withdraw_gw_ratio': variables['withdraw_gw'] / variables['gw_irwr'],
    'anomaly_rain': data['anomaly_rain'].abs(),
    'anomaly_temp': data['anomaly_temp'],
    # irwr scaled by 10**6, per inhabitant, discounted by the population growth percentage
    'irwr_percap': (variables['irwr'] * (10**6) / data['population']) / (1 + data['pop_growth'] / 100),
}

#Dimension 2: access
_access_columns = {
    'access_wat_urban': data['access_wat_urban'],
    'access_wat_rural': data['access_wat_rural'],
    'under_stress_pop': data['under_stress_pop'],
    'access_sanitation_urban': data['access_sanitation_urban'],
    'access_sanitation_rural': data['access_sanitation_rural'],
    'treated_municipal_wastewater': data['waste_facility_cap'] / data['daily_produced_waste'],
    'quality_proxy': data['urban_fam_treatwat'] / data['total_urban_fam'],
    'damcap_rwr_ratio': data['dam_cap'] / variables['surf_irwr'],
}

#Dimension 3:Economy
_economy_columns = {
    'efficiency_agr': data['agr_added_value'] / variables['withdraw_agr'],
    'modern_irrig': data['land_irrig_modern'] / data['land_irrig_tot'],
    'employment_lost_agr': variables['agrwat_lost_ratio'] * data['agr_employment'],
    'unaccounted_wat_urban': data['unaccounted_wat_urban'],
    'unaccounted_wat_rural': data['unaccounted_wat_rural'],
    'efficiency_ind': data['ind_added_value'] / variables['withdraw_ind'],
}

# Add the columns one at a time, in the order listed above.
sub_indicators = pd.DataFrame()
for _column_group in (_resource_columns, _access_columns, _economy_columns):
    for _column_name, _column_values in _column_group.items():
        sub_indicators[_column_name] = _column_values

# Label rows by province, then override the dam-capacity ratio for 'khuz' with 1.
sub_indicators.index = variables['province']
sub_indicators.loc['khuz', 'damcap_rwr_ratio'] = 1
# Bare expression: its value is discarded because it is not the last line of the cell.
sub_indicators['quality_proxy']
# Displayed result: mean of the two rainfall-variation coefficients per province, ascending.
sub_indicators[['rain_coeff_variation', 'monthly_varicoeff']].mean(axis=1).sort_values()

province
mazan    0.320530
gilan    0.356372
gole     0.411456
arda     0.414114
azsh     0.478862
azgh     0.495823
zanj     0.546068
khosh    0.547858
tehr     0.587937
albo     0.597068
qazv     0.599065
kord     0.608566
semn     0.656052
hame     0.660863
marka    0.674192
kersh    0.677325
khor     0.696926
lores    0.699727
chba     0.720313
qom      0.721756
esfe     0.734589
ilam     0.787864
khuz     0.823896
khoj     0.866060
yazd     0.875006
kohbu    0.883690
fars     0.951149
kerm     0.963779
sisba    1.109006
bush     1.152501
hormo    1.162718
dtype: float64

In [25]:
#Normalizing Data
# Start from a frame shaped like sub_indicators and filled with zeros
# (cells that are NaN in sub_indicators stay NaN).
normalized=sub_indicators*0
# Allowable groundwater withdrawal as a fraction of gw_irwr, labelled like sub_indicators.
max_allowable_withdraw=variables['withdraw_gw_allowable']/variables['gw_irwr']
max_allowable_withdraw.index=sub_indicators.index

# ind_type layout per column: row 0 = value scored 1, row 1 = value scored 0.01,
# row 2 = indicator type code ('b', 'l' or 'diff').
for col in ind_type.columns:
    best=ind_type.loc[0,col]
    worst=ind_type.loc[1,col]
    kind=ind_type.loc[2,col]
    raw=sub_indicators[col]
    if kind=='b': # The bigger the better indicators
        normalized.loc[raw>=best,col]=1
        normalized.loc[raw<=worst,col]=0.01
        cond=(raw>worst)&(raw<best)
        normalized.loc[cond,col]=(raw[cond]-worst)/(best-worst)
    elif kind=='l':      # The lower the better indicators
        normalized.loc[raw<=best,col]=1
        normalized.loc[raw>=worst,col]=0.01
        cond=(raw<worst)&(raw>best)
        normalized.loc[cond,col]=(worst-raw[cond])/(worst-best)
    elif kind=='diff':    # GW withdrawal to rwr ratio normalization
        # Bounds are inclusive, as in the 'b'/'l' branches: a ratio exactly equal
        # to 0.25 or to the allowable ratio used to match no condition and kept
        # the placeholder 0.
        cond1=raw>=max_allowable_withdraw
        normalized.loc[cond1,col]=0.01
        # Applied after cond1, so a ratio at or below 0.25 always scores 1.
        cond2=raw<=0.25
        normalized.loc[cond2,col]=1
        # Linear interpolation between 0.25 (score 1) and the allowable ratio (score 0).
        cond3=(raw<max_allowable_withdraw)&(raw>0.25)
        normalized.loc[cond3,col]=(max_allowable_withdraw.loc[cond3]-raw[cond3])/(max_allowable_withdraw.loc[cond3]-0.25)

In [26]:
# dam capacity to surface rwr ratio normalization
# Provinces are split into four equal-width bins of the mean of the two normalized
# rainfall-variation scores; each bin gets its own ideal dam-capacity ratio.
variation_coeff_agg=pd.Series(
    normalized[['rain_coeff_variation','monthly_varicoeff']].mean(axis=1),
    index=variables['province'],
)
variation_coeff_bins=pd.cut(variation_coeff_agg,bins=4,labels=['Q1','Q2','Q3','Q4'])
ideal_damcap=pd.Series(index=variables['province'],dtype='float64')
for _bin_label,_ideal_ratio in [('Q1',1),('Q2',0.9),('Q3',0.8),('Q4',0.7)]:
    ideal_damcap[variation_coeff_bins==_bin_label]=_ideal_ratio
normalized['damcap_rwr_ratio']=damcap_normalize(sub_indicators['damcap_rwr_ratio'],ideal_damcap)
# 'khuz' is pinned to the best score regardless of the computed value.
normalized.loc['khuz','damcap_rwr_ratio']=1

# modifying modern irrigation indicator values based on aggregate GW reservoir deficits
gw_deficit_agg=pd.Series(
    aggregate(normalized[['gw_agdef_gwrwr','gw_andef_withdraw']]),
    index=variables['province'],
)
gw_deficit_bins=pd.cut(gw_deficit_agg,bins=4,labels=['Q1','Q2','Q3','Q4'])
gw_modifier=pd.Series(index=variables['province'],dtype='float64')
for _bin_label,_modifier in [('Q1',0.7),('Q2',0.8),('Q3',0.9),('Q4',1)]:
    gw_modifier[gw_deficit_bins==_bin_label]=_modifier
# NOTE(review): this scales normalized['modern_irrig'] in place, so re-running this
# cell without re-running the normalization cell applies the modifier again.
normalized['modern_irrig']=normalized['modern_irrig']*gw_modifier

# Floor every score at 0.01.
for col in normalized.columns:
    below_floor=normalized[col].lt(0.01)
    normalized.loc[below_floor,col]=0.01

In [27]:
# Short codes for the sub-indicators, listed in the same order as the columns of `normalized`.
res_sub_columns=['ACV','MCV','Dag','Dan','GWDagr','GWDind','GWDurb','GWDrur','SWS','GWS','APA','ATA','IRWR']
acc_sub_columns=['WAurb','WArur','USP','SAurb','SArur','TPC','WQ','DC']
eco_sub_columns=['AE','MI','EL','NRWurb','NRWrur','IE']

# Group code for each sub-indicator, position by position.
sub_columns1_list=(
    ['R1']*2+['R2']*2+['R3']*4+['R4']*2+['R5']*2+['R7']
    +['A1']*3+['A2']*3+['A3']*2
    +['E1']*3+['E2']*2+['E3']
)
sub_columns2_list=res_sub_columns+acc_sub_columns+eco_sub_columns
arraycolumns=[sub_columns1_list,sub_columns2_list]

# Rounded copy of the normalized scores, relabelled with the short codes (by position).
normalized_pp=normalized.round(decimals=4)
normalized_pp.columns=sub_columns2_list
normalized_pp.head()

Unnamed: 0_level_0,ACV,MCV,Dag,Dan,GWDagr,GWDind,GWDurb,GWDrur,SWS,GWS,...,SArur,TPC,WQ,DC,AE,MI,EL,NRWurb,NRWrur,IE
province,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
azsh,0.8302,0.5918,0.7581,0.8357,0.569,0.4438,0.5411,0.202,0.357,0.0284,...,0.01,0.3865,0.7836,0.4161,0.6485,0.1898,1.0,0.704,0.478,0.422
azgh,0.7622,0.5862,0.7553,0.9169,0.5936,0.5256,0.5206,0.0363,0.386,0.01,...,0.075,0.3893,0.9417,0.7736,0.3468,0.1606,0.6779,0.582,0.634,0.0898
arda,0.9108,0.6729,0.6559,0.5503,0.844,0.0214,0.7503,0.4051,0.01,0.3708,...,0.03,0.4302,0.7355,0.5807,0.5183,0.0829,1.0,0.4867,0.612,0.1878
esfe,0.5431,0.2613,0.01,0.3068,0.326,0.7052,0.857,0.5009,0.2615,0.01,...,0.04,0.6079,0.8122,0.6042,0.1655,0.2295,0.7487,0.7953,0.608,0.3744
albo,0.7328,0.4273,0.0638,0.5578,0.3591,0.0523,0.2335,0.027,0.7754,0.01,...,0.1,0.1469,0.8892,0.9999,0.7073,0.1952,0.9144,0.59,0.282,0.1654


In [28]:
# normalized_lvl2=pd.DataFrame(normalized_ahp[('R4','SWS')]*0,index=sub_indicators.index)
# normalized_lvl2.columns=pd.MultiIndex.from_arrays([['R5'],['CV']], names=('Aggregation Level', 'Indicator'))
# normalized_lvl2[('R5','CV')]=geomean(normalized_ahp.loc[:,'R1'],ahp_weights['R1']).round(decimals=4)
# normalized_lvl2[('R6','GD')]=weighted_sum(normalized_ahp.loc[:,'R2'],ahp_weights['R2']).round(decimals=4)
# normalized_lvl2[('R6','GWD')]=weighted_sum(normalized_ahp.loc[:,'R3'],ahp_weights['R3']).round(decimals=4)
# normalized_lvl2.head()

In [35]:
## Assessing water security using geometric mean aggregation rules

#  calculating indicators
indicators_pp=pd.DataFrame(index=sub_indicators.index)
temp=pd.DataFrame(index=sub_indicators.index)
temp['GD']=geomean(normalized_pp[['Dag','Dan']])
temp['GWD']=geomean(normalized_pp[['GWDagr','GWDind','GWDurb','GWDrur']])
indicators_pp['IRWR']=normalized_pp['IRWR']
indicators_pp['CC']=geomean(normalized_pp[['ACV','MCV','APA','ATA']]).round(decimals=4)
indicators_pp['WS']=geomean(normalized_pp[['SWS','GWS']]).round(decimals=4)
# if a province has low groundwater reservoir deficit then high dependency on groundwater
# is considered okay, i.e. if GD >= 0.8 then GWD is not involved in the aggregation.
# The upper bound is inclusive: with `<0.8` / `>0.8` a province with GD exactly 0.8 matched
# neither rule and was left with GW = NaN, which DataFrame.product then skips while
# geomean's exponent still counts four columns.
indicators_pp.loc[temp['GD']<0.8,'GW']=geomean(temp[['GD','GWD']]).round(decimals=4)
indicators_pp.loc[temp['GD']>=0.8,'GW']=temp['GD']
indicators_pp['DW']=geomean(normalized_pp[['WAurb','WArur','USP']]).round(decimals=4)
indicators_pp['WQ']=normalized_pp['WQ']
indicators_pp['DC']=normalized_pp['DC']
indicators_pp['SS']=geomean(normalized_pp[['SAurb','SArur','TPC']]).round(decimals=4)
indicators_pp['Eagr']=geomean(normalized_pp[['AE','MI','EL']]).round(decimals=4)
indicators_pp['IE']=normalized_pp['IE']
indicators_pp['NRW']=geomean(normalized_pp[['NRWurb','NRWrur']]).round(decimals=4)

#  calculating dimensions
Dimensions=pd.DataFrame(index=sub_indicators.index)
Dimensions['Resource']=geomean(indicators_pp[['IRWR','CC','WS','GW']]).round(decimals=4)
Dimensions['Access']=geomean(indicators_pp[['DW','WQ','DC','SS']]).round(decimals=4)
Dimensions['Economy']=geomean(indicators_pp[['Eagr','IE','NRW']]).round(decimals=4)

#  calculating WSI
Dimensions['WSI']=geomean(Dimensions[['Resource','Access','Economy']]).round(decimals=4)
# NOTE(review): the sorted frame is neither assigned nor the last expression of the cell,
# so it is discarded; the workbook below is written in the original province order.
Dimensions.sort_values('WSI',ascending=False)

Dimensions.to_excel('WSI_geomean4.xlsx')

In [32]:
## Assessing water security: sub-indicators are aggregated into indicators with the
## arithmetic mean; indicators, dimensions and the WSI are aggregated with the geometric mean

#  calculating indicators
indicators_pp=pd.DataFrame(index=sub_indicators.index)
temp=pd.DataFrame(index=sub_indicators.index)
temp['GD']= normalized_pp[['Dag','Dan']].mean(axis=1)
temp['GWD']= normalized_pp[['GWDagr','GWDind','GWDurb','GWDrur']].mean(axis=1)
indicators_pp['IRWR']=normalized_pp['IRWR']
indicators_pp['CC']= normalized_pp[['ACV','MCV','APA','ATA']].mean(axis=1).round(decimals=4)
indicators_pp['WS']= normalized_pp[['SWS','GWS']].mean(axis=1).round(decimals=4)
# if a province has low groundwater reservoir deficit then high dependency on groundwater
# is considered okay, i.e. if GD >= 0.8 then GWD is not involved in the aggregation.
# The upper bound is inclusive: with `<0.8` / `>0.8` a province with GD exactly 0.8 matched
# neither rule and was left with GW = NaN, which DataFrame.product then skips while
# geomean's exponent still counts four columns.
indicators_pp.loc[temp['GD']<0.8,'GW']= temp[['GD','GWD']].mean(axis=1).round(decimals=4)
indicators_pp.loc[temp['GD']>=0.8,'GW']=temp['GD']
indicators_pp['DW']= normalized_pp[['WAurb','WArur','USP']].mean(axis=1).round(decimals=4)
indicators_pp['WQ']=normalized_pp['WQ']
indicators_pp['DC']=normalized_pp['DC']
indicators_pp['SS']= normalized_pp[['SAurb','SArur','TPC']].mean(axis=1).round(decimals=4)
indicators_pp['Eagr']= normalized_pp[['AE','MI','EL']].mean(axis=1).round(decimals=4)
indicators_pp['IE']=normalized_pp['IE']
indicators_pp['NRW']= normalized_pp[['NRWurb','NRWrur']].mean(axis=1).round(decimals=4)

#  calculating dimensions
Dimensions=pd.DataFrame(index=sub_indicators.index)
Dimensions['Resource']= geomean(indicators_pp[['IRWR','CC','WS','GW']]).round(decimals=4)
Dimensions['Access']= geomean(indicators_pp[['DW','WQ','DC','SS']]).round(decimals=4)
Dimensions['Economy']= geomean(indicators_pp[['Eagr','IE','NRW']]).round(decimals=4)

#  calculating WSI
Dimensions['WSI']=geomean( Dimensions[['Resource','Access','Economy']]).round(decimals=4)
Dimensions.sort_values('WSI',ascending=False)


Unnamed: 0_level_0,Resource,Access,Economy,WSI
province,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
hormo,0.4754,0.5249,0.6711,0.5512
marka,0.3991,0.5805,0.5337,0.4982
azsh,0.4395,0.5147,0.5347,0.4945
esfe,0.3494,0.6328,0.4644,0.4683
mazan,0.8138,0.245,0.5079,0.4661
azgh,0.5257,0.6487,0.2784,0.4562
arda,0.4201,0.5321,0.3804,0.4397
gilan,0.8926,0.513,0.1843,0.4386
kersh,0.7152,0.3772,0.2765,0.421
fars,0.4612,0.4606,0.3334,0.4137
