In [14]:
from pathlib import Path

import numpy as np
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile

In [15]:
# Directory holding the Excel workbooks. Change this single constant to point
# the notebook at another copy of the data.
DATA_DIR = Path(r'D:\Personal\University\Master\Thesis\Aggregation-Weighting')

# Read main data
data = pd.read_excel(DATA_DIR / 'data_values.xlsx', sheet_name='data_transpose')
rainfall_data = pd.read_excel(DATA_DIR / 'rainfall_data.xlsx', sheet_name='Sheet1')

# Read data necessary for normalization
# (rows 0, 1 and 2 of this sheet are read by the normalization cell)
ind_type = pd.read_excel(DATA_DIR / 'indicator_type.xlsx', sheet_name='Sheet1')

In [16]:
# Normalization function for the (dam capacity)/(surface RWR) ratio
def damcap_normalize(ser1, ser2):
    """Score every value of ``ser1`` by how close it is to its target in ``ser2``.

    Values and targets are paired by position, not by index label, so the
    function works for any index type (integer, string, ...).

    For each pair ``(x, t)``, with ``hi = ser1.max()`` and ``lo = ser1.min()``:

    * ``x >= t``: score = ``(hi - x) / (hi - t)``
    * otherwise:  score = ``(x - lo) / (t - lo)``

    A value equal to its target therefore scores 1. Scores below 0.01 are
    raised to 0.01. NaN inputs produce NaN scores.

    Parameters
    ----------
    ser1 : pandas.Series
        Observed values. Not modified.
    ser2 : pandas.Series or array-like
        Target values, same length as ``ser1``.

    Returns
    -------
    pandas.Series
        Float scores carrying the index and name of ``ser1``.

    Raises
    ------
    ValueError
        If ``ser1`` and ``ser2`` do not have the same length.
    """
    observed = ser1.to_numpy(dtype=float)
    target = np.asarray(ser2, dtype=float)
    if observed.shape != target.shape:
        raise ValueError(
            'ser1 and ser2 must have the same length, got '
            f'{observed.shape} and {target.shape}'
        )
    if observed.size == 0:
        return pd.Series(observed, index=ser1.index, name=ser1.name)

    # Loop-invariant extremes; pandas max/min skip NaN values.
    upper = ser1.max()
    lower = ser1.min()

    # Both branches are evaluated for every element and then selected, so
    # silence the division warnings raised by the branch that is discarded.
    with np.errstate(divide='ignore', invalid='ignore'):
        above_target = (upper - observed) / (upper - target)
        below_target = (observed - lower) / (target - lower)

    score = np.where(observed >= target, above_target, below_target)
    # Floor at 0.01; NaN compares False and is left untouched.
    score = np.where(score < 0.01, 0.01, score)
    return pd.Series(score, index=ser1.index, name=ser1.name)

# Aggregation function: blend of the geometric and arithmetic row means
def aggregate(df, a=0.5):
    """Combine the columns of ``df`` row by row into a single score.

    The score is ``a * G + (1 - a) * M`` where ``G`` is the row product raised
    to ``1 / number_of_columns`` and ``M`` is the row mean.

    Parameters
    ----------
    df : pandas.DataFrame
        One column per component to aggregate.
    a : float, default 0.5
        Weight given to the geometric part; ``1 - a`` goes to the mean.

    Returns
    -------
    pandas.Series
        One aggregated value per row of ``df``.
    """
    n_columns = len(df.columns)
    geometric_part = df.product(axis=1) ** (1 / n_columns)
    arithmetic_part = df.mean(axis=1)
    return geometric_part * a + arithmetic_part * (1 - a)
def addminagg(df, a=0.5):
    """Combine the columns of ``df`` row by row as a blend of minimum and mean.

    The score is ``a * row_min + (1 - a) * row_mean``.

    Parameters
    ----------
    df : pandas.DataFrame
        One column per component to aggregate.
    a : float, default 0.5
        Weight given to the row minimum; ``1 - a`` goes to the row mean.

    Returns
    -------
    pandas.Series
        One aggregated value per row of ``df``.
    """
    return df.min(axis=1) * a + df.mean(axis=1) * (1 - a)

In [17]:
# Keep the rainfall records with 1366 < syear < 1399 (nearly 30 years)
rainfall_modified = rainfall_data.loc[
    rainfall_data['syear'].gt(1366) & rainfall_data['syear'].lt(1399)
]

# Provinces in order of first appearance, and the records of each one
pr = list(rainfall_modified.ostan.unique())
prdict = {name: rainfall_modified[rainfall_modified.ostan == name] for name in pr}

# Per province: total precipitation (precnew) of every year
annualrain_dict = {
    name: records.groupby('syear').precnew.sum() for name, records in prdict.items()
}

# Coefficients of variation (standard deviation divided by mean)
# - between the yearly totals
interannual_varicoef = {
    name: totals.std() / totals.mean() for name, totals in annualrain_dict.items()
}
# - between the individual records
monthly_varicoef = {
    name: records.precnew.std() / records.precnew.mean()
    for name, records in prdict.items()
}
# Placeholder: one empty frame per province, not filled in by this cell
annualevap_varicoef = {name: pd.DataFrame() for name in pr}

# Turn the monthly coefficients into a frame ordered like data['province'].
# drop([31]) removes the row labelled 31, i.e. the 32nd entry of `pr`.
# NOTE(review): presumably an entry that has no counterpart in `data` - confirm.
monthlyvaricoeff_df = (
    pd.DataFrame(monthly_varicoef.items(), columns=['province', 'monthly_varicoeff'])
    .drop([31])
    .set_index('province')
    .reindex(index=data['province'])
    .reset_index()
)

# Same conversion for the inter-annual coefficients
intanvaricoeff_df = (
    pd.DataFrame(interannual_varicoef.items(), columns=['province', 'rain_coeff_variability'])
    .drop([31])
    .set_index('province')
    .reindex(index=data['province'])
    .reset_index()
)

# Attach both coefficients to the main data (assignment aligns on the row index)
data['rain_coeff_variation'] = intanvaricoeff_df['rain_coeff_variability']
data['monthly_varicoeff'] = monthlyvaricoeff_df['monthly_varicoeff']

In [18]:
# Creating a dataframe with the variables needed to compute the sub-indicators
variables = pd.DataFrame()
variables['province'] = data['province']

# Internal renewable water resources (IRWR) and their surface / groundwater split
variables['irwr'] = data['precipitation'] - data['evaporation']
variables['surf_irwr'] = variables['irwr'] * data['runoff_coeff']
# NOTE(review): positional slice; presumably the three withdraw_surf_* columns - confirm.
variables['withdraw_surf'] = data.iloc[:, 5:8].sum(axis=1)
variables['gw_irwr'] = variables['irwr'] * (1 - data['runoff_coeff'])
# NOTE(review): positional slice; presumably the three withdraw_gw_* columns - confirm.
variables['withdraw_gw'] = data.iloc[:, 8:11].sum(axis=1)

# Withdrawals per sector (groundwater + surface water)
variables['withdraw_agr'] = data['withdraw_gw_agr'] + data['withdraw_surf_agr']
variables['withdraw_ind'] = data['withdraw_gw_ind'] + data['withdraw_surf_ind']

# Sanitation access weighted by the urban / rural population shares
variables['access_sanitation_total'] = (
    data['access_sanitation_urban'] * data['urban_pop_ratio']
    + data['access_sanitation_rural'] * (1 - data['urban_pop_ratio'])
)

# Groundwater deficits: sign-flipped variations
variables['deficit_gw_annual'] = data['annual_gw_variation'] * (-1)
variables['deficit_gw_aggregate'] = data['aggregate_gw_variation'] * (-1)

# Total withdrawal over every source and sector.
# (No NaN placeholder column is created first: it was overwritten immediately
# and the `np.NaN` alias no longer exists in NumPy 2.)
variables['withdraw_total'] = data[[
    'withdraw_surf_agr', 'withdraw_surf_ind', 'withdraw_surf_dom',
    'withdraw_gw_agr', 'withdraw_gw_ind', 'withdraw_gw_dom',
]].sum(axis=1)
variables['agr_withdraw_ratio'] = variables['withdraw_agr'] / variables['withdraw_total']

# Setting negative deficit values equal to zero
variables.loc[variables['deficit_gw_annual'] < 0, 'deficit_gw_annual'] = 0
variables.loc[variables['deficit_gw_aggregate'] < 0, 'deficit_gw_aggregate'] = 0

# Allowable groundwater withdrawal: groundwater IRWR minus 1/17 of the aggregate deficit.
# NOTE(review): 17 is presumably a number of years - confirm and name it.
variables['withdraw_gw_allowable'] = variables['gw_irwr'] - (variables['deficit_gw_aggregate'] / 17)

# Groundwater withdrawn above the allowable amount (never negative), and its
# share of the agricultural withdrawal
variables['withdraw_agr_decrease'] = variables['withdraw_gw'] - variables['withdraw_gw_allowable']
variables.loc[variables['withdraw_agr_decrease'] < 0, 'withdraw_agr_decrease'] = 0
variables['agrwat_lost_ratio'] = variables['withdraw_agr_decrease'] / variables['withdraw_agr']

In [19]:
# Creating a dataframe for the sub-indicators.
# The dict order below is the column order of the resulting frame.
sub_indicators = pd.DataFrame({
    # Dimension 1: Resources
    'irwr_percap': variables['irwr'] * (10**6) / data['population'],
    'pop_growth': 1 + data['pop_growth'] / 100,
    'rain_coeff_variation': data['rain_coeff_variation'],
    'monthly_varicoeff': data['monthly_varicoeff'],
    'anomaly_rain': abs(data['anomaly_rain']),
    'anomaly_temp': data['anomaly_temp'],
    'withdraw_surf_ratio': variables['withdraw_surf'] / variables['surf_irwr'],
    'withdraw_gw_ratio': variables['withdraw_gw'] / variables['gw_irwr'],
    'gw_andef_withdraw': variables['deficit_gw_annual'] / variables['withdraw_gw'],
    'gw_agdef_gwrwr': variables['deficit_gw_aggregate'] / variables['gw_irwr'],
    'agr_dependency_gw': data['withdraw_gw_agr'] / variables['withdraw_agr'],
    'urbanwat_dependency_gw': data['withdraw_urban_gw'] / data['produced_urban_wat'],
    'ruralwat_dependency_gw': data['withdraw_rural_gw'] / data['produced_rural_wat'],
    'ind_dependency_gw': data['withdraw_gw_ind'] / variables['withdraw_ind'],
    # Dimension 2: Access
    'quality_proxy': data['urban_fam_treatwat'] / data['total_urban_fam'],
    'access_wat_urban': data['access_wat_urban'],
    'access_wat_rural': data['access_wat_rural'],
    'access_sanitation_urban': data['access_sanitation_urban'],
    'access_sanitation_rural': data['access_sanitation_rural'],
    'treated_municipal_wastewater': data['waste_facility_cap'] / data['daily_produced_waste'],
    'under_stress_pop': data['under_stress_pop'],
    # Dimension 3: Economy
    'efficiency_agr': data['agr_added_value'] / variables['withdraw_agr'],
    'modern_irrig': data['land_irrig_modern'] / data['land_irrig_tot'],
    'efficiency_ind': data['ind_added_value'] / variables['withdraw_ind'],
    'unaccounted_wat_urban': data['unaccounted_wat_urban'],
    'unaccounted_wat_rural': data['unaccounted_wat_rural'],
    'employment_lost_agr': variables['agrwat_lost_ratio'] * data['agr_employment'],
    'damcap_rwr_ratio': data['dam_cap'] / variables['surf_irwr'],
})

# Per-capita IRWR is divided by the population growth factor (1 + growth% / 100)
sub_indicators['irwr_percap'] = sub_indicators['irwr_percap'] / sub_indicators['pop_growth']

# Label the rows by province from here on
sub_indicators.index = variables['province']

# Manual override: the dam-capacity ratio of province 'khuz' is forced to 1.
# NOTE(review): the reason is not visible in this notebook - document it.
sub_indicators.loc['khuz', 'damcap_rwr_ratio'] = 1


In [20]:
# Normalizing data
# Start from a float frame shaped like sub_indicators (0 where the value times 0
# is 0, NaN otherwise) so that fractional scores can be written into any column.
normalized = (sub_indicators * 0).astype(float)

# Per-province ratio of allowable groundwater withdrawal to groundwater IRWR.
# Its index is replaced positionally, so `variables` must be in the same row
# order as `sub_indicators`.
max_allowable_withdraw = variables['withdraw_gw_allowable'] / variables['gw_irwr']
max_allowable_withdraw.index = sub_indicators.index

# Groundwater withdrawal ratios at or below this value get the full score of 1
GW_FULL_SCORE_RATIO = 0.25

# ind_type layout per column: row 0 = value scored 1, row 1 = value scored 0.01,
# row 2 = indicator type ('b', 'l' or 'diff').
for col in ind_type.columns:
    full_score_at = ind_type.loc[0, col]
    min_score_at = ind_type.loc[1, col]
    indicator_type = ind_type.loc[2, col]
    values = sub_indicators[col]

    if indicator_type == 'b':  # the bigger the better
        normalized.loc[values >= full_score_at, col] = 1
        normalized.loc[values <= min_score_at, col] = 0.01
        cond = (values > min_score_at) & (values < full_score_at)
        normalized.loc[cond, col] = (values.loc[cond] - min_score_at) / (full_score_at - min_score_at)
    elif indicator_type == 'l':  # the lower the better
        normalized.loc[values <= full_score_at, col] = 1
        normalized.loc[values >= min_score_at, col] = 0.01
        cond = (values < min_score_at) & (values > full_score_at)
        normalized.loc[cond, col] = (min_score_at - values.loc[cond]) / (min_score_at - full_score_at)
    elif indicator_type == 'diff':  # GW withdrawal to RWR ratio normalization
        # Bounds are inclusive, like in the two branches above, so a value that
        # sits exactly on a threshold is scored instead of keeping the initial 0.
        # The assignment order is kept: the full-score rule is applied second.
        normalized.loc[values >= max_allowable_withdraw, col] = 0.01
        normalized.loc[values <= GW_FULL_SCORE_RATIO, col] = 1
        cond = (values < max_allowable_withdraw) & (values > GW_FULL_SCORE_RATIO)
        normalized.loc[cond, col] = (
            (max_allowable_withdraw.loc[cond] - values.loc[cond])
            / (max_allowable_withdraw.loc[cond] - GW_FULL_SCORE_RATIO)
        )

# pop_growth was only a helper for irwr_percap; it is not an indicator itself
normalized = normalized.drop(columns='pop_growth')

In [21]:
# Dam capacity to surface RWR ratio normalization
# Mean of the two normalized rainfall-variability scores, cut into four
# equal-width bins (pd.cut) labelled Q1..Q4
variation_coeff_agg = pd.Series(
    normalized[['rain_coeff_variation', 'monthly_varicoeff']].mean(axis=1),
    index=variables['province'],
)
variation_coeff_bins = pd.cut(variation_coeff_agg, 4, labels=['Q1', 'Q2', 'Q3', 'Q4'])

# Target dam-capacity ratio per bin; provinces without a bin stay NaN
ideal_damcap = pd.Series(index=variables['province'], dtype='float64')
for bin_label, target_ratio in (('Q1', 1), ('Q2', 0.9), ('Q3', 0.8), ('Q4', 0.7)):
    ideal_damcap[variation_coeff_bins == bin_label] = target_ratio

normalized['damcap_rwr_ratio'] = damcap_normalize(sub_indicators['damcap_rwr_ratio'], ideal_damcap)
# Manual override for province 'khuz'
normalized.loc['khuz', 'damcap_rwr_ratio'] = 1

# Modifying modern irrigation indicator values based on aggregate GW reservoir deficits
# Aggregated groundwater-deficit score, split into quartiles (pd.qcut) labelled Q1..Q4
gw_deficit_agg = pd.Series(
    aggregate(normalized[['gw_agdef_gwrwr', 'gw_andef_withdraw']]),
    index=variables['province'],
)
gw_deficit_bins = pd.qcut(gw_deficit_agg, 4, labels=['Q1', 'Q2', 'Q3', 'Q4'])

# Multiplier per quartile; provinces without a quartile stay NaN
gw_modifier = pd.Series(index=variables['province'], dtype='float64')
for bin_label, factor in (('Q1', 0.8), ('Q2', 0.9), ('Q3', 1), ('Q4', 1)):
    gw_modifier[gw_deficit_bins == bin_label] = factor

normalized['modern_irrig'] = normalized['modern_irrig'] * gw_modifier

In [22]:
# Sub-indicator aggregation using a mix of add-min (addminagg) and plain averaging.
# `a` is the weight addminagg gives to the row minimum.
a = 0.5

# Intermediate climate components
temp = pd.DataFrame(index=normalized.index).assign(
    precipitation=normalized[['rain_coeff_variation', 'monthly_varicoeff', 'anomaly_rain']].mean(axis=1),
    anomaly_temp=normalized['anomaly_temp'],
)

# Keyword order below is the column order of `indicators`
indicators = pd.DataFrame(index=normalized.index).assign(
    # dimension 1
    irwr_percap=normalized['irwr_percap'],
    climate=addminagg(temp[['precipitation', 'anomaly_temp']], a),
    water_stress=addminagg(normalized[['withdraw_surf_ratio', 'withdraw_gw_ratio']], a),
    # alternative considered: normalized[['gw_andef_withdraw','gw_agdef_gwrwr']].min(axis=1)
    deficit_gw=addminagg(normalized[['gw_andef_withdraw', 'gw_agdef_gwrwr']], a),
    dependency_gw=normalized[[
        'agr_dependency_gw', 'ruralwat_dependency_gw',
        'urbanwat_dependency_gw', 'ind_dependency_gw',
    ]].mean(axis=1),
    # dimension 2
    quality_proxy=normalized['quality_proxy'],
    drinking_water_availability=addminagg(
        normalized[['access_wat_urban', 'access_wat_rural', 'under_stress_pop']], a),
    sanitation=addminagg(
        normalized[['access_sanitation_urban', 'access_sanitation_rural', 'treated_municipal_wastewater']], a),
    dam_capacity=normalized['damcap_rwr_ratio'],
    # dimension 3
    agriculture_economy=addminagg(
        normalized[['efficiency_agr', 'employment_lost_agr', 'modern_irrig']], a),
    efficiency_ind=normalized['efficiency_ind'],
    unaccounted_water=addminagg(normalized[['unaccounted_wat_urban', 'unaccounted_wat_rural']], a),
)



In [43]:
# Sub-indicator aggregation based mainly on simple averaging.
# Exceptions: climate and dependency_gw use addminagg, deficit_gw uses the row minimum.
# This cell builds `indicators` and `temp` from scratch, replacing any earlier values.
# `a` is the weight addminagg gives to the row minimum.
a = 0.5

# Intermediate climate components
temp = pd.DataFrame(index=normalized.index).assign(
    precipitation=normalized[['rain_coeff_variation', 'monthly_varicoeff', 'anomaly_rain']].mean(axis=1),
    anomaly_temp=normalized['anomaly_temp'],
)

# Keyword order below is the column order of `indicators`
indicators = pd.DataFrame(index=normalized.index).assign(
    # dimension 1
    irwr_percap=normalized['irwr_percap'],
    climate=addminagg(temp[['precipitation', 'anomaly_temp']], a),
    water_stress=normalized[['withdraw_surf_ratio', 'withdraw_gw_ratio']].mean(axis=1),
    deficit_gw=normalized[['gw_andef_withdraw', 'gw_agdef_gwrwr']].min(axis=1),
    dependency_gw=addminagg(normalized[[
        'agr_dependency_gw', 'ruralwat_dependency_gw',
        'urbanwat_dependency_gw', 'ind_dependency_gw',
    ]], a),
    # dimension 2
    quality_proxy=normalized['quality_proxy'],
    drinking_water_availability=normalized[
        ['access_wat_urban', 'access_wat_rural', 'under_stress_pop']].mean(axis=1),
    sanitation=normalized[
        ['access_sanitation_urban', 'access_sanitation_rural', 'treated_municipal_wastewater']].mean(axis=1),
    dam_capacity=normalized['damcap_rwr_ratio'],
    # dimension 3
    agriculture_economy=normalized[
        ['efficiency_agr', 'employment_lost_agr', 'modern_irrig']].mean(axis=1),
    efficiency_ind=normalized['efficiency_ind'],
    unaccounted_water=normalized[['unaccounted_wat_urban', 'unaccounted_wat_rural']].mean(axis=1),
)

In [25]:
# Aggregating indicators to calculate the three dimensions and the composite index.
# The dimension split is positional: columns 0-4, 5-8 and 9+ of `indicators`.
indicators_d1 = pd.DataFrame(indicators.iloc[:, 0:5], index=indicators.index)
indicators_d2 = pd.DataFrame(indicators.iloc[:, 5:9], index=indicators.index)
indicators_d3 = pd.DataFrame(indicators.iloc[:, 9:], index=indicators.index)

WSI = pd.DataFrame(index=indicators.index)
# Float zeros, so the partial float assignments to d1 below need no dtype change
WSI[['d1', 'd2', 'd3']] = 0.0
b = 0.5

# Dimension 1 depends on the groundwater-deficit score:
# - below 0.5: the last two d1 columns are first merged with addminagg, and the
#   result is aggregated together with the first three columns;
# - 0.5 and above: addminagg over the first four columns only.
# The second mask is inclusive, so a score of exactly 0.5 is aggregated too
# instead of keeping the initial 0. Rows with a NaN score match neither mask
# and keep d1 = 0.
tp = pd.DataFrame(addminagg(indicators_d1.iloc[:, 3:], b))
WSI.loc[indicators_d1['deficit_gw'] < 0.5, 'd1'] = addminagg(
    indicators_d1.iloc[:, 0:3].merge(tp, how='outer', right_index=True, left_index=True), b)
# WSI.loc[indicators_d1['deficit_gw']<0.5,'d1']=addminagg(indicators_d1.iloc[:,0:4],b)*0.8+indicators_d1.iloc[:,4]*0.2
WSI.loc[indicators_d1['deficit_gw'] >= 0.5, 'd1'] = addminagg(indicators_d1.iloc[:, 0:4], b)

# Dimension 2: weighted mean (0.5 drinking water, 0.25 sanitation, 0.25 dam
# capacity) blended with the minimum of the three components.
# NOTE(review): here `b` weights the mean part, whereas addminagg's weight applies
# to the minimum part; the two agree only because b == 0.5 - confirm the intent.
temp['drinking_water'] = indicators[['drinking_water_availability', 'quality_proxy']].min(axis=1)
temp['sanitation'] = indicators['sanitation']
temp['dam_capacity'] = indicators['dam_capacity']
WSI['d2'] = (
    (0.25 * temp['dam_capacity'] + 0.25 * temp['sanitation'] + 0.5 * temp['drinking_water']) * b
    + (1 - b) * temp[['drinking_water', 'sanitation', 'dam_capacity']].min(axis=1)
)
# WSI['d2']=(0.25*temp['dam_capacity']+0.25*temp['sanitation']+0.5*temp['drinking_water'])*b+(1-b)*temp[['drinking_water','sanitation']].min(axis=1)

# Dimension 3
WSI['d3'] = addminagg(indicators_d3, b)

# Composite index in four flavours, each computed from the three dimensions only
dimensions = WSI[['d1', 'd2', 'd3']]
WSI['wsi_mean'] = dimensions.mean(axis=1)
WSI['wsi_mixed'] = addminagg(dimensions)
WSI['wsi_addgeo'] = aggregate(dimensions)
WSI['wsi_geo'] = aggregate(dimensions, 1)
WSI.sort_values('wsi_mixed', ascending=False)


Unnamed: 0_level_0,d1,d2,d3,wsi_mean,wsi_mixed,wsi_addgeo,wsi_geo
province,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
hormo,0.352232,0.275739,0.565975,0.397982,0.336861,0.389104,0.380226
mazan,0.673851,0.20527,0.389677,0.422932,0.314101,0.400338,0.377744
azsh,0.286305,0.284756,0.426958,0.332673,0.308714,0.329591,0.326509
marka,0.276686,0.32578,0.407384,0.336616,0.306651,0.334499,0.332383
lores,0.695183,0.199758,0.233649,0.376197,0.287977,0.347573,0.31895
gilan,0.800521,0.303367,0.12359,0.40916,0.266375,0.359966,0.310772
ilam,0.599694,0.304468,0.172935,0.359032,0.265984,0.337552,0.316071
esfe,0.207043,0.373409,0.35743,0.312627,0.259835,0.307478,0.302328
kord,0.559454,0.365393,0.153487,0.359445,0.256466,0.337424,0.315403
fars,0.314514,0.240493,0.240078,0.265028,0.252553,0.263936,0.262844


In [None]:
# Show the WSI table ordered by the 'wsi_mean' column, highest value first
WSI.sort_values(by='wsi_mean', ascending=False)


In [None]:
# Show the WSI table ordered by the 'wsi_addgeo' column, highest value first
WSI.sort_values(by='wsi_addgeo', ascending=False)