In [179]:
import numpy as np
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile

In [180]:
#Read main data
data=pd.read_excel(r'D:\Personal\University\Master\Thesis\Aggregation-Weighting\data_values.xlsx',sheet_name='data_transpose')
rainfall_data=pd.read_excel(r'D:\Personal\University\Master\Thesis\Aggregation-Weighting\rainfall_data.xlsx',sheet_name='Sheet1')

# Read data necessary for normalization
ind_type=pd.read_excel(r'D:\Personal\University\Master\Thesis\Aggregation-Weighting\indicator_type.xlsx',sheet_name='Sheet1')

In [181]:
#defining normalization function for (dam capicity)/(surface rwr)
def damcap_normalize(ser1,ser2):
    result=ser1*0
    length=ser1.size
    for i in range(length):
        if ser1[i]>=ser2[i]:
            result[i]=(ser1.max()-ser1[i])/(ser1.max()-ser2[i])
        else:
            result[i]=(ser1[i]-ser1.min())/(ser2[i]-ser1.min())
        if result[i]<0.01:
            result[i]=0.01
    return result  

#defining aggregation functions
def aggregate(df,a=0.5):
    size=len(df.columns)
    result=(df.product(axis=1)**(1/size))*a+df.mean(axis=1)*(1-a)
    return result

#defining weighted aggregation functions
def weighted_addminagg(dfi,dfw,a=0.5):
    weight_array=np.array(dfw)[0]
    dfmin=dfi.min(axis=1)
    weighted_sum=dfi.multiply(weight_array,axis='columns').sum(axis=1)
    result=a*dfmin+(1-a)*weighted_sum
    return result

def weighted_sum(dfi,dfw):
    weight_array=np.array(dfw)
    weighted_sum=dfi.multiply(weight_array,axis='columns').sum(axis=1)
    return weighted_sum

def geomean(dfi):
    size=len(dfi.columns)
    weighted_geomean=dfi.pow(weight_array,axis='columns').product(axis='columns')
    return weighted_geomean

def weighted_geomean(dfi,dfw):
    weight_array=np.array(dfw)
    weighted_geomean=dfi.pow(weight_array,axis='columns').product(axis='columns')
    return weighted_geomean

def weighted_addgeo(dfi,dfw,a=0.5):
    weight_array=np.array(dfw)[0]
    weighted_geomean=dfi.pow(weight_array,axis='columns').product(axis='columns')
    weighted_sum=dfi.multiply(weight_array,axis='columns').sum(axis=1)
    result= a*weighted_geomean+(1-a)*weighted_sum
    return result

In [182]:
#selecting rainfall data after 1366 (nearly 30 years)
rainfall_modified=rainfall_data.loc[(rainfall_data['syear']>1366) & (rainfall_data['syear']<1399)]

#making a dictionary of provinces and their respective data
pr=list(rainfall_modified.ostan.unique())
prdict={elem:pd.DataFrame() for elem in pr}
for key in prdict.keys():
    prdict[key]=rainfall_modified[:][rainfall_modified.ostan==key]
    
#making a dictionary consisting of province names as keys and annual precipitation as respective values
annualrain_dict={x:pd.DataFrame() for x in pr}
for key in annualrain_dict.keys():
    annualrain_dict[key]=prdict[key].groupby('syear').precnew.sum()

#calculating coefficients of variation
interannual_varicoef={x:pd.DataFrame() for x in pr}
for key in interannual_varicoef.keys():
    interannual_varicoef[key]=annualrain_dict[key].std()/annualrain_dict[key].mean()
monthly_varicoef={x:pd.DataFrame() for x in pr}
for key in monthly_varicoef.keys():
    monthly_varicoef[key]=prdict[key].precnew.std()/prdict[key].precnew.mean()
annualevap_varicoef={x:pd.DataFrame() for x in pr}

#converting monthly coefficient of variation to dataframe and sorting it based on original data
monthlyvaricoeff_df = pd.DataFrame(monthly_varicoef.items(),columns=['province', 'monthly_varicoeff'])
monthlyvaricoeff_df = monthlyvaricoeff_df.drop([31])
monthlyvaricoeff_df = monthlyvaricoeff_df.set_index('province')
monthlyvaricoeff_df = monthlyvaricoeff_df.reindex(index=data['province'])
monthlyvaricoeff_df = monthlyvaricoeff_df.reset_index()

#converting annual coefficient of variation to dataframe and sorting it based on original data
intanvaricoeff_df = pd.DataFrame(interannual_varicoef.items(),columns=['province', 'rain_coeff_variability'])
intanvaricoeff_df = intanvaricoeff_df.drop([31])
intanvaricoeff_df = intanvaricoeff_df.set_index('province')
intanvaricoeff_df = intanvaricoeff_df.reindex(index=data['province'])
intanvaricoeff_df = intanvaricoeff_df.reset_index()

# adding calculated coefficients of variation to the data
data['rain_coeff_variation']=intanvaricoeff_df['rain_coeff_variability']
data['monthly_varicoeff']= monthlyvaricoeff_df['monthly_varicoeff']

In [183]:
#Creating a dataframe consisting of necessary variables for the calculation of sub-indicators
variables=pd.DataFrame()
variables['province']=data.province
variables['irwr']=data.precipitation-data.evaporation
variables['surf_irwr']=(data.precipitation-data.evaporation)*data.runoff_coeff
variables['withdraw_surf']=data.iloc[:,5:8].sum(axis=1)
variables['gw_irwr']=(data.precipitation-data.evaporation)*(1-data.runoff_coeff)
variables['withdraw_gw']=data.iloc[:,8:11].sum(axis=1)
variables['withdraw_agr']=data.withdraw_gw_agr+data.withdraw_surf_agr
variables['withdraw_ind']=data.withdraw_gw_ind+data.withdraw_surf_ind
variables['access_sanitation_total']=(data.access_sanitation_urban*data.urban_pop_ratio+data.access_sanitation_rural*(1-data.urban_pop_ratio))
variables['deficit_gw_annual']=data.annual_gw_variation*(-1)
variables['deficit_gw_aggregate']=data.aggregate_gw_variation*(-1)
variables['withdraw_total']=np.NaN
variables['withdraw_total']=data[['withdraw_surf_agr','withdraw_surf_ind','withdraw_surf_dom','withdraw_gw_agr','withdraw_gw_ind','withdraw_gw_dom']].sum(axis=1)
variables['agr_withdraw_ratio']=variables['withdraw_agr']/variables['withdraw_total']
variables['access_sanitation_total']=data['access_sanitation_urban']*data['urban_pop_ratio']+data['access_sanitation_rural']*(1-data['urban_pop_ratio'])

#setting negative deficit values equal to zero
variables.loc[variables['deficit_gw_annual'] < 0 ,'deficit_gw_annual']=0
variables.loc[variables['deficit_gw_aggregate'] < 0 ,'deficit_gw_aggregate']=0
variables['withdraw_gw_allowable']=variables.gw_irwr-(variables.deficit_gw_aggregate/17)
variables['withdraw_agr_decrease']=variables.withdraw_gw-variables.withdraw_gw_allowable
variables.loc[variables['withdraw_agr_decrease']<0,'withdraw_agr_decrease']=0
variables['agrwat_lost_ratio']=variables.withdraw_agr_decrease/variables.withdraw_agr

In [184]:
# calculating trade-off ratio between modern irrigation and employment lost

water_saved_perhec=variables['withdraw_agr']/(2*data['land_irrig_tot']-data['land_irrig_modern'])

# wwdpmii= water withdrawal decrease per modern irrigation increase (percent)
wwdpmii=water_saved_perhec*0.01*data['land_irrig_tot']
#wdn=water decrease necessary for compensating the effect of lowering 1 percent modern irrigation
wdn=variables['withdraw_agr_decrease']+wwdpmii
#elsftr=employment lost for tradeoff ratio & mitr=modern irrigation for tradeoff ratio
eltr=wdn*data.agr_employment/variables.withdraw_agr
mitr=data.land_irrig_modern/data.land_irrig_tot-0.01
eln=variables['withdraw_agr_decrease']*data.agr_employment/variables.withdraw_agr
tradeoff_ratio=(eltr-eln)*100/18


In [185]:
#Creating a dataframe for the sub-indicators
#Dimension 1: Resources
sub_indicators=pd.DataFrame()
sub_indicators=pd.DataFrame()

sub_indicators['rain_coeff_variation']=(data['rain_coeff_variation']-data['rain_coeff_variation'].min())/(data['rain_coeff_variation'].max()-data['rain_coeff_variation'].min())
sub_indicators['monthly_varicoeff']=(data['monthly_varicoeff']-data['monthly_varicoeff'].min())/(data['monthly_varicoeff'].max()-data['monthly_varicoeff'].min())
sub_indicators['anomaly_rain']=abs(data['anomaly_rain'])
sub_indicators['anomaly_temp']=data['anomaly_temp']

sub_indicators['gw_agdef_gwrwr']=variables.deficit_gw_aggregate/variables.gw_irwr
sub_indicators['gw_andef_withdraw']=variables.deficit_gw_annual/variables.withdraw_gw

sub_indicators['agr_dependency_gw']=data.withdraw_gw_agr/variables.withdraw_agr
sub_indicators['ind_dependency_gw']=data.withdraw_gw_ind/variables.withdraw_ind
sub_indicators['urbanwat_dependency_gw']=data.withdraw_urban_gw/data.produced_urban_wat
sub_indicators['ruralwat_dependency_gw']=data.withdraw_rural_gw/data.produced_rural_wat

sub_indicators['withdraw_surf_ratio']=variables.withdraw_surf/variables.surf_irwr
sub_indicators['withdraw_gw_ratio']=variables.withdraw_gw/variables.gw_irwr

sub_indicators['irwr_percap']=(variables.irwr*(10**6)/data.population)/(1+data['pop_growth']/100)
#Dimension 2: access

sub_indicators['access_wat_urban']=data['access_wat_urban']
sub_indicators['access_wat_rural']=data['access_wat_rural']
sub_indicators['under_stress_pop']=data['under_stress_pop']
sub_indicators['access_sanitation_urban']=data['access_sanitation_urban']
sub_indicators['access_sanitation_rural']=data['access_sanitation_rural']
sub_indicators['treated_municipal_wastewater']=data.waste_facility_cap/data.daily_produced_waste
sub_indicators['quality_proxy']=data.urban_fam_treatwat/data.total_urban_fam
sub_indicators['damcap_rwr_ratio']=data.dam_cap/variables.surf_irwr

#Dimension 3:Economy
sub_indicators['efficiency_agr']=data.agr_added_value/variables.withdraw_agr
sub_indicators['modern_irrig']=data.land_irrig_modern/data.land_irrig_tot
sub_indicators['employment_lost_agr']=variables.agrwat_lost_ratio*data.agr_employment
sub_indicators['unaccounted_wat_urban']=data['unaccounted_wat_urban']
sub_indicators['unaccounted_wat_rural']=data['unaccounted_wat_rural']
sub_indicators['efficiency_ind']=data.ind_added_value/variables.withdraw_ind

sub_indicators.index=variables['province']
sub_indicators.loc['khuz','damcap_rwr_ratio']=1


In [186]:
#Normalizing Data
normalized=sub_indicators*0
max_allowable_withdraw=variables['withdraw_gw_allowable']/variables['gw_irwr']
max_allowable_withdraw.index=sub_indicators.index

for col in ind_type.columns:
    i=ind_type.columns.get_loc(col)
    if ind_type.loc[2,col]=='b': # The bigger the better indicators
        normalized.loc[sub_indicators[col]>=ind_type.loc[0,col],col]=1
        normalized.loc[sub_indicators[col]<=ind_type.loc[1,col],col]=0.01
        cond=(sub_indicators[col]>ind_type.loc[1,col])& (sub_indicators[col]<ind_type.loc[0,col])
        normalized.loc[cond,col]=(sub_indicators.loc[cond,col]-ind_type.loc[1,col])/(ind_type.loc[0,col]-ind_type.loc[1,col])
    elif ind_type.loc[2,col]=='l':      # The lower the better indicators
        normalized.loc[sub_indicators[col]<=ind_type.loc[0,col],col]=1
        normalized.loc[sub_indicators[col]>=ind_type.loc[1,col],col]=0.01
        cond=(sub_indicators[col]<ind_type.loc[1,col])& (sub_indicators[col]>ind_type.loc[0,col])
        normalized.loc[cond,col]=(ind_type.loc[1,col]-sub_indicators.loc[cond,col])/(ind_type.loc[1,col]-ind_type.loc[0,col])
    elif ind_type.loc[2,col]=='diff':    # GW withdrawal to rwr ratio normalization
        cond1=(sub_indicators[col] > max_allowable_withdraw)
        normalized.loc[cond1,col]=0.01
        cond2=sub_indicators[col]<0.25
        normalized.loc[cond2,col]=1
        cond3=(sub_indicators[col] < max_allowable_withdraw) & (sub_indicators[col]>0.25)
        normalized.loc[cond3,col]=(max_allowable_withdraw.loc[cond3]-sub_indicators.loc[cond3,col])/(max_allowable_withdraw.loc[cond3]-0.25)      

In [187]:
res_sub_columns=['ACV','MCV','DIag','DIan','GWDagr','GWDind','GWDurb','GWDrur','SWS','GWS','PA','TA','RWP']
acc_sub_columns=['CWAurb','CWArur','USP','SAurb','SArur','TPC','WQP','DC']
eco_sub_columns=['AP','MI','EL','NRWurb','NRWrur','IP']
sub_columns1_list= ['R1','R1','R2','R2','R3','R3','R3','R3','R4','R4','R5','R5','R7']+['A1','A1','A1','A2','A2','A2','A3','A3']+['E1','E1','E1','E2','E2','E3']
sub_columns2_list=res_sub_columns+acc_sub_columns+eco_sub_columns
arraycolumns=[sub_columns1_list]+[sub_columns2_list]
sub_indicators_lvl1=pd.DataFrame(sub_indicators).round(decimals=4)
sub_indicators_lvl1.columns=pd.MultiIndex.from_arrays(arraycolumns, names=('Aggregation Level', 'Indicator'))
sub_indicators_lvl1.drop([('R7','RWP'),('A3','WQP'),('A3','DC'),('E3','IP')],axis=1,inplace=True)
sub_indicators_lvl1.iloc[:5,:8]

Aggregation Level,R1,R1,R2,R2,R3,R3,R3,R3
Indicator,ACV,MCV,DIag,DIan,GWDagr,GWDind,GWDurb,GWDrur
province,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
azsh,0.0853,0.2109,5.0009,1.2218,0.9674,0.0411,0.431,0.5562
azgh,0.142,0.216,0.1947,1.6438,0.9789,0.0208,0.4064,0.4744
arda,0.0181,0.1365,9.741,1.1031,1.3766,0.1124,0.156,0.9786
esfe,0.3248,0.5142,11.1196,0.8887,5.4352,0.1733,0.674,0.2948
albo,0.1665,0.3619,12.6169,0.8668,3.7448,0.1105,0.6409,0.9477


In [188]:
# dam capacity to surface rwr ratio normalization    
variation_coeff_agg=pd.Series(normalized[['rain_coeff_variation','monthly_varicoeff']].mean(axis=1),index=variables['province'])
variation_coeff_bins=pd.cut(variation_coeff_agg,4,labels=['Q1','Q2','Q3','Q4'])
ideal_damcap=pd.Series(index=variables['province'],dtype='float64') 
ideal_damcap[variation_coeff_bins=='Q1']=1
ideal_damcap[variation_coeff_bins=='Q2']=0.9
ideal_damcap[variation_coeff_bins=='Q3']=0.8
ideal_damcap[variation_coeff_bins=='Q4']=0.7
normalized['damcap_rwr_ratio']=damcap_normalize(sub_indicators['damcap_rwr_ratio'],ideal_damcap)   
normalized.loc['khuz','damcap_rwr_ratio']=1

# modifying modern irrigation indicator vlues based on aggregate GW reservoir deficits
gw_deficit_agg=pd.Series(aggregate(normalized[['gw_agdef_gwrwr','gw_andef_withdraw']]),index=variables['province'])
gw_deficit_bins=pd.qcut(gw_deficit_agg,4,labels=['Q1','Q2','Q3','Q4'])
gw_modifier=pd.Series(index=variables['province'],dtype='float64')
gw_modifier[gw_deficit_bins=='Q1']=0.8
gw_modifier[gw_deficit_bins=='Q2']=0.9
gw_modifier[gw_deficit_bins=='Q3']=1
gw_modifier[gw_deficit_bins=='Q4']=1
normalized['modern_irrig']=normalized['modern_irrig'].multiply(gw_modifier)


In [189]:
#importing relevant functions to perform PCA
from numpy import eye, asarray, dot, sum, diag
from numpy.linalg import svd
from scipy.sparse.linalg import eigs
from scipy.sparse.linalg import eigsh

#defining function for performing PCA
def PCA(df):
    st=df*0
    for col in df.columns:
        st[col]=(df[col]-df[col].mean())/df[col].std()
    st_cov=np.matrix(st.cov())
    size= len(st_cov)
    eigenvals, temp =eigs(st_cov,k=(size-1))
    eigenvals=eigenvals.real
    temp,eigenvec= eigs(st_cov,k=1,which='LM')
    eigenvec=eigenvec.real
    return (eigenvals,eigenvec)


In [190]:
PCA_weight_lvl1=pd.Series(sub_indicators_lvl1.iloc[0,:8]*0,name='lvl1_weights')
explained_variance_lvl1=[]

for col in ['R1','R2','R3']:
    PCA_weight_lvl1[col]=abs(PCA(sub_indicators_lvl1[col])[1].transpose()[0])
    eigvalsum=PCA(sub_indicators_lvl1[col])[0].sum()
    eigvalmax=PCA(sub_indicators_lvl1[col])[0].max()
    explained_variance_lvl1.append((col,eigvalmax/eigvalsum))

#normalising weights so they add up to unity
for col1 in ['R1','R2','R3']:
    temp=PCA_weight_lvl1[col1].sum()
    for col2 in sub_columns2_list:
        if (col1,col2) in PCA_weight_lvl1.index:            
            PCA_weight_lvl1[(col1,col2)]=(PCA_weight_lvl1[(col1,col2)]/temp).round(4)
PCA_weight_lvl1



Aggregation Level  Indicator
R1                 ACV          0.5000
                   MCV          0.5000
R2                 DIag         0.5000
                   DIan         0.5000
R3                 GWDagr       0.3313
                   GWDind       0.1135
                   GWDurb       0.3128
                   GWDrur       0.2424
Name: lvl1_weights, dtype: float64

In [191]:
explained_variance_lvl1

[('R1', 0.9562286743970412),
 ('R2', 0.5084201506042278),
 ('R3', 0.5078247218459151)]

In [192]:
normalized_pca=pd.DataFrame(normalized).round(decimals=4)
normalized_pca.columns=pd.MultiIndex.from_arrays(arraycolumns, names=('Aggregation Level', 'Indicator'))
normalized_pca_lvl2=normalized_pca.drop(['R1','R2','R3','R7','A3','E3'],axis='columns')
temp=sub_indicators_lvl1.iloc[:,:8]*0
for i in PCA_weight_lvl1.index:
    temp[i]=normalized_pca[i]*PCA_weight_lvl1[i]
normalized_pca_lvl2[('R5','CV')]=temp['R1'].sum(axis=1)
normalized_pca_lvl2[('R6','DI')]=temp['R2'].sum(axis=1)
normalized_pca_lvl2[('R6','GWD')]=temp['R3'].sum(axis=1)

  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


In [102]:
#calculating sub_indicator weights using PCA
PCA_weight_lvl2=pd.Series(normalized_pca_lvl2.iloc[0,:]*0,name='lvl2_weights')
PCA_weight_lvl2
explained_variance_lvl2=[]
indicators_list=sub_columns2_list+['CV','DI','GWD']
for col in ['R4','R5','R6','A1','A2','E1','E2']:
    PCA_weight_lvl2[col]=abs(PCA(normalized_pca_lvl2[col])[1].transpose()[0])
    eigvalsum=PCA(normalized_pca_lvl2[col])[0].sum()
    eigvalmax=PCA(normalized_pca_lvl2[col])[0].max()
    explained_variance_lvl2.append((col,eigvalmax/eigvalsum))

# normalising weights so they add up to unity
for col1 in ['R4','R5','R6','A1','A2','E1','E2']:
    temp=PCA_weight_lvl2[col1].sum()
    for col2 in indicators_list:
        if (col1,col2) in PCA_weight_lvl2.index:            
            PCA_weight_lvl2[(col1,col2)]=(PCA_weight_lvl2[(col1,col2)]/temp).round(4)
PCA_weight_lvl2

Aggregation Level  Indicator
R4                 SWS          0.5000
                   GWS          0.5000
R5                 PA           0.4542
                   TA           0.3877
A1                 CWAurb       0.4951
                   CWArur       0.4959
                   USP          0.0089
A2                 SAurb        0.3486
                   SArur        0.2617
                   TPC          0.3897
E1                 AP           0.3104
                   MI           0.2952
                   EL           0.3944
E2                 NRWurb       0.5000
                   NRWrur       0.5000
R5                 CV           0.1580
R6                 DI           0.5000
                   GWD          0.5000
Name: lvl2_weights, dtype: float64

In [193]:
explained_variance_lvl2

[('R4', 0.8586302222558396),
 ('R5', 0.49220252853619595),
 ('R6', 0.5831585555325042),
 ('A1', 0.4961070369169338),
 ('A2', 0.5642041007149579),
 ('E1', 0.4451278416100695),
 ('E2', 0.5607245309684057)]

In [136]:
# calculating weighted sub_indicators
indicators_weighted=normalized_pca_lvl2*0
# multiplying normalized sub_indicators by their respective weights
for i in PCA_weight_lvl2.index:
    indicators_weighted[i]=normalized_pca_lvl2[i]*PCA_weight_lvl2[i]
    
# calculating indicators from weighted sub_indicators

indicators=pd.DataFrame(index=indicators_weighted.index)
indicators['WS']=indicators_weighted['R4'].sum(axis=1)
indicators['CCI']=indicators_weighted['R5'].sum(axis=1)
indicators['GW']=indicators_weighted['R6'].sum(axis=1)
indicators['RWP']=normalized_pca[('R7','RWP')]

indicators['CW']=indicators_weighted['A1'].sum(axis=1)
indicators['SS']=indicators_weighted['A2'].sum(axis=1)
indicators['WQP']=normalized_pca[('A3','WQP')]
indicators['DC']=normalized_pca[('A3','DC')]

indicators['Eagr']=indicators_weighted['E1'].sum(axis=1)
indicators['NRW']=indicators_weighted['E2'].sum(axis=1)
indicators['IP']=normalized_pca[('E3','IP')]
indicators_columns=list(indicators.columns)
ind_multicol=[['R7','R7','R7','R7','A3','A3','A3','A3','E3','E3','E3']]+[indicators_columns]
indicators.columns=pd.MultiIndex.from_arrays(ind_multicol, names=('Dimension', 'Indicator'))
indicators

Dimension,R7,R7,R7,R7,A3,A3,A3,A3,E3,E3,E3
Indicator,WS,CCI,GW,RWP,CW,SS,WQP,DC,Eagr,NRW,IP
province,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
azsh,0.37155,0.33116,0.591735,0.267,0.791871,0.361002,0.632,0.4161,0.651723,0.591,0.422
azgh,0.27845,0.328903,0.603266,0.6868,0.752151,0.385343,0.9008,0.7736,0.42242,0.608,0.0898
arda,0.5777,0.306301,0.5154,0.34,0.753003,0.340771,0.5504,0.5807,0.582468,0.54935,0.1878
esfe,0.67895,0.222159,0.411224,0.3901,0.704794,0.491003,0.6807,0.6802,0.424089,0.70165,0.3744
albo,0.13025,0.49685,0.347176,0.01,0.715639,0.165721,0.8116,0.9446,0.65428,0.436,0.1654
ilam,0.19475,0.661615,0.514722,1.0,0.79498,0.340266,0.5489,0.2132,0.583357,0.6025,0.01
bush,0.8348,0.054031,0.432864,0.2345,0.536535,0.099801,0.01,0.6369,0.679719,0.48385,0.9746
tehr,0.23845,0.148755,0.40793,0.01,0.778437,0.470611,0.5369,0.4571,0.611558,0.49265,0.7377
chba,0.0442,0.860526,0.495789,1.0,0.777741,0.351381,0.9096,0.5129,0.632079,0.417,0.0145
khoj,0.02525,0.454843,0.224759,0.7917,0.738944,0.183078,0.2531,0.1958,0.185409,0.37565,0.0153


In [139]:
#calculating sub_indicator weights using PCA for arithmetic agg method
PCA_weight_indicators=pd.Series(indicators.iloc[0,:]*0,name='indicator_weights')
explained_indvariance=[]

for col in ['R7','A3','E3']:
    PCA_weight_indicators[col]=abs(PCA(indicators[col])[1].transpose()[0])
    eigvalsum=PCA(indicators[col])[0].sum()
    eigvalmax=PCA(indicators[col])[0].max()
    explained_indvariance.append((col,eigvalmax/eigvalsum))

#normalising weights so they add up to unity
for col1 in ['R7','R7','R7','R7','A3','A3','A3','A3','E3','E3','E3']:
    temp=PCA_weight_indicators[col1].sum()
    for col2 in indicators_columns:
        if (col1,col2) in PCA_weight_indicators.index:            
            PCA_weight_indicators[(col1,col2)]=(PCA_weight_indicators[(col1,col2)]/temp).round(4)
PCA_weight_indicators

Dimension  Indicator
R7         WS           0.1388
           CCI          0.3151
           GW           0.2430
           RWP          0.3030
A3         CW           0.2467
           SS           0.3645
           WQP          0.1333
           DC           0.2555
E3         Eagr         0.1467
           NRW          0.3962
           IP           0.4571
Name: indicator_weights, dtype: float64

In [140]:
Dimensions=pd.DataFrame(index=normalized_pca.index)
Dimensions['Res']=weighted_sum(indicators['R7'],PCA_weight_indicators['R7'])
Dimensions['Acc']=weighted_sum(indicators['A3'],PCA_weight_indicators['A3'])
Dimensions['Eco']=weighted_sum(indicators['E3'],PCA_weight_indicators['E3'])
Dimensions.sort_values('Res')

Unnamed: 0_level_0,Res,Acc,Eco
province,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
khor,0.152636,0.529591,0.243539
tehr,0.182126,0.551936,0.622106
qom,0.211311,0.319329,0.490356
semn,0.228776,0.368886,0.296582
gole,0.24616,0.567997,0.356607
albo,0.26203,0.586485,0.34433
qazv,0.293696,0.442606,0.406072
yazd,0.298944,0.340236,0.439894
bush,0.309135,0.332801,0.736906
sisba,0.339297,0.454567,0.276497


In [148]:
PCA_WSI = abs(PCA(Dimensions)[1].transpose()[0])
# PCA_WSI = PCA_weight_WSI/(PCA_weight_WSI.sum())
PCA_weight_WSI=pd.Series(PCA_WSI,index=[['WSI','WSI','WSI'],['Res','Acc','Eco']])
PCA_weight_WSI=PCA_weight_WSI/(PCA_weight_WSI.sum())
PCA_weight_WSI

WSI  Res    0.449544
     Acc    0.105794
     Eco    0.444662
dtype: float64

In [150]:
WSI=weighted_sum(Dimensions,PCA_weight_WSI)
WSI.sort_values()

province
khor     0.232937
semn     0.273750
khoj     0.314988
sisba    0.323567
gole     0.329320
albo     0.332951
zanj     0.336789
qom      0.346819
qazv     0.359419
yazd     0.365988
kerm     0.367286
khosh    0.374813
hame     0.381862
khuz     0.392011
arda     0.411249
tehr     0.416892
azgh     0.444441
kersh    0.452918
azsh     0.458257
fars     0.458924
esfe     0.464582
kohbu    0.465744
ilam     0.491915
chba     0.493208
bush     0.501852
gilan    0.521786
kord     0.525103
lores    0.543647
mazan    0.551811
marka    0.568130
hormo    0.617574
dtype: float64

In [177]:
PCA_weights=pd.concat([PCA_weight_lvl1,PCA_weight_lvl2[['R4','R5','R6']],PCA_weight_indicators[['R7']]]).sort_index()
PCA_weights=pd.concat([PCA_weights,PCA_weight_lvl2[['A1','A2']],PCA_weight_indicators[['A3']]])
PCA_weights=pd.concat([PCA_weights,PCA_weight_lvl2[['E1','E2']],PCA_weight_indicators[['E3']]])
PCA_weights=pd.concat([PCA_weight_WSI,PCA_weights])
PCA_weights=PCA_weights.round(decimals=4)
PCA_weights=pd.DataFrame(PCA_weights).transpose()
PCA_weights

Unnamed: 0_level_0,WSI,WSI,WSI,R1,R1,R2,R2,R3,R3,R3,...,A3,A3,E1,E1,E1,E2,E2,E3,E3,E3
Unnamed: 0_level_1,Res,Acc,Eco,ACV,MCV,DIag,DIan,GWDagr,GWDind,GWDrur,...,WQP,DC,AP,MI,EL,NRWurb,NRWrur,Eagr,NRW,IP
0,0.4495,0.1058,0.4447,0.5,0.5,0.5,0.5,0.3313,0.1135,0.2424,...,0.1333,0.2555,0.3104,0.2952,0.3944,0.5,0.5,0.1467,0.3962,0.4571


In [178]:
# PCA_weights.to_csv('PCA_weights.csv')