In [2]:
from pathlib import Path

import numpy as np
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile

In [3]:
# Folder that holds every input workbook. Edit this single constant to run the
# notebook on another machine instead of touching each read_excel call.
DATA_DIR = Path(r'D:\Personal\University\Master\Thesis\Aggregation-Weighting')

# Read main data: one row per province (sheet 'data_transpose') and the
# rainfall records used for the variability coefficients.
data = pd.read_excel(DATA_DIR / 'data_values.xlsx', sheet_name='data_transpose')
rainfall_data = pd.read_excel(DATA_DIR / 'rainfall_data.xlsx', sheet_name='Sheet1')

# Read data necessary for normalization. The normalization cell reads rows
# 0 and 1 of each column as thresholds and row 2 as the indicator type
# ('b', 'l' or 'diff').
ind_type = pd.read_excel(DATA_DIR / 'indicator_type.xlsx', sheet_name='Sheet1')

In [4]:
# Defining the normalization function for the (dam capacity)/(surface rwr) ratio
def damcap_normalize(ser1,ser2):
    """Score how close each value of ``ser1`` is to its ideal value in ``ser2``.

    Elements are paired by position. A value equal to its ideal scores 1; the
    score falls linearly to 0 as the value moves up to ``ser1.max()`` or down
    to ``ser1.min()``. Every score below 0.01 is raised to 0.01.

    Parameters
    ----------
    ser1 : pandas.Series
        Observed values. Its index and name are kept on the result.
    ser2 : pandas.Series or array-like
        Ideal value for each element of ``ser1`` (same length).

    Returns
    -------
    pandas.Series
        Float scores indexed like ``ser1``.

    Raises
    ------
    ValueError
        If the two inputs do not have the same length.
    """
    # Work on positional arrays. ``ser1[i]`` with an integer ``i`` is a label
    # lookup on an integer-indexed Series and a deprecated positional fallback
    # on any other index, so the former element-wise loop was index dependent.
    observed = ser1.to_numpy(dtype=float)
    ideal = np.asarray(ser2, dtype=float)
    if observed.shape != ideal.shape:
        raise ValueError("ser1 and ser2 must have the same length")
    highest = ser1.max()
    lowest = ser1.min()

    with np.errstate(divide='ignore', invalid='ignore'):
        # Both branches are evaluated everywhere; np.where keeps the right one.
        above_ideal = (highest - observed) / (highest - ideal)
        below_ideal = (observed - lowest) / (ideal - lowest)
        scores = np.where(observed >= ideal, above_ideal, below_ideal)
        # A value sitting exactly on its ideal is a perfect score, including
        # the degenerate 0/0 case where the ideal equals the series maximum.
        scores = np.where(observed == ideal, 1.0, scores)
        # Floor at 0.01; NaN compares False and is therefore left untouched.
        scores = np.where(scores < 0.01, 0.01, scores)
    return pd.Series(scores, index=ser1.index, name=ser1.name)

#defining aggregation functions
def aggregate(df,a=0.5):
    """Blend the row-wise geometric and arithmetic means of ``df``.

    ``a`` is the share given to the (unweighted) geometric mean and ``1 - a``
    the share given to the arithmetic mean. Returns one value per row.
    """
    n_columns = df.shape[1]
    geometric_part = df.product(axis=1) ** (1 / n_columns)
    arithmetic_part = df.mean(axis=1)
    return geometric_part * a + arithmetic_part * (1 - a)

#defining weighted aggregation functions
def weighted_addminagg(dfi,dfw,a=0.5):
    """Blend each row's minimum with its weighted sum.

    Parameters
    ----------
    dfi : pandas.DataFrame
        Indicator values, one column per indicator.
    dfw : array-like
        One weight per column of ``dfi``, given either as a 1-D sequence or
        as a single-row 2-D table (only the first row is used).
    a : float, default 0.5
        Share given to the row minimum; ``1 - a`` goes to the weighted sum.

    Returns
    -------
    pandas.Series
        ``a * min + (1 - a) * weighted_sum`` for every row of ``dfi``.
    """
    weights = np.asarray(dfw)
    # A one-row table is reduced to that row. 1-D weights are used as given:
    # indexing them with [0] would keep only the first weight and apply it to
    # every column.
    if weights.ndim > 1:
        weights = weights[0]
    row_minimum = dfi.min(axis=1)
    row_weighted_sum = dfi.multiply(weights, axis='columns').sum(axis=1)
    return a * row_minimum + (1 - a) * row_weighted_sum

def weighted_sum(dfi,dfw):
    """Row-wise weighted sum: scale each column of ``dfi`` by its weight in
    ``dfw`` and add the scaled columns together."""
    column_weights = np.array(dfw)
    scaled_columns = dfi.multiply(column_weights, axis='columns')
    return scaled_columns.sum(axis=1)

def geomean(dfi):
    """Unweighted row-wise geometric mean of the columns of ``dfi``.

    Returns a Series with one value per row.

    Raises
    ------
    ValueError
        If ``dfi`` has no columns.
    """
    size = len(dfi.columns)
    if size == 0:
        raise ValueError("geomean needs at least one column")
    # Every column gets the same exponent 1/size. The previous body read an
    # undefined ``weight_array`` and raised NameError on every call.
    return dfi.pow(1 / size).product(axis='columns')

def weighted_geomean(dfi,dfw):
    """Row-wise weighted geometric mean: raise each column of ``dfi`` to its
    weight in ``dfw`` and multiply the results across columns."""
    exponents = np.array(dfw)
    powered_columns = dfi.pow(exponents, axis='columns')
    return powered_columns.product(axis='columns')

def weighted_addgeo(dfi,dfw,a=0.5):
    """Blend the row-wise weighted geometric mean with the weighted sum.

    Parameters
    ----------
    dfi : pandas.DataFrame
        Indicator values, one column per indicator.
    dfw : array-like
        One weight per column of ``dfi``, given either as a 1-D sequence or
        as a single-row 2-D table (only the first row is used).
    a : float, default 0.5
        Share given to the geometric part; ``1 - a`` goes to the weighted sum.

    Returns
    -------
    pandas.Series
        ``a * weighted_geomean + (1 - a) * weighted_sum`` for every row.
    """
    weights = np.asarray(dfw)
    # A one-row table is reduced to that row. 1-D weights are used as given:
    # indexing them with [0] would keep only the first weight and apply it to
    # every column.
    if weights.ndim > 1:
        weights = weights[0]
    geometric_part = dfi.pow(weights, axis='columns').product(axis='columns')
    additive_part = dfi.multiply(weights, axis='columns').sum(axis=1)
    return a * geometric_part + (1 - a) * additive_part

In [5]:
# Selecting rainfall data after 1366 (nearly 30 years).
# Both bounds are exclusive: rows with syear == 1366 or syear == 1399 are dropped.
rainfall_modified=rainfall_data.loc[(rainfall_data['syear']>1366) & (rainfall_data['syear']<1399)]

# Making a dictionary of provinces and their respective data.
# Keys are the distinct values of the 'ostan' column, in order of first appearance.
pr=list(rainfall_modified.ostan.unique())
prdict={elem:pd.DataFrame() for elem in pr}
for key in prdict.keys():
    prdict[key]=rainfall_modified[:][rainfall_modified.ostan==key]

# Making a dictionary consisting of province names as keys and annual precipitation
# (sum of 'precnew' per 'syear') as respective values.
annualrain_dict={x:pd.DataFrame() for x in pr}
for key in annualrain_dict.keys():
    annualrain_dict[key]=prdict[key].groupby('syear').precnew.sum()

# Calculating coefficients of variation (standard deviation / mean).
# Inter-annual: computed over the yearly totals of each province.
interannual_varicoef={x:pd.DataFrame() for x in pr}
for key in interannual_varicoef.keys():
    interannual_varicoef[key]=annualrain_dict[key].std()/annualrain_dict[key].mean()
# "Monthly": computed over the individual 'precnew' rows of each province
# (presumably one row per month -- TODO confirm against the rainfall workbook).
monthly_varicoef={x:pd.DataFrame() for x in pr}
for key in monthly_varicoef.keys():
    monthly_varicoef[key]=prdict[key].precnew.std()/prdict[key].precnew.mean()
# Placeholder only: every value stays an empty DataFrame in this cell.
annualevap_varicoef={x:pd.DataFrame() for x in pr}

# Converting monthly coefficient of variation to dataframe and sorting it based on original data.
monthlyvaricoeff_df = pd.DataFrame(monthly_varicoef.items(),columns=['province', 'monthly_varicoeff'])
# NOTE(review): drops the row labelled 31, i.e. the 32nd province in order of
# appearance. Which province that is cannot be told from this cell, and the call
# raises KeyError when fewer than 32 provinces are present -- confirm intent.
monthlyvaricoeff_df = monthlyvaricoeff_df.drop([31])
monthlyvaricoeff_df = monthlyvaricoeff_df.set_index('province')
# Reorder the rows to follow data['province']; provinces missing here become NaN.
monthlyvaricoeff_df = monthlyvaricoeff_df.reindex(index=data['province'])
monthlyvaricoeff_df = monthlyvaricoeff_df.reset_index()

# Converting annual coefficient of variation to dataframe and sorting it based on original data
# (same steps as for the monthly coefficient above).
intanvaricoeff_df = pd.DataFrame(interannual_varicoef.items(),columns=['province', 'rain_coeff_variability'])
intanvaricoeff_df = intanvaricoeff_df.drop([31])
intanvaricoeff_df = intanvaricoeff_df.set_index('province')
intanvaricoeff_df = intanvaricoeff_df.reindex(index=data['province'])
intanvaricoeff_df = intanvaricoeff_df.reset_index()

# Adding calculated coefficients of variation to the data.
# The assignment aligns on the row index; after reset_index() both helper frames
# carry 0..n-1 (assumes `data` still has its default index -- TODO confirm).
data['rain_coeff_variation']=intanvaricoeff_df['rain_coeff_variability']
data['monthly_varicoeff']= monthlyvaricoeff_df['monthly_varicoeff']

In [6]:
# Creating a dataframe consisting of necessary variables for the calculation of sub-indicators.
# Every column is derived row by row from `data` (one row per province).
variables=pd.DataFrame()
variables['province']=data.province
variables['irwr']=data.precipitation-data.evaporation
# irwr split by the runoff coefficient: runoff share -> surface, remainder -> groundwater.
variables['surf_irwr']=(data.precipitation-data.evaporation)*data.runoff_coeff
# NOTE(review): positional slice of `data` columns 5..7 -- presumably the three
# withdraw_surf_* columns; verify against the workbook layout.
variables['withdraw_surf']=data.iloc[:,5:8].sum(axis=1)
variables['gw_irwr']=(data.precipitation-data.evaporation)*(1-data.runoff_coeff)
# NOTE(review): positional slice of `data` columns 8..10 -- presumably the three
# withdraw_gw_* columns; verify against the workbook layout.
variables['withdraw_gw']=data.iloc[:,8:11].sum(axis=1)
variables['withdraw_agr']=data.withdraw_gw_agr+data.withdraw_surf_agr
variables['withdraw_ind']=data.withdraw_gw_ind+data.withdraw_surf_ind
# Urban and rural sanitation access combined with the urban population ratio as weight.
# (This column used to be computed a second time further down with the same formula.)
variables['access_sanitation_total']=(data.access_sanitation_urban*data.urban_pop_ratio+data.access_sanitation_rural*(1-data.urban_pop_ratio))
# Deficits are the sign-flipped groundwater variations.
variables['deficit_gw_annual']=data.annual_gw_variation*(-1)
variables['deficit_gw_aggregate']=data.aggregate_gw_variation*(-1)
# Total withdrawal over the six source/sector columns. The former
# `variables['withdraw_total']=np.NaN` placeholder was overwritten immediately and
# `np.NaN` no longer exists in NumPy 2.0, so it is gone.
variables['withdraw_total']=data[['withdraw_surf_agr','withdraw_surf_ind','withdraw_surf_dom','withdraw_gw_agr','withdraw_gw_ind','withdraw_gw_dom']].sum(axis=1)
variables['agr_withdraw_ratio']=variables['withdraw_agr']/variables['withdraw_total']

# Setting negative deficit values equal to zero.
variables.loc[variables['deficit_gw_annual'] < 0 ,'deficit_gw_annual']=0
variables.loc[variables['deficit_gw_aggregate'] < 0 ,'deficit_gw_aggregate']=0
# NOTE(review): 17 is presumably the number of years covered by the aggregate
# deficit -- TODO confirm and move to a named constant.
variables['withdraw_gw_allowable']=variables.gw_irwr-(variables.deficit_gw_aggregate/17)
# Groundwater withdrawal above the allowable amount, floored at zero.
variables['withdraw_agr_decrease']=variables.withdraw_gw-variables.withdraw_gw_allowable
variables.loc[variables['withdraw_agr_decrease']<0,'withdraw_agr_decrease']=0
variables['agrwat_lost_ratio']=variables.withdraw_agr_decrease/variables.withdraw_agr

In [7]:
# Calculating the trade-off ratio between modern irrigation and employment lost.
# All quantities below are per-province Series aligned on the row index.

# NOTE(review): the denominator 2*land_irrig_tot - land_irrig_modern presumably
# encodes a water-use assumption for modern vs. traditional irrigation -- TODO confirm.
water_saved_perhec=variables['withdraw_agr']/(2*data['land_irrig_tot']-data['land_irrig_modern'])

# wwdpmii = water withdrawal decrease per modern irrigation increase (percent):
# the per-hectare figure applied to 1 % of the total irrigated land.
wwdpmii=water_saved_perhec*0.01*data['land_irrig_tot']
# wdn = water decrease necessary for compensating the effect of lowering modern irrigation by 1 percent
wdn=variables['withdraw_agr_decrease']+wwdpmii
# eltr = employment lost for tradeoff ratio & mitr = modern irrigation for tradeoff ratio
eltr=wdn*data.agr_employment/variables.withdraw_agr
# mitr is not used anywhere else in this cell.
mitr=data.land_irrig_modern/data.land_irrig_tot-0.01
# eln = employment lost for the withdrawal decrease alone (without the extra 1 % term)
eln=variables['withdraw_agr_decrease']*data.agr_employment/variables.withdraw_agr
# NOTE(review): the scaling by 100/18 is not explained in the notebook -- TODO document the 18.
tradeoff_ratio=(eltr-eln)*100/18


In [8]:
# Creating a dataframe for the sub-indicators (one column per sub-indicator,
# one row per province). The frame was previously initialised twice in a row.
sub_indicators=pd.DataFrame()

#Dimension 1: Resources
# Rainfall variability coefficients, min-max scaled to [0, 1].
sub_indicators['rain_coeff_variation']=(data['rain_coeff_variation']-data['rain_coeff_variation'].min())/(data['rain_coeff_variation'].max()-data['rain_coeff_variation'].min())
sub_indicators['monthly_varicoeff']=(data['monthly_varicoeff']-data['monthly_varicoeff'].min())/(data['monthly_varicoeff'].max()-data['monthly_varicoeff'].min())
# Rainfall anomaly enters as an absolute value; temperature anomaly keeps its sign.
sub_indicators['anomaly_rain']=abs(data['anomaly_rain'])
sub_indicators['anomaly_temp']=data['anomaly_temp']

# Groundwater deficits relative to groundwater irwr and to groundwater withdrawal.
sub_indicators['gw_agdef_gwrwr']=variables.deficit_gw_aggregate/variables.gw_irwr
sub_indicators['gw_andef_withdraw']=variables.deficit_gw_annual/variables.withdraw_gw

# Share of groundwater in each sector's withdrawal / produced water.
sub_indicators['agr_dependency_gw']=data.withdraw_gw_agr/variables.withdraw_agr
sub_indicators['ind_dependency_gw']=data.withdraw_gw_ind/variables.withdraw_ind
sub_indicators['urbanwat_dependency_gw']=data.withdraw_urban_gw/data.produced_urban_wat
sub_indicators['ruralwat_dependency_gw']=data.withdraw_rural_gw/data.produced_rural_wat

# Withdrawal relative to the surface / groundwater share of irwr.
sub_indicators['withdraw_surf_ratio']=variables.withdraw_surf/variables.surf_irwr
sub_indicators['withdraw_gw_ratio']=variables.withdraw_gw/variables.gw_irwr

# irwr per capita, divided by (1 + pop_growth/100).
# NOTE(review): the 10**6 factor is presumably a unit conversion -- TODO confirm.
sub_indicators['irwr_percap']=(variables.irwr*(10**6)/data.population)/(1+data['pop_growth']/100)

#Dimension 2: access
sub_indicators['access_wat_urban']=data['access_wat_urban']
sub_indicators['access_wat_rural']=data['access_wat_rural']
sub_indicators['under_stress_pop']=data['under_stress_pop']
sub_indicators['access_sanitation_urban']=data['access_sanitation_urban']
sub_indicators['access_sanitation_rural']=data['access_sanitation_rural']
sub_indicators['treated_municipal_wastewater']=data.waste_facility_cap/data.daily_produced_waste
sub_indicators['quality_proxy']=data.urban_fam_treatwat/data.total_urban_fam
sub_indicators['damcap_rwr_ratio']=data.dam_cap/variables.surf_irwr

#Dimension 3:Economy
sub_indicators['efficiency_agr']=data.agr_added_value/variables.withdraw_agr
sub_indicators['modern_irrig']=data.land_irrig_modern/data.land_irrig_tot
sub_indicators['employment_lost_agr']=variables.agrwat_lost_ratio*data.agr_employment
sub_indicators['unaccounted_wat_urban']=data['unaccounted_wat_urban']
sub_indicators['unaccounted_wat_rural']=data['unaccounted_wat_rural']
sub_indicators['efficiency_ind']=data.ind_added_value/variables.withdraw_ind

# Label the rows by province, then override one value by hand.
sub_indicators.index=variables['province']
# NOTE(review): the dam-capacity ratio of 'khuz' is forced to 1; the reason is
# not recorded in the notebook -- TODO document.
sub_indicators.loc['khuz','damcap_rwr_ratio']=1


In [9]:
# Short codes for the sub-indicators, in the same order as the columns of
# `sub_indicators` (resources, access, economy).
res_sub_columns = ['ACV', 'MCV', 'APA', 'ATA', 'Dag', 'Dan', 'GWDagr', 'GWDind', 'GWDurb', 'GWDrur', 'SWS', 'GWS', 'IRWR']
acc_sub_columns = ['WAurb', 'WArur', 'USP', 'SAurb', 'SArur', 'TPC', 'WQ', 'DC']
eco_sub_columns = ['AE', 'MI', 'EL', 'NRWurb', 'NRWrur', 'IE']

# Aggregation group of every sub-indicator, written as repeated group labels.
sub_columns1_list = (
    ['R1'] * 4 + ['R2'] * 2 + ['R3'] * 4 + ['R4'] * 2 + ['R5']
    + ['A1'] * 3 + ['A2'] * 3 + ['A3'] * 2
    + ['E1'] * 3 + ['E2'] * 2 + ['E3']
)
sub_columns2_list = [*res_sub_columns, *acc_sub_columns, *eco_sub_columns]
arraycolumns = [sub_columns1_list, sub_columns2_list]

# Rounded copy of the sub-indicators with (group, code) column labels; the
# four sub-indicators listed below are left out of the level-1 aggregation.
sub_indicators_lvl1 = sub_indicators.round(decimals=4)
sub_indicators_lvl1.columns = pd.MultiIndex.from_arrays(
    arraycolumns, names=('Aggregation Level', 'Indicator')
)
sub_indicators_lvl1 = sub_indicators_lvl1.drop(
    columns=[('R5', 'IRWR'), ('A3', 'WQ'), ('A3', 'DC'), ('E3', 'IE')]
)
sub_indicators_lvl1.head()

Aggregation Level,R1,R1,R1,R1,R2,R2,R3,R3,R3,R3,...,A1,A1,A2,A2,A2,E1,E1,E1,E2,E2
Indicator,ACV,MCV,APA,ATA,Dag,Dan,GWDagr,GWDind,GWDurb,GWDrur,...,WArur,USP,SAurb,SArur,TPC,AE,MI,EL,NRWurb,NRWrur
province,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
azsh,0.0853,0.2109,5.0009,1.2218,0.9674,0.0411,0.431,0.5562,0.4589,0.798,...,79.6,0.6406,63.64,0.2,0.3865,0.8134,0.1898,0.0,18.88,36.1
azgh,0.142,0.216,0.1947,1.6438,0.9789,0.0208,0.4064,0.4744,0.4794,0.9637,...,75.85,0.4796,65.25,1.5,0.3893,0.4815,0.1606,5.7977,22.54,28.3
arda,0.0181,0.1365,9.741,1.1031,1.3766,0.1124,0.156,0.9786,0.2497,0.5949,...,75.74,0.7063,52.67,0.6,0.4302,0.6702,0.0921,0.0,25.4,29.4
esfe,0.3248,0.5142,11.1196,0.8887,5.4352,0.1733,0.674,0.2948,0.143,0.4991,...,71.63,0.2246,72.9,0.8,0.6079,0.2821,0.3279,4.5233,16.14,29.6
albo,0.1665,0.3619,12.6169,0.8668,3.7448,0.1105,0.6409,0.9477,0.7665,0.973,...,72.24,0.726,31.25,2.0,0.1469,0.878,0.2789,1.54,22.3,45.9


In [10]:
#importing relevant functions to perform PCA
from numpy import eye, asarray, dot, sum, diag
from numpy.linalg import svd
from scipy.sparse.linalg import eigs
from scipy.sparse.linalg import eigsh

#defining function for performing PCA
def PCA(df):
    """Eigen-decompose the covariance matrix of the standardised columns of ``df``.

    Every column is centred and divided by its sample standard deviation, so
    the decomposed matrix is the correlation matrix of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric data, one column per variable.

    Returns
    -------
    (eigenvals, eigenvec) : tuple of numpy.ndarray
        ``eigenvals`` is 1-D and holds all eigenvalues, largest first.
        ``eigenvec`` has shape ``(n_columns, 1)`` and holds the unit-length
        eigenvector of the largest eigenvalue. Its overall sign is arbitrary.
    """
    standardized = (df - df.mean()) / df.std()
    covariance = standardized.cov().to_numpy(dtype=float)
    # The matrix is symmetric, so the dense symmetric solver applies: real
    # output, deterministic, eigenvalues in ascending order. The sparse ARPACK
    # solver used before needs k < N - 1; otherwise it warns and falls back to
    # a dense general solver that returns every eigenvector in no particular
    # order, so for two-column inputs the "leading" vector was whichever came
    # first.
    ascending_values, vectors = np.linalg.eigh(covariance)
    eigenvals = ascending_values[::-1]
    eigenvec = vectors[:, [-1]]  # keep 2-D so callers can use .transpose()[0]
    return (eigenvals, eigenvec)


In [11]:
# Calculating sub_indicator weights using PCA.
# The weight Series starts as zeros with the same (group, code) labels as the
# level-1 sub-indicator columns.
PCA_weight_lvl1=pd.Series(sub_indicators_lvl1.iloc[0,:]*0,name='lvl1_weights')
explained_variance_lvl1=[]

lvl1_groups=['R1','R2','R3','R4','A1','A2','E1','E2']
for col in lvl1_groups:
    # One decomposition per group; it used to be recomputed three times.
    eigenvals,eigenvec=PCA(sub_indicators_lvl1[col])
    # Raw weights: absolute loadings of the first returned eigenvector.
    PCA_weight_lvl1[col]=abs(eigenvec.transpose()[0])
    eigvalsum=eigenvals.sum()
    eigvalmax=eigenvals.max()
    # Share of the largest eigenvalue in the eigenvalue total.
    explained_variance_lvl1.append((col,eigvalmax/eigvalsum))

# Normalising weights so they add up to unity within each group.
for col1 in lvl1_groups:
    temp=PCA_weight_lvl1[col1].sum()  # group total, taken before any entry is rescaled
    for col2 in sub_columns2_list:
        if (col1,col2) in PCA_weight_lvl1.index:
            PCA_weight_lvl1[(col1,col2)]=(PCA_weight_lvl1[(col1,col2)]/temp).round(4)
PCA_weight_lvl1



Aggregation Level  Indicator
R1                 ACV          0.3214
                   MCV          0.3365
                   APA          0.2488
                   ATA          0.0933
R2                 Dag          0.5000
                   Dan          0.5000
R3                 GWDagr       0.1569
                   GWDind       0.2696
                   GWDurb       0.2889
                   GWDrur       0.2847
R4                 SWS          0.5000
                   GWS          0.5000
A1                 WAurb        0.4951
                   WArur        0.4959
                   USP          0.0090
A2                 SAurb        0.3505
                   SArur        0.2634
                   TPC          0.3861
E1                 AE           0.3478
                   MI           0.2419
                   EL           0.4103
E2                 NRWurb       0.5000
                   NRWrur       0.5000
Name: lvl1_weights, dtype: float64

In [12]:
# Normalizing Data.
# `normalized` starts as zeros shaped like `sub_indicators`; each column listed
# in `ind_type` is then filled according to its type (row 2 of `ind_type`).
normalized=sub_indicators*0
# Allowable groundwater withdrawal as a fraction of groundwater irwr, relabelled
# so it lines up with the province index of `sub_indicators`.
max_allowable_withdraw=variables['withdraw_gw_allowable']/variables['gw_irwr']
max_allowable_withdraw.index=sub_indicators.index

for col in ind_type.columns:
    # Row 0 is the threshold that scores 1, row 1 the threshold that scores 0.01.
    if ind_type.loc[2,col]=='b': # The bigger the better indicators
        normalized.loc[sub_indicators[col]>=ind_type.loc[0,col],col]=1
        normalized.loc[sub_indicators[col]<=ind_type.loc[1,col],col]=0.01
        # Linear interpolation strictly between the two thresholds.
        cond=(sub_indicators[col]>ind_type.loc[1,col])& (sub_indicators[col]<ind_type.loc[0,col])
        normalized.loc[cond,col]=(sub_indicators.loc[cond,col]-ind_type.loc[1,col])/(ind_type.loc[0,col]-ind_type.loc[1,col])
    elif ind_type.loc[2,col]=='l':      # The lower the better indicators
        normalized.loc[sub_indicators[col]<=ind_type.loc[0,col],col]=1
        normalized.loc[sub_indicators[col]>=ind_type.loc[1,col],col]=0.01
        cond=(sub_indicators[col]<ind_type.loc[1,col])& (sub_indicators[col]>ind_type.loc[0,col])
        normalized.loc[cond,col]=(ind_type.loc[1,col]-sub_indicators.loc[cond,col])/(ind_type.loc[1,col]-ind_type.loc[0,col])
    elif ind_type.loc[2,col]=='diff':    # GW withdrawal to rwr ratio normalization
        # Bounds are inclusive, like in the two branches above. With strict
        # comparisons a value exactly at 0.25 or exactly at the allowable
        # limit matched no condition and kept the initial 0.
        cond1=(sub_indicators[col] >= max_allowable_withdraw)
        normalized.loc[cond1,col]=0.01
        cond2=sub_indicators[col]<=0.25
        normalized.loc[cond2,col]=1
        # Linear interpolation between 0.25 (score 1) and the province's own limit.
        cond3=(sub_indicators[col] < max_allowable_withdraw) & (sub_indicators[col]>0.25)
        normalized.loc[cond3,col]=(max_allowable_withdraw.loc[cond3]-sub_indicators.loc[cond3,col])/(max_allowable_withdraw.loc[cond3]-0.25)

In [13]:
# Dam capacity to surface rwr ratio normalization.
# The ideal ratio of each province depends on its rainfall variability: the mean
# of the two normalized variability scores is split into four bins.
variation_coeff_agg=pd.Series(normalized[['rain_coeff_variation','monthly_varicoeff']].mean(axis=1),index=variables['province'])
# NOTE(review): pd.cut with an integer makes four equal-width bins, so the
# labels 'Q1'..'Q4' are not quartiles here (pd.qcut is used further down).
variation_coeff_bins=pd.cut(variation_coeff_agg,4,labels=['Q1','Q2','Q3','Q4'])
ideal_damcap=pd.Series(index=variables['province'],dtype='float64')
# Ideal ratio per bin: 1, 0.9, 0.8, 0.7 from the lowest to the highest bin.
ideal_damcap[variation_coeff_bins=='Q1']=1
ideal_damcap[variation_coeff_bins=='Q2']=0.9
ideal_damcap[variation_coeff_bins=='Q3']=0.8
ideal_damcap[variation_coeff_bins=='Q4']=0.7
normalized['damcap_rwr_ratio']=damcap_normalize(sub_indicators['damcap_rwr_ratio'],ideal_damcap)
# 'khuz' is forced to a perfect score regardless of the computed value.
normalized.loc['khuz','damcap_rwr_ratio']=1

# Modifying modern irrigation indicator values based on aggregate GW reservoir deficits.
# The two normalized deficit scores are blended with aggregate() and split into quartiles.
gw_deficit_agg=pd.Series(aggregate(normalized[['gw_agdef_gwrwr','gw_andef_withdraw']]),index=variables['province'])
gw_deficit_bins=pd.qcut(gw_deficit_agg,4,labels=['Q1','Q2','Q3','Q4'])
gw_modifier=pd.Series(index=variables['province'],dtype='float64')
# Multiplier per quartile: the two lowest quartiles are scaled down, the rest unchanged.
gw_modifier[gw_deficit_bins=='Q1']=0.8
gw_modifier[gw_deficit_bins=='Q2']=0.9
gw_modifier[gw_deficit_bins=='Q3']=1
gw_modifier[gw_deficit_bins=='Q4']=1
# Element-wise product; both Series are labelled by province.
normalized['modern_irrig']=normalized['modern_irrig'].multiply(gw_modifier)


In [14]:
# Arithmetic aggregation method
# Rounded copy of the normalized sub-indicators with (group, code) column labels.
normalized_pca = normalized.round(decimals=4)
normalized_pca.columns = pd.MultiIndex.from_arrays(
    arraycolumns, names=('Aggregation Level', 'Indicator')
)

# Multiplying normalized sub_indicators by their respective weights; columns
# without a level-1 weight keep the initial zero.
indicators_weighted_arithmetic = normalized_pca * 0
for i in PCA_weight_lvl1.index:
    indicators_weighted_arithmetic[i] = normalized_pca[i] * PCA_weight_lvl1[i]

# Calculating indicators from weighted sub_indicators: each indicator is the
# row sum of one level-1 group.
indicators_arithmetic = pd.DataFrame(index=normalized_pca.index)
for level1_group, indicator_name in [
    ('R1', 'CC'), ('R2', 'GD'), ('R3', 'GWD'), ('R4', 'WS'),
    ('A1', 'DW'), ('A2', 'SS'),
    ('E1', 'Eagr'), ('E2', 'NRW'),
]:
    indicators_arithmetic[indicator_name] = indicators_weighted_arithmetic[level1_group].sum(axis=1)

# Label every indicator with its dimension.
indicators_columns = list(indicators_arithmetic.columns)
ind_multicol = [['Res'] * 4 + ['Acc'] * 2 + ['Eco'] * 2, indicators_columns]
indicators_arithmetic.columns = pd.MultiIndex.from_arrays(
    ind_multicol, names=('Dimension', 'Indicator')
)


In [15]:
# Geometric aggregation method

# Calculating indicators from weighted sub_indicators: each indicator is the
# weighted geometric mean of one level-1 group.
indicators_geometric = pd.DataFrame(index=normalized_pca.index)
for level1_group, indicator_name in [
    ('R1', 'CC'), ('R2', 'GD'), ('R3', 'GWD'), ('R4', 'WS'),
    ('A1', 'DW'), ('A2', 'SS'),
    ('E1', 'Eagr'), ('E2', 'NRW'),
]:
    indicators_geometric[indicator_name] = weighted_geomean(
        normalized_pca[level1_group], PCA_weight_lvl1[level1_group]
    )

# Label every indicator with its dimension.
ind_multicol = [['Res'] * 4 + ['Acc'] * 2 + ['Eco'] * 2, indicators_columns]
indicators_geometric.columns = pd.MultiIndex.from_arrays(
    ind_multicol, names=('Dimension', 'Indicator')
)


In [16]:
# Calculating indicator weights using PCA for the arithmetic aggregation method.
# The weight Series starts as zeros with the (dimension, indicator) labels.
PCA_ariweight=pd.Series(indicators_arithmetic.iloc[0,:]*0,name='lvl2_weights')
explained_arivariance=[]

lvl2_dimensions=['Res','Acc','Eco']
for col in lvl2_dimensions:
    # One decomposition per dimension; it used to be recomputed three times.
    eigenvals,eigenvec=PCA(indicators_arithmetic[col])
    PCA_ariweight[col]=abs(eigenvec.transpose()[0])
    eigvalsum=eigenvals.sum()
    eigvalmax=eigenvals.max()
    explained_arivariance.append((col,eigvalmax/eigvalsum))

# Normalising weights so they add up to unity within each dimension.
# Each dimension is visited exactly once: the earlier list repeated the names
# ('Res' four times, ...), which re-normalised and re-rounded weights that had
# already been rounded.
for col1 in lvl2_dimensions:
    temp=PCA_ariweight[col1].sum()
    for col2 in indicators_columns:
        if (col1,col2) in PCA_ariweight.index:
            PCA_ariweight[(col1,col2)]=(PCA_ariweight[(col1,col2)]/temp).round(4)



In [17]:
# Calculating indicator weights using PCA for the geometric aggregation method.
# The weight Series starts as zeros with the (dimension, indicator) labels.
PCA_geoweight=pd.Series(indicators_geometric.iloc[0,:]*0,name='lvl2_weights')
explained_geovariance=[]

lvl2_dimensions=['Res','Acc','Eco']
for col in lvl2_dimensions:
    # One decomposition per dimension; it used to be recomputed three times.
    eigenvals,eigenvec=PCA(indicators_geometric[col])
    PCA_geoweight[col]=abs(eigenvec.transpose()[0])
    eigvalsum=eigenvals.sum()
    eigvalmax=eigenvals.max()
    explained_geovariance.append((col,eigvalmax/eigvalsum))

# Normalising weights so they add up to unity within each dimension.
# Each dimension is visited exactly once: the earlier list repeated the names
# ('Res' four times, ...), which re-normalised and re-rounded weights that had
# already been rounded.
for col1 in lvl2_dimensions:
    temp=PCA_geoweight[col1].sum()
    for col2 in indicators_columns:
        if (col1,col2) in PCA_geoweight.index:
            PCA_geoweight[(col1,col2)]=(PCA_geoweight[(col1,col2)]/temp).round(4)

In [18]:
# Dimension scores (geometric method): weighted geometric mean of the
# indicators of each dimension, using the level-2 PCA weights.
Dimensions_geometric = pd.DataFrame(index=normalized_pca.index)
for dimension in ('Res', 'Acc', 'Eco'):
    Dimensions_geometric[dimension] = weighted_geomean(
        indicators_geometric[dimension], PCA_geoweight[dimension]
    )
Dimensions_geometric.head()

Unnamed: 0_level_0,Res,Acc,Eco
province,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
azsh,0.358288,0.362292,0.577765
azgh,0.290625,0.459657,0.479812
arda,0.25647,0.396296,0.493851
esfe,0.097006,0.450387,0.48885
albo,0.173138,0.321604,0.499504


In [19]:
# Dimension scores (arithmetic method): weighted sum of the indicators of
# each dimension, using the level-2 PCA weights.
Dimensions_arithmetic = pd.DataFrame(index=normalized_pca.index)
for dimension in ('Res', 'Acc', 'Eco'):
    Dimensions_arithmetic[dimension] = weighted_sum(
        indicators_arithmetic[dimension], PCA_ariweight[dimension]
    )
Dimensions_arithmetic.head()

Unnamed: 0_level_0,Res,Acc,Eco
province,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
azsh,0.549892,0.576334,0.63638
azgh,0.567248,0.568719,0.522804
arda,0.461455,0.546603,0.581097
esfe,0.225492,0.597541,0.564926
albo,0.385101,0.440739,0.558947


In [20]:
# Weights of the three dimensions in the final index: absolute loadings of the
# first eigenvector returned by PCA, rescaled to add up to one.
geo_loadings = abs(PCA(Dimensions_geometric)[1].transpose()[0])
wsi_geoweight = geo_loadings / geo_loadings.sum()

ari_loadings = abs(PCA(Dimensions_arithmetic)[1].transpose()[0])
wsi_ariweight = ari_loadings / ari_loadings.sum()



In [21]:
# Final index per province, once per aggregation method: weighted geometric
# mean of the geometric dimension scores ...
WSI_geo = weighted_geomean(dfi=Dimensions_geometric, dfw=wsi_geoweight)

# ... and weighted sum of the arithmetic dimension scores.
WSI_ari = weighted_sum(dfi=Dimensions_arithmetic, dfw=wsi_ariweight)
