In [158]:
import numpy as np
import pandas as pd
# Load the district-level dataset.
odisha_df = pd.read_csv('final_dataset.csv')

# Keep every column from 'nddp' through the last one; the columns before it
# (district and variable names, per the original notes) are left out.
data = odisha_df.loc[:, 'nddp':]

# Plain NumPy copy of the selected values, used as the input matrix below.
data_values = np.array(data.values)

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA as sklearnPCA

# Standardize every variable (StandardScaler defaults: zero mean, unit variance).
data_values_std = StandardScaler().fit_transform(data_values)

# Retain the first four principal components.
sklearn_pca = sklearnPCA(n_components=4)

# Only the fitted attributes are read below, so fit() is sufficient; the
# projected data returned by fit_transform() was previously computed and
# then discarded.
sklearn_pca.fit(data_values_std)

# Variance explained by each retained component and by all of them together.
# NOTE: despite the names, these are fractions in [0, 1], not percentages.
pc_explain_percentage = sklearn_pca.explained_variance_ratio_
pc_total_explained_percentage = sum(pc_explain_percentage)

# Loading vectors of the retained components (rows of components_, in order).
pc1, pc2, pc3, pc4 = sklearn_pca.components_

# Terminology: "factor scores" is used here in place of "component scores".
# The k-th factor score of a row is the dot product of that row's standardized
# values with the k-th loading vector (principal component). One matrix
# product yields all four scores for every row, replacing the per-row Python
# loop and its four accumulator lists.
loadings = np.vstack((pc1, pc2, pc3, pc4))
factor_scores = np.dot(data_values_std, loadings.T)

# One 1-D float array per factor, ordered like the rows of data_values_std
# (one entry per district, per the original notes).
factor1 = factor_scores[:, 0].copy()
factor2 = factor_scores[:, 1].copy()
factor3 = factor_scores[:, 2].copy()
factor4 = factor_scores[:, 3].copy()

# Weight of each retained component: its explained-variance ratio divided by
# the total explained by the retained components.
proportion_explained_among_factors = pc_explain_percentage / pc_total_explained_percentage

# Non-Standardized Index: weighted sum of the four factor scores.
NSI = (
    factor1 * proportion_explained_among_factors[0]
    + factor2 * proportion_explained_among_factors[1]
    + factor3 * proportion_explained_among_factors[2]
    + factor4 * proportion_explained_among_factors[3]
)

# Keep a reference to the raw index; the rescaling below builds new arrays,
# so NSI_test continues to hold the un-normalized values.
NSI_test = NSI
min_nsi = min(NSI)
max_nsi = max(NSI)

# Min-max rescale to the range 0..1, then express on a 0..100 scale.
NSI = NSI - min_nsi
NSI = NSI / (max_nsi - min_nsi)
SI = NSI * 100  # Standardized Index

# Invert the 0..100 Standardized Index so that larger values are better:
# per the original author's interpretation, the higher the index, the more
# developed the district.
index = 100 - np.array(SI)

# Attach the index to the main dataframe, aligned on its existing row labels.
odisha_df['index'] = pd.Series(index, odisha_df.index)

# Keep only the district name and its index, ordered from highest to lowest.
final_df = odisha_df.loc[:, ['district', 'index']]
final_df = final_df.sort_values(['index'], ascending=False)

# Rank follows the sorted order (1 = highest index). The row count is taken
# from the data rather than hard-coded, so the cell works for any number of
# districts.
n_districts = len(final_df)
final_df['rank'] = pd.Series(range(1, n_districts + 1), final_df.index)

# Relabel rows 1..n so the row labels no longer reflect the pre-sort order.
final_df.index = np.arange(1, n_districts + 1)

# The last-ranked row would have an index of 0.0; it is set to 1.0 so that the
# point remains visible when plotted. Note that this adjusted value is what
# gets written to the CSV.
final_df.loc[n_districts, 'index'] = 1.0
final_df.to_csv('final_result.csv')
final_df

Unnamed: 0,district,index,rank
1,Khordha,100.0,1
2,Sundargarh,95.467134,2
3,Cuttack,88.55253,3
4,Anugul,83.544095,4
5,Ganjam,80.609626,5
6,Jajapur,78.532034,6
7,Kendujhar,70.611092,7
8,Sambalpur,66.49382,8
9,Jharsuguda,63.569952,9
10,Jagatsinghapur,63.480126,10
