# K-Means Modeling 

To see patterns in the data, I decided K-means models were the best way to see how countries are grouped by Chinese aid and other factors. Below are several K-means models, each using a different set of variables. I also tried DBSCAN models, but they produced negative silhouette scores, so I decided to stay with the K-means models for this data. 

In [580]:
# Importing libraries 
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

In [581]:
# Load the combined aid dataset from its CSV file into a data frame
aid = pd.read_csv(filepath_or_buffer='./aid_data/combined_data/aid_sums.csv')

In [582]:
# Previewing the first five rows of the aid data frame
aid.head()

Unnamed: 0,country,world_bank_totals,chinese_aid_totals,usaid_aid,hdi_00,hdi_14,pr_score00,cl_score00,fh_status00,pr_score14,...,resource_rents,gdp_per_cap00,debt_to_gdp,fh_change,pc_gdp_change,hdi_change,world_bank_pc,chinese_aid_pc,usaid_pc,chinese_aid_total_max
0,Algeria,438050500.0,588090100.0,247662100.0,0.646,0.749,6.0,5.0,NF,6.0,...,24.602722,1764.888222,7.673,0.0,2937.203478,0.103,11.254086,15.108798,6.362761,1
1,Angola,803087300.0,38642330000.0,2939893000.0,0.394,0.557,6.0,6.0,NF,6.0,...,23.38193,556.836318,40.676,-1.0,3286.361923,0.163,29.808251,1434.290317,109.120217,1
2,Benin,1107820000.0,1155924000.0,1785451000.0,0.398,0.505,2.0,2.0,F,2.0,...,4.872945,374.192394,30.452,0.0,460.251202,0.107,107.692898,112.369168,173.566471,0
3,Botswana,385871900.0,2563158000.0,2141447000.0,0.578,0.709,2.0,2.0,F,3.0,...,2.516289,3522.308678,17.346,1.0,4341.944603,0.131,184.750231,1227.205008,1025.295634,1
4,Burkina Faso,2858288000.0,0.0,2048434000.0,0.286,0.405,4.0,4.0,PF,6.0,...,16.981603,226.475981,30.387,1.0,413.232114,0.119,162.531851,0.0,116.480855,0


In [583]:
# Listing every column name available in the aid data frame
print(aid.columns)

Index(['country', 'world_bank_totals', 'chinese_aid_totals', 'usaid_aid',
       'hdi_00', 'hdi_14', 'pr_score00', 'cl_score00', 'fh_status00',
       'pr_score14', 'cl_score14', 'fh_status14', 'cpi_2014', 'population',
       'gdp_per_cap14', 'resource_rents', 'gdp_per_cap00', 'debt_to_gdp',
       'fh_change', 'pc_gdp_change', 'hdi_change', 'world_bank_pc',
       'chinese_aid_pc', 'usaid_pc', 'chinese_aid_total_max'],
      dtype='object')


## K-means Model with All Numeric Features

In [584]:
# Standardize almost every numeric column so they can all go into the first
# K-means model together (categorical Freedom House status columns are left out)
ss = StandardScaler()
X_scaled = ss.fit_transform(
    aid.loc[:, [
        'world_bank_totals', 'chinese_aid_totals', 'usaid_aid',
        'hdi_00', 'hdi_14',
        'pr_score00', 'cl_score00', 'pr_score14', 'cl_score14',
        'cpi_2014', 'population',
        'gdp_per_cap14', 'resource_rents', 'gdp_per_cap00', 'debt_to_gdp',
        'fh_change', 'pc_gdp_change', 'hdi_change',
        'world_bank_pc', 'chinese_aid_pc', 'usaid_pc',
    ]]
)

In [585]:
# Fit a K-means model on all scaled features. k=9 gave the best silhouette score.
# NOTE(review): n_init=1 means only one k-means++ initialization is tried, so the
# resulting clusters depend entirely on random_state=22.
true_k = 9
model = KMeans(
    n_clusters=true_k,
    init='k-means++',
    max_iter=100,
    n_init=1,
    random_state=22,
).fit(X_scaled)
model

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=100,
       n_clusters=9, n_init=1, n_jobs=None, precompute_distances='auto',
       random_state=22, tol=0.0001, verbose=0)

In [586]:
# Assigning every row of the scaled feature matrix to one of the fitted clusters
model.predict(X_scaled)

array([6, 0, 1, 1, 0, 0, 1, 0, 4, 4, 0, 4, 0, 0, 0, 0, 4, 3, 6, 4, 1, 0,
       4, 6, 8, 4, 0, 0, 0, 0, 0, 0, 0, 4, 1, 0, 0, 1, 0, 3, 0, 1, 1, 0,
       1, 4, 0, 0, 0, 0, 0, 0, 4, 0, 5, 6, 7, 6, 0, 4, 4, 4, 5, 4, 5, 6,
       2, 6], dtype=int32)

In [587]:
# Storing this model's cluster labels in a new 'clusters_all' column, then
# previewing the first rows to confirm the column was added
aid['clusters_all'] = model.labels_
aid.head()

Unnamed: 0,country,world_bank_totals,chinese_aid_totals,usaid_aid,hdi_00,hdi_14,pr_score00,cl_score00,fh_status00,pr_score14,...,gdp_per_cap00,debt_to_gdp,fh_change,pc_gdp_change,hdi_change,world_bank_pc,chinese_aid_pc,usaid_pc,chinese_aid_total_max,clusters_all
0,Algeria,438050500.0,588090100.0,247662100.0,0.646,0.749,6.0,5.0,NF,6.0,...,1764.888222,7.673,0.0,2937.203478,0.103,11.254086,15.108798,6.362761,1,6
1,Angola,803087300.0,38642330000.0,2939893000.0,0.394,0.557,6.0,6.0,NF,6.0,...,556.836318,40.676,-1.0,3286.361923,0.163,29.808251,1434.290317,109.120217,1,0
2,Benin,1107820000.0,1155924000.0,1785451000.0,0.398,0.505,2.0,2.0,F,2.0,...,374.192394,30.452,0.0,460.251202,0.107,107.692898,112.369168,173.566471,0,1
3,Botswana,385871900.0,2563158000.0,2141447000.0,0.578,0.709,2.0,2.0,F,3.0,...,3522.308678,17.346,1.0,4341.944603,0.131,184.750231,1227.205008,1025.295634,1,1
4,Burkina Faso,2858288000.0,0.0,2048434000.0,0.286,0.405,4.0,4.0,PF,6.0,...,226.475981,30.387,1.0,413.232114,0.119,162.531851,0.0,116.480855,0,0


In [588]:
# Silhouette score for the all-features model. It is just okay: a value this
# close to 0 means the clusters overlap considerably
silhouette_score(X_scaled, model.labels_)

0.12292105001931611

In [589]:
# Number of countries in each cluster; cluster 0 is the largest, followed by cluster 4
aid['clusters_all'].value_counts()

0    30
4    14
1     9
6     7
5     3
3     2
8     1
7     1
2     1
Name: clusters_all, dtype: int64

In [590]:
# Summary statistics for the countries in cluster 4, transposed so that each
# feature is a row and each statistic is a column
cluster_4 = aid.loc[aid['clusters_all'] == 4].describe().transpose()

In [591]:
# Previewing the first five rows of the cluster 4 summary statistics
cluster_4.head()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
world_bank_totals,14.0,522105800.0,734014300.0,0.0,31584220.0,245303500.0,516831900.0,2445064000.0
chinese_aid_totals,14.0,4842222000.0,9763207000.0,0.0,163268700.0,394397000.0,1447106000.0,30610640000.0
usaid_aid,14.0,5095573000.0,7711530000.0,139231200.0,582938700.0,2912450000.0,4178440000.0,25818000000.0
hdi_00,14.0,0.4520923,0.1137448,0.298,0.38725,0.449,0.5074306,0.702
hdi_14,14.0,0.4894286,0.093426,0.355,0.43525,0.473,0.53225,0.72


In [592]:
# Only keeping the 'mean' column, which leaves a Series of per-feature means for cluster 4
cluster_4 = cluster_4['mean']

In [593]:
# Summary statistics for the countries in cluster 0, transposed so that each
# feature is a row and each statistic is a column
cluster_0 = aid.loc[aid['clusters_all'] == 0].describe().transpose()

In [594]:
# Only keeping the 'mean' column, which leaves a Series of per-feature means for cluster 0
cluster_0 = cluster_0['mean']

In [595]:
# Converting the cluster 4 means from a Series back into a one-column data frame
cluster_4 = pd.DataFrame(cluster_4)

In [596]:
# Converting the cluster 0 means from a Series back into a one-column data frame
cluster_0 = pd.DataFrame(cluster_0)

In [597]:
# Previewing the first five feature means for cluster 0
cluster_0.head()

Unnamed: 0,mean
world_bank_totals,3173744000.0
chinese_aid_totals,5753771000.0
usaid_aid,5901221000.0
hdi_00,0.416962
hdi_14,0.5284333


In [598]:
# Label the cluster 0 means column 'mean_0' so it stays distinguishable once the
# two clusters are placed side by side
cluster_0 = cluster_0.rename(columns={'mean': "mean_0"})

In [599]:
# Label the cluster 4 means column 'mean_4' so it stays distinguishable once the
# two clusters are placed side by side
cluster_4 = cluster_4.rename(columns={'mean': "mean_4"})

In [600]:
# Put the two clusters' means side by side, matching rows on the shared
# feature-name index
cluster_compare = cluster_0.join(cluster_4, how='left')

In [601]:
# Per-feature gap between the clusters: cluster 0 mean minus cluster 4 mean
# (positive values mean cluster 0 is higher)
cluster_compare['cluster_difference'] = cluster_compare['mean_0'].sub(cluster_compare['mean_4'])

In [602]:
# Displaying the side-by-side cluster means and their difference (up to 25 rows)
cluster_compare.head(25)

Unnamed: 0,mean_0,mean_4,cluster_difference
world_bank_totals,3173744000.0,522105800.0,2651638000.0
chinese_aid_totals,5753771000.0,4842222000.0,911548800.0
usaid_aid,5901221000.0,5095573000.0,805647800.0
hdi_00,0.416962,0.4520923,-0.03513022
hdi_14,0.5284333,0.4894286,0.03900476
pr_score00,5.033333,5.785714,-0.752381
cl_score00,4.8,5.428571,-0.6285714
pr_score14,4.5,6.357143,-1.857143
cl_score14,4.3,6.142857,-1.842857
cpi_2014,32.41667,22.0,10.41667


### Some characteristics of cluster 0, the largest cluster (compared to cluster 4, the second largest): 

- More total aid from China, the US, and the World Bank
- Lower Freedom House scores (lower scores mean more freedom, so this is good)
- Higher CPI (also good)
- Higher populations
- Lower percentage of GDP from resources
- Lower debt to GDP 
- Improving Freedom House scores
- Larger changes in HDI
- More per capita World Bank aid, lower per capita Chinese and US aid

In [603]:
# Countries assigned to cluster 0, the largest cluster in the all-features model
aid.loc[aid['clusters_all'] == 0]

Unnamed: 0,country,world_bank_totals,chinese_aid_totals,usaid_aid,hdi_00,hdi_14,pr_score00,cl_score00,fh_status00,pr_score14,...,gdp_per_cap00,debt_to_gdp,fh_change,pc_gdp_change,hdi_change,world_bank_pc,chinese_aid_pc,usaid_pc,chinese_aid_total_max,clusters_all
1,Angola,803087300.0,38642330000.0,2939893000.0,0.394,0.557,6.0,6.0,NF,6.0,...,556.836318,40.676,-1.0,3286.361923,0.163,29.808251,1434.290317,109.120217,1,0
4,Burkina Faso,2858288000.0,0.0,2048434000.0,0.286,0.405,4.0,4.0,PF,6.0,...,226.475981,30.387,1.0,413.232114,0.119,162.531851,0.0,116.480855,0,0
5,Burundi,1163282000.0,558205200.0,1500181000.0,0.293,0.429,6.0,6.0,NF,6.0,...,136.463971,35.772,-1.0,108.862768,0.136,118.168117,56.703409,152.390909,0,0
7,Cameroon,1324141000.0,13422780000.0,634685400.0,0.439,0.54,7.0,6.0,NF,6.0,...,649.991845,21.534,-1.0,750.395172,0.101,58.378848,591.784862,27.982074,1,0
10,Comoros,73683160.0,280361300.0,11242950.0,0.457,0.534,6.0,4.0,PF,3.0,...,645.818691,22.597,-3.0,723.35794,0.077,97.029399,369.192814,14.805236,1,0
12,Cote D'Ivoire,1815460000.0,2865090000.0,1851870000.0,0.407,0.478,6.0,5.0,PF,5.0,...,651.305907,44.79,-2.0,726.490656,0.071,80.160942,126.506998,81.768633,1,0
13,Democratic Republic of Congo,5359333000.0,1356688000.0,6982450000.0,0.333,0.441,7.0,6.0,NF,6.0,...,405.216253,17.51,-1.0,-7.874582,0.108,72.651738,18.391425,94.654899,0,0
14,Djibouti,189723500.0,77881100.0,474107700.0,0.361,0.475,4.0,5.0,PF,6.0,...,768.176077,38.811,2.0,826.973923,0.114,211.109741,86.660117,527.550675,0,0
15,Egypt,8205844000.0,615146400.0,61292720000.0,0.611,0.683,6.0,5.0,NF,6.0,...,1450.476242,85.127,0.0,1197.817927,0.072,90.747858,6.802861,677.831989,0,0
21,Guinea,631584500.0,924788500.0,1347825000.0,0.335,0.446,6.0,5.0,NF,5.0,...,363.482479,35.09,-1.0,377.555656,0.111,56.639386,82.933402,120.870534,0,0


In [604]:
# Cluster 4: many of these countries appear to be troubled and poor, and have
# not made as much progress as the countries in cluster 0
aid.loc[aid['clusters_all'] == 4]

Unnamed: 0,country,world_bank_totals,chinese_aid_totals,usaid_aid,hdi_00,hdi_14,pr_score00,cl_score00,fh_status00,pr_score14,...,gdp_per_cap00,debt_to_gdp,fh_change,pc_gdp_change,hdi_change,world_bank_pc,chinese_aid_pc,usaid_pc,chinese_aid_total_max,clusters_all
8,Central African Republic,294585200.0,396736900.0,552390700.0,0.307,0.355,3.0,4.0,PF,7.0,...,251.206877,69.162,7.0,82.907673,0.048,65.98871,88.871262,123.738588,0,4
9,Chad,762985300.0,1522646000.0,2885400000.0,0.298,0.406,6.0,5.0,NF,7.0,...,165.762987,41.545,2.0,794.677933,0.108,55.840891,111.438466,211.174845,0,4
11,Congo,482041300.0,9013684000.0,243388700.0,0.495,0.595,6.0,4.0,PF,6.0,...,1029.577037,47.605,1.0,1976.159875,0.1,101.761441,1902.835892,51.380637,1,4
16,Eritrea,486042900.0,485394800.0,1028387000.0,0.507431,0.436,7.0,5.0,NF,7.0,...,308.133784,128.674,2.0,158.866216,-0.071431,108.612937,108.46812,229.807076,0,4
19,Gambia,159889100.0,0.0,139231200.0,0.382,0.449,7.0,5.0,NF,6.0,...,594.151643,104.889,0.0,153.557089,0.067,78.994956,0.0,68.78868,0,4
22,Guinea-Bissau,196021800.0,309113800.0,152051700.0,0.507431,0.45,4.0,5.0,PF,5.0,...,308.144119,54.856,1.0,247.460929,-0.057431,115.822072,182.643988,89.841749,1,4
25,Jordan,1829419000.0,157822400.0,25818000000.0,0.702,0.72,4.0,4.0,PF,6.0,...,1674.825261,89.049,3.0,1678.798121,0.018,205.090735,17.692999,2894.378653,0,4
33,Mauritania,527094900.0,1220488000.0,674582600.0,0.446,0.518,6.0,5.0,NF,6.0,...,491.842762,80.361,0.0,877.345183,0.072,134.090267,310.485915,171.61038,1,4
45,South Sudan,126336900.0,139151400.0,4261786000.0,0.507431,0.435,7.0,7.0,NF,7.0,...,9171.330145,38.339,0.0,-8339.479683,-0.072431,11.96952,13.183603,403.773891,0,4
52,Yemen,2445064000.0,392057100.0,2939501000.0,0.432,0.504,5.0,6.0,NF,6.0,...,554.448633,48.722,1.0,564.69886,0.072,94.683749,15.18219,113.830522,0,4


In [605]:
# Cluster 1: fairly prosperous and democratic countries compared to clusters 0 and 4
aid.loc[aid['clusters_all'] == 1]

Unnamed: 0,country,world_bank_totals,chinese_aid_totals,usaid_aid,hdi_00,hdi_14,pr_score00,cl_score00,fh_status00,pr_score14,...,gdp_per_cap00,debt_to_gdp,fh_change,pc_gdp_change,hdi_change,world_bank_pc,chinese_aid_pc,usaid_pc,chinese_aid_total_max,clusters_all
2,Benin,1107820000.0,1155924000.0,1785451000.0,0.398,0.505,2.0,2.0,F,2.0,...,374.192394,30.452,0.0,460.251202,0.107,107.692898,112.369168,173.566471,0,1
3,Botswana,385871900.0,2563158000.0,2141447000.0,0.578,0.709,2.0,2.0,F,3.0,...,3522.308678,17.346,1.0,4341.944603,0.131,184.750231,1227.205008,1025.295634,1,1
6,Cabo Verde,279419600.0,206126600.0,527552600.0,0.564,0.641,1.0,2.0,F,1.0,...,1259.323656,115.924,-1.0,2163.425146,0.077,539.140106,397.721329,1017.912719,0,1
20,Ghana,4853869000.0,12663980000.0,4463691000.0,0.483,0.577,2.0,3.0,F,1.0,...,258.470959,70.157,-2.0,1368.151823,0.094,178.290666,465.168975,163.958789,1,1
34,Mauritius,438339800.0,18482130000.0,24387200.0,0.674,0.786,1.0,2.0,F,1.0,...,3929.075495,57.543,0.0,5234.557978,0.112,347.631028,14657.494792,19.340582,1,1
37,Namibia,15833450.0,2942054000.0,2659016000.0,0.543,0.631,2.0,3.0,F,2.0,...,2136.440243,25.189,-1.0,4018.048643,0.088,6.964562,1294.103684,1169.60522,1,1
41,Sao Tome and Principe,55158990.0,0.0,38847670.0,0.48,0.577,1.0,2.0,F,2.0,...,1062.49,69.553,1.0,150.674082,0.097,281.815956,0.0,198.478856,0,1
42,Senegal,2227930000.0,2037594000.0,3656034000.0,0.39,0.501,3.0,4.0,PF,2.0,...,604.654066,54.462,-3.0,732.752624,0.111,157.176179,143.748325,257.926176,0,1
44,South Africa,3848384000.0,3355333000.0,10589590000.0,0.629,0.691,1.0,2.0,F,2.0,...,3032.427138,46.985,1.0,4550.269791,0.062,70.553013,61.513827,194.140619,0,1


In [606]:
# Cluster 2: Qatar alone. It is an outlier in some senses: it receives little aid,
# has a high HDI and high GDP, and is an authoritarian country
aid.loc[aid['clusters_all'] == 2]

Unnamed: 0,country,world_bank_totals,chinese_aid_totals,usaid_aid,hdi_00,hdi_14,pr_score00,cl_score00,fh_status00,pr_score14,...,gdp_per_cap00,debt_to_gdp,fh_change,pc_gdp_change,hdi_change,world_bank_pc,chinese_aid_pc,usaid_pc,chinese_aid_total_max,clusters_all
66,Qatar,0.0,0.0,18859773.0,0.816,0.854,6.0,6.0,NF,6.0,...,29976.11704,24.912,-1.0,35584.25836,0.038,0.0,0.0,7.669075,0,2


In [607]:
# Cluster 3: populous nations that receive lots of aid, particularly from China.
# Freedom House rates them partially free, and they sit in the middle on HDI.
aid.loc[aid['clusters_all'] == 3]

Unnamed: 0,country,world_bank_totals,chinese_aid_totals,usaid_aid,hdi_00,hdi_14,pr_score00,cl_score00,fh_status00,pr_score14,...,gdp_per_cap00,debt_to_gdp,fh_change,pc_gdp_change,hdi_change,world_bank_pc,chinese_aid_pc,usaid_pc,chinese_aid_total_max,clusters_all
17,Ethiopia,11450840000.0,83308640000.0,19011910000.0,0.283,0.446,5.0,5.0,PF,6.0,...,124.4608,46.802,2.0,324.959577,0.163,116.733003,849.271394,193.8127,1,3
39,Nigeria,10756090000.0,32518600000.0,9655779000.0,0.507431,0.523,4.0,4.0,PF,4.0,...,567.930736,13.072,1.0,1995.969233,0.015569,60.973878,184.34068,54.736455,1,3


In [608]:
# Cluster 5: oil-rich, non-democratic nations with high HDI
aid.loc[aid['clusters_all'] == 5]

Unnamed: 0,country,world_bank_totals,chinese_aid_totals,usaid_aid,hdi_00,hdi_14,pr_score00,cl_score00,fh_status00,pr_score14,...,gdp_per_cap00,debt_to_gdp,fh_change,pc_gdp_change,hdi_change,world_bank_pc,chinese_aid_pc,usaid_pc,chinese_aid_total_max,clusters_all
54,Bahrain,0.0,18159502.92,900204738.0,0.792,0.81,7.0,6.0,NF,7.0,...,13636.39623,44.397,0.0,8758.9098,0.018,0.0,13.591679,673.768118,0,5
62,United Arab Emirates,0.0,31771063.0,37945421.0,0.782,0.847,6.0,5.0,NF,6.0,...,33291.41937,15.537,1.0,5203.62698,0.065,0.0,3.448064,4.118157,0,5
64,Kuwait,0.0,0.0,8617661.0,0.786,0.8,4.0,5.0,PF,5.0,...,18440.37852,3.429,1.0,18718.35568,0.014,0.0,0.0,2.334813,0,5


In [609]:
# Cluster 6: mostly non-democratic Arab countries with fairly high HDI
aid.loc[aid['clusters_all'] == 6]

Unnamed: 0,country,world_bank_totals,chinese_aid_totals,usaid_aid,hdi_00,hdi_14,pr_score00,cl_score00,fh_status00,pr_score14,...,gdp_per_cap00,debt_to_gdp,fh_change,pc_gdp_change,hdi_change,world_bank_pc,chinese_aid_pc,usaid_pc,chinese_aid_total_max,clusters_all
0,Algeria,438050500.0,588090100.0,247662135.0,0.646,0.749,6.0,5.0,NF,6.0,...,1764.888222,7.673,0.0,2937.203478,0.103,11.254086,15.108798,6.362761,1,6
18,Gabon,62456370.0,2036791000.0,100394128.0,0.627,0.688,5.0,4.0,PF,6.0,...,4125.716664,34.063,2.0,5341.647122,0.061,33.154461,1081.213802,53.293411,1,6
23,Iran,1605363000.0,0.0,63865441.0,0.671,0.788,6.0,6.0,NF,6.0,...,1670.00947,11.826,0.0,4566.279941,0.117,20.723515,0.0,0.824435,0,6
55,Equatorial Guinea,0.0,3762500000.0,5080444.0,0.52,0.59,7.0,7.0,NF,7.0,...,1725.554738,12.721,0.0,14404.781632,0.07,0.0,3352.562039,4.526911,1,6
57,Libya,0.0,17301000.0,950008484.0,0.728,0.693,7.0,7.0,NF,6.0,...,7142.774452,36.39,-2.0,-611.917737,-0.035,0.0,2.719413,149.324577,0,6
65,Oman,0.0,0.0,885517811.0,0.704,0.815,6.0,5.0,NF,6.0,...,8601.25417,4.925,0.0,8283.02924,0.111,0.0,0.0,219.880964,0,6
67,Saudi Arabia,0.0,0.0,20942632.0,0.744,0.853,7.0,7.0,NF,7.0,...,9171.330145,1.562,0.0,11916.024265,0.109,0.0,0.0,0.677383,0,6


In [610]:
# Cluster 7: Israel alone, a democratic country with very high US aid and high HDI
aid.loc[aid['clusters_all'] == 7]

Unnamed: 0,country,world_bank_totals,chinese_aid_totals,usaid_aid,hdi_00,hdi_14,pr_score00,cl_score00,fh_status00,pr_score14,...,gdp_per_cap00,debt_to_gdp,fh_change,pc_gdp_change,hdi_change,world_bank_pc,chinese_aid_pc,usaid_pc,chinese_aid_total_max,clusters_all
56,Israel,0.0,66231500.0,102054100000.0,0.853,0.9,1.0,3.0,F,1.0,...,21043.57493,66.094,-1.0,11934.38594,0.047,0.0,8.061577,12421.83908,0,7


In [611]:
# Cluster 8: Iraq alone, a non-democratic, oil-rich country which has received
# massive amounts of US aid
aid.loc[aid['clusters_all'] == 8]

Unnamed: 0,country,world_bank_totals,chinese_aid_totals,usaid_aid,hdi_00,hdi_14,pr_score00,cl_score00,fh_status00,pr_score14,...,gdp_per_cap00,debt_to_gdp,fh_change,pc_gdp_change,hdi_change,world_bank_pc,chinese_aid_pc,usaid_pc,chinese_aid_total_max,clusters_all
24,Iraq,1174182000.0,23499450.0,134310000000.0,0.608,0.662,7.0,7.0,NF,6.0,...,1100.0,32.028,-2.0,4244.331964,0.054,34.121353,0.682886,3903.0044,0,8


In [612]:
# Saving the data, including the new 'clusters_all' column, to a csv.
# NOTE(review): this runs before 'cluster_all_aid' and 'cluster_pc_aid' are added
# later in the notebook, so the saved file will not contain those columns.
# index=True also writes the row index as an extra unnamed first column.
aid.to_csv('./aid_data/combined_data/aid_clusters.csv', index=True)

## K-means Model with Aid Totals

In [613]:
# Standardize only the three total-aid columns for the second model
ss = StandardScaler()
X_scaled2 = ss.fit_transform(
    aid.loc[:, ['world_bank_totals', 'chinese_aid_totals', 'usaid_aid']]
)

In [614]:
# Fit a K-means model on the scaled aid totals. 4 clusters gave the best silhouette score.
# NOTE(review): n_init=1 means only one k-means++ initialization is tried, so the
# resulting clusters depend entirely on random_state=22.
true_k = 4
model = KMeans(
    n_clusters=true_k,
    init='k-means++',
    max_iter=100,
    n_init=1,
    random_state=22,
).fit(X_scaled2)
model

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=100,
       n_clusters=4, n_init=1, n_jobs=None, precompute_distances='auto',
       random_state=22, tol=0.0001, verbose=0)

In [615]:
# Assigning every row of the scaled aid totals to one of the four fitted clusters
model.predict(X_scaled2)

array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 3, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0], dtype=int32)

In [616]:
# Storing the aid-totals model's cluster labels in a new 'cluster_all_aid' column,
# then previewing the first rows to confirm the column was added
aid['cluster_all_aid'] = model.labels_
aid.head()

Unnamed: 0,country,world_bank_totals,chinese_aid_totals,usaid_aid,hdi_00,hdi_14,pr_score00,cl_score00,fh_status00,pr_score14,...,debt_to_gdp,fh_change,pc_gdp_change,hdi_change,world_bank_pc,chinese_aid_pc,usaid_pc,chinese_aid_total_max,clusters_all,cluster_all_aid
0,Algeria,438050500.0,588090100.0,247662100.0,0.646,0.749,6.0,5.0,NF,6.0,...,7.673,0.0,2937.203478,0.103,11.254086,15.108798,6.362761,1,6,0
1,Angola,803087300.0,38642330000.0,2939893000.0,0.394,0.557,6.0,6.0,NF,6.0,...,40.676,-1.0,3286.361923,0.163,29.808251,1434.290317,109.120217,1,0,1
2,Benin,1107820000.0,1155924000.0,1785451000.0,0.398,0.505,2.0,2.0,F,2.0,...,30.452,0.0,460.251202,0.107,107.692898,112.369168,173.566471,0,1,0
3,Botswana,385871900.0,2563158000.0,2141447000.0,0.578,0.709,2.0,2.0,F,3.0,...,17.346,1.0,4341.944603,0.131,184.750231,1227.205008,1025.295634,1,1,0
4,Burkina Faso,2858288000.0,0.0,2048434000.0,0.286,0.405,4.0,4.0,PF,6.0,...,30.387,1.0,413.232114,0.119,162.531851,0.0,116.480855,0,0,0


In [617]:
# Checking the silhouette score for the aid-totals model, which is good
silhouette_score(X_scaled2, model.labels_)

0.7143082216474426

In [618]:
# Looking at the number of countries per cluster. The majority fall in cluster 0
aid['cluster_all_aid'].value_counts()

0    59
1     6
2     2
3     1
Name: cluster_all_aid, dtype: int64

In [619]:
# Cluster 1: highly populous countries that mostly receive a lot of aid in general
aid.loc[aid['cluster_all_aid'] == 1]

Unnamed: 0,country,world_bank_totals,chinese_aid_totals,usaid_aid,hdi_00,hdi_14,pr_score00,cl_score00,fh_status00,pr_score14,...,debt_to_gdp,fh_change,pc_gdp_change,hdi_change,world_bank_pc,chinese_aid_pc,usaid_pc,chinese_aid_total_max,clusters_all,cluster_all_aid
1,Angola,803087300.0,38642330000.0,2939893000.0,0.394,0.557,6.0,6.0,NF,6.0,...,40.676,-1.0,3286.361923,0.163,29.808251,1434.290317,109.120217,1,0,1
17,Ethiopia,11450840000.0,83308640000.0,19011910000.0,0.283,0.446,5.0,5.0,PF,6.0,...,46.802,2.0,324.959577,0.163,116.733003,849.271394,193.8127,1,3,1
26,Kenya,5346269000.0,39532640000.0,15133680000.0,0.446,0.556,6.0,5.0,NF,4.0,...,48.564,-3.0,662.612357,0.11,114.481181,846.523638,324.061835,1,0,1
39,Nigeria,10756090000.0,32518600000.0,9655779000.0,0.507431,0.523,4.0,4.0,PF,4.0,...,13.072,1.0,1995.969233,0.015569,60.973878,184.34068,54.736455,1,3,1
47,Tanzania,7763594000.0,24102820000.0,9664759000.0,0.395,0.509,4.0,4.0,PF,3.0,...,33.795,-2.0,435.434514,0.114,155.396745,482.444055,193.450622,1,0,1
60,Sudan,0.0,30610640000.0,19230960000.0,0.403,0.496,7.0,7.0,NF,7.0,...,56.005,0.0,1459.368229,0.093,0.0,806.017098,506.375568,1,4,1


In [620]:
# Cluster 2: two countries that receive massive amounts of US aid
aid.loc[aid['cluster_all_aid'] == 2]

Unnamed: 0,country,world_bank_totals,chinese_aid_totals,usaid_aid,hdi_00,hdi_14,pr_score00,cl_score00,fh_status00,pr_score14,...,debt_to_gdp,fh_change,pc_gdp_change,hdi_change,world_bank_pc,chinese_aid_pc,usaid_pc,chinese_aid_total_max,clusters_all,cluster_all_aid
24,Iraq,1174182000.0,23499450.0,134310000000.0,0.608,0.662,7.0,7.0,NF,6.0,...,32.028,-2.0,4244.331964,0.054,34.121353,0.682886,3903.0044,0,8,2
56,Israel,0.0,66231500.0,102054100000.0,0.853,0.9,1.0,3.0,F,1.0,...,66.094,-1.0,11934.38594,0.047,0.0,8.061577,12421.83908,0,7,2


In [621]:
# Cluster 3: Turkey alone, which receives more total World Bank aid than any other country
aid.loc[aid['cluster_all_aid'] == 3]

Unnamed: 0,country,world_bank_totals,chinese_aid_totals,usaid_aid,hdi_00,hdi_14,pr_score00,cl_score00,fh_status00,pr_score14,...,debt_to_gdp,fh_change,pc_gdp_change,hdi_change,world_bank_pc,chinese_aid_pc,usaid_pc,chinese_aid_total_max,clusters_all,cluster_all_aid
50,Turkey,25606560000.0,8767650000.0,1776648000.0,0.655,0.792,4.0,5.0,PF,3.0,...,28.769,-2.0,8961.211862,0.137,331.55418,113.52368,23.00407,1,0,3


## K-means Model with Per Capita Aid

In [622]:
# Standardize only the three per-capita aid columns for the third model
ss = StandardScaler()
X_scaled3 = ss.fit_transform(
    aid.loc[:, ['chinese_aid_pc', 'world_bank_pc', 'usaid_pc']]
)

In [623]:
# Fit a K-means model on the scaled per-capita aid. true_k=5 gave the best silhouette score.
# NOTE(review): n_init=1 means only one k-means++ initialization is tried, so the
# resulting clusters depend entirely on random_state=22.
true_k = 5
model = KMeans(
    n_clusters=true_k,
    init='k-means++',
    max_iter=100,
    n_init=1,
    random_state=22,
).fit(X_scaled3)
model

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=100,
       n_clusters=5, n_init=1, n_jobs=None, precompute_distances='auto',
       random_state=22, tol=0.0001, verbose=0)

In [624]:
# Assigning every row of the scaled per-capita aid to one of the five fitted clusters
model.predict(X_scaled3)

array([0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 4, 4, 0, 2, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0,
       0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0,
       0, 0], dtype=int32)

In [625]:
# Storing the per-capita model's cluster labels in a new 'cluster_pc_aid' column,
# then previewing the first rows to confirm the column was added
aid['cluster_pc_aid'] = model.labels_
aid.head()

Unnamed: 0,country,world_bank_totals,chinese_aid_totals,usaid_aid,hdi_00,hdi_14,pr_score00,cl_score00,fh_status00,pr_score14,...,fh_change,pc_gdp_change,hdi_change,world_bank_pc,chinese_aid_pc,usaid_pc,chinese_aid_total_max,clusters_all,cluster_all_aid,cluster_pc_aid
0,Algeria,438050500.0,588090100.0,247662100.0,0.646,0.749,6.0,5.0,NF,6.0,...,0.0,2937.203478,0.103,11.254086,15.108798,6.362761,1,6,0,0
1,Angola,803087300.0,38642330000.0,2939893000.0,0.394,0.557,6.0,6.0,NF,6.0,...,-1.0,3286.361923,0.163,29.808251,1434.290317,109.120217,1,0,1,0
2,Benin,1107820000.0,1155924000.0,1785451000.0,0.398,0.505,2.0,2.0,F,2.0,...,0.0,460.251202,0.107,107.692898,112.369168,173.566471,0,1,0,0
3,Botswana,385871900.0,2563158000.0,2141447000.0,0.578,0.709,2.0,2.0,F,3.0,...,1.0,4341.944603,0.131,184.750231,1227.205008,1025.295634,1,1,0,0
4,Burkina Faso,2858288000.0,0.0,2048434000.0,0.286,0.405,4.0,4.0,PF,6.0,...,1.0,413.232114,0.119,162.531851,0.0,116.480855,0,0,0,0


In [579]:
# Checking my silhouette score for the per-capita model, which is good.
# NOTE(review): this cell's execution count (579) is lower than that of the cells
# around it, so the score shown may come from an earlier run -- re-run the
# notebook top to bottom to confirm it.
silhouette_score(X_scaled3, model.labels_)

0.45525017625873254

In [626]:
# Number of countries per cluster. Most of the countries are in the same cluster (0)
aid['cluster_pc_aid'].value_counts()

0    56
2     7
4     3
3     1
1     1
Name: cluster_pc_aid, dtype: int64

In [627]:
# Cluster 1: Mauritius alone, which gets a huge amount of Chinese aid per capita,
# as it is a small country
aid.loc[aid['cluster_pc_aid'] == 1]

Unnamed: 0,country,world_bank_totals,chinese_aid_totals,usaid_aid,hdi_00,hdi_14,pr_score00,cl_score00,fh_status00,pr_score14,...,fh_change,pc_gdp_change,hdi_change,world_bank_pc,chinese_aid_pc,usaid_pc,chinese_aid_total_max,clusters_all,cluster_all_aid,cluster_pc_aid
34,Mauritius,438339800.0,18482130000.0,24387198.0,0.674,0.786,1.0,2.0,F,1.0,...,0.0,5234.557978,0.112,347.631028,14657.494792,19.340582,1,1,0,1


In [628]:
# Cluster 2: countries with relatively high World Bank aid per capita; several
# of them also get a lot of US aid per capita
aid.loc[aid['cluster_pc_aid'] == 2]

Unnamed: 0,country,world_bank_totals,chinese_aid_totals,usaid_aid,hdi_00,hdi_14,pr_score00,cl_score00,fh_status00,pr_score14,...,fh_change,pc_gdp_change,hdi_change,world_bank_pc,chinese_aid_pc,usaid_pc,chinese_aid_total_max,clusters_all,cluster_all_aid,cluster_pc_aid
6,Cabo Verde,279419600.0,206126600.0,527552600.0,0.564,0.641,1.0,2.0,F,1.0,...,-1.0,2163.425146,0.077,539.140106,397.721329,1017.912719,0,1,0,2
14,Djibouti,189723500.0,77881100.0,474107700.0,0.361,0.475,4.0,5.0,PF,6.0,...,2.0,826.973923,0.114,211.109741,86.660117,527.550675,0,0,0,2
27,Lebanon,1388968000.0,62908710.0,4989869000.0,0.507431,0.732,6.0,5.0,NF,5.0,...,-2.0,2157.576513,0.224569,221.794562,10.045447,796.796976,0,0,0,2
29,Liberia,980563600.0,693038700.0,4512835000.0,0.422,0.464,5.0,6.0,PF,3.0,...,-4.0,279.396466,0.042,224.925436,158.971888,1035.171491,0,0,0,2
41,Sao Tome and Principe,55158990.0,0.0,38847670.0,0.48,0.577,1.0,2.0,F,2.0,...,1.0,150.674082,0.097,281.815956,0.0,198.478856,0,1,0,2
49,Tunisia,3714659000.0,135546300.0,1054239000.0,0.653,0.728,6.0,5.0,NF,1.0,...,-7.0,2090.659727,0.075,335.767137,12.251993,95.292374,0,0,0,2
50,Turkey,25606560000.0,8767650000.0,1776648000.0,0.655,0.792,4.0,5.0,PF,3.0,...,-2.0,8961.211862,0.137,331.55418,113.52368,23.00407,1,0,3,2


In [629]:
# Cluster 3: Israel alone, which receives a ton of aid per capita from the United States
aid.loc[aid['cluster_pc_aid'] == 3]

Unnamed: 0,country,world_bank_totals,chinese_aid_totals,usaid_aid,hdi_00,hdi_14,pr_score00,cl_score00,fh_status00,pr_score14,...,fh_change,pc_gdp_change,hdi_change,world_bank_pc,chinese_aid_pc,usaid_pc,chinese_aid_total_max,clusters_all,cluster_all_aid,cluster_pc_aid
56,Israel,0.0,66231500.0,102054100000.0,0.853,0.9,1.0,3.0,F,1.0,...,-1.0,11934.38594,0.047,0.0,8.061577,12421.83908,0,7,2,3


In [630]:
# Cluster 4: Arab countries receiving a lot of US aid per capita
aid.loc[aid['cluster_pc_aid'] == 4]

Unnamed: 0,country,world_bank_totals,chinese_aid_totals,usaid_aid,hdi_00,hdi_14,pr_score00,cl_score00,fh_status00,pr_score14,...,fh_change,pc_gdp_change,hdi_change,world_bank_pc,chinese_aid_pc,usaid_pc,chinese_aid_total_max,clusters_all,cluster_all_aid,cluster_pc_aid
24,Iraq,1174182000.0,23499450.0,134310000000.0,0.608,0.662,7.0,7.0,NF,6.0,...,-2.0,4244.331964,0.054,34.121353,0.682886,3903.0044,0,8,2,4
25,Jordan,1829419000.0,157822400.0,25818000000.0,0.702,0.72,4.0,4.0,PF,6.0,...,3.0,1678.798121,0.018,205.090735,17.692999,2894.378653,0,4,0,4
58,Palestine,0.0,20111750.0,13369540000.0,0.507431,0.682,6.0,6.0,NF,6.0,...,0.0,1127.349164,0.174569,0.0,4.819035,3203.513553,0,0,0,4
