In this file, we will be looking at 3 different files based on types of goverment classifications, and trying to see how accurately we can predict these classifications based on prisoner numbers for each given country.

In [24]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

First thing we will do is create our 3 dataframes which include country, goverment type, and various prison stats.

# Classification Dataframe 1

In [25]:
#Reading in first goverment classification dataframe
#https://en.wikipedia.org/wiki/List_of_countries_by_system_of_government
df1 = pd.read_csv('../../Datasets/Original_Data/gov_classification_1.csv')

In [26]:
#Reading in prison numbers by country dataframe
#https://worldpopulationreview.com/country-rankings/incarceration-rates-by-country
df_prison = pd.read_csv('../../Datasets/Original_Data/incarceration-rates-by-country-2023.csv')

In [27]:
df1.head()

Unnamed: 0,Name,Constitutional form,Head of state,Basis of executive legitimacy
0,Afghanistan,Provisional,,No constitutionally-defined basis to current r...
1,Albania,Republic,Ceremonial,Ministry is subject to parliamentary confidence
2,Algeria,Republic,Executive,Presidency independent of legislature; ministr...
3,Andorra,Constitutional monarchy,Ceremonial,Ministry is subject to parliamentary confidence
4,Angola,Republic,Executive,Presidency is independent of legislature


In [28]:
df_prison.head()

Unnamed: 0,country,incarcerationRatesByCountry_ratePer100k,incarcerationRatesByCountry_totIncarcerated,incarcerationRatesByCountry_percMale,incarcerationRatesByCountry_percFemale
0,United States,629,2068800,0.9,0.1
1,Rwanda,580,76099,0.95,0.05
2,Turkmenistan,576,35000,0.94,0.07
3,El Salvador,564,36663,0.93,0.07
4,Cuba,510,57337,,


In [29]:
#Shape of dataframes
df1.shape, df_prison.shape

((195, 4), (217, 5))

In [30]:
#Alphabetical order by country
df1 = df1.sort_values(by = 'Name', ascending = True)
df1.head()

Unnamed: 0,Name,Constitutional form,Head of state,Basis of executive legitimacy
0,Afghanistan,Provisional,,No constitutionally-defined basis to current r...
1,Albania,Republic,Ceremonial,Ministry is subject to parliamentary confidence
2,Algeria,Republic,Executive,Presidency independent of legislature; ministr...
3,Andorra,Constitutional monarchy,Ceremonial,Ministry is subject to parliamentary confidence
4,Angola,Republic,Executive,Presidency is independent of legislature


In [31]:
#Alphabetical order by country
df_prison = df_prison.sort_values(by = 'country', ascending = True)
df1.head()

Unnamed: 0,Name,Constitutional form,Head of state,Basis of executive legitimacy
0,Afghanistan,Provisional,,No constitutionally-defined basis to current r...
1,Albania,Republic,Ceremonial,Ministry is subject to parliamentary confidence
2,Algeria,Republic,Executive,Presidency independent of legislature; ministr...
3,Andorra,Constitutional monarchy,Ceremonial,Ministry is subject to parliamentary confidence
4,Angola,Republic,Executive,Presidency is independent of legislature


In [32]:
#Indexed by country
df1.set_index('Name', inplace = True)
df1.head()

Unnamed: 0_level_0,Constitutional form,Head of state,Basis of executive legitimacy
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Afghanistan,Provisional,,No constitutionally-defined basis to current r...
Albania,Republic,Ceremonial,Ministry is subject to parliamentary confidence
Algeria,Republic,Executive,Presidency independent of legislature; ministr...
Andorra,Constitutional monarchy,Ceremonial,Ministry is subject to parliamentary confidence
Angola,Republic,Executive,Presidency is independent of legislature


In [33]:
#Indexed by country
df_prison.set_index('country', inplace = True)
df_prison.head()

Unnamed: 0_level_0,incarcerationRatesByCountry_ratePer100k,incarcerationRatesByCountry_totIncarcerated,incarcerationRatesByCountry_percMale,incarcerationRatesByCountry_percFemale
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Afghanistan,77,28240,0.97,0.03
Albania,179,5042,0.98,0.02
Algeria,153,65000,0.99,0.02
American Samoa,345,193,0.96,0.04
Andorra,78,61,0.88,0.12


In [34]:
#Merge both dataframes
#https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.join.html
df_merge_1 = df1.join(df_prison, how = 'inner')
df_merge_1.head(10)
#Having problems merging based on index. Need to figure out why

Unnamed: 0,Constitutional form,Head of state,Basis of executive legitimacy,incarcerationRatesByCountry_ratePer100k,incarcerationRatesByCountry_totIncarcerated,incarcerationRatesByCountry_percMale,incarcerationRatesByCountry_percFemale


In [35]:
print(df1.index.dtype, df_prison.index.dtype)

object object


In [36]:
print(df1.index[0], df_prison.index[0])

 Afghanistan Afghanistan


In [37]:
print(len(df1.index[0]), len(df_prison.index[0]))
#Seems there is a space in the first dataframe's indicies. Will remove space

12 11


In [38]:
#Removing space
df1.index = df1.index.str.strip()
print(len(df1.index[0]), len(df_prison.index[0]))

11 11


In [39]:
#Merge both dataframes
#https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.join.html
df_merge_1 = df1.join(df_prison, how = 'inner')
df_merge_1.head(10)

Unnamed: 0,Constitutional form,Head of state,Basis of executive legitimacy,incarcerationRatesByCountry_ratePer100k,incarcerationRatesByCountry_totIncarcerated,incarcerationRatesByCountry_percMale,incarcerationRatesByCountry_percFemale
Afghanistan,Provisional,,No constitutionally-defined basis to current r...,77,28240,0.97,0.03
Albania,Republic,Ceremonial,Ministry is subject to parliamentary confidence,179,5042,0.98,0.02
Algeria,Republic,Executive,Presidency independent of legislature; ministr...,153,65000,0.99,0.02
Andorra,Constitutional monarchy,Ceremonial,Ministry is subject to parliamentary confidence,78,61,0.88,0.12
Angola,Republic,Executive,Presidency is independent of legislature,89,26000,0.97,0.03
Antigua and Barbuda,Constitutional monarchy,Ceremonial,Ministry is subject to parliamentary confidence,216,210,0.95,0.05
Argentina,Republic,Executive,Presidency is independent of legislature,243,109405,0.96,0.04
Armenia,Republic,Ceremonial,Ministry is subject to parliamentary confidence,72,2145,0.97,0.03
Australia,Constitutional monarchy,Ceremonial,Ministry is subject to parliamentary confidence,167,42909,0.93,0.08
Austria,Republic,Ceremonial,Ministry is subject to parliamentary confidence,91,8132,0.94,0.06


In [40]:
#Deleting unneccesary columns
df_merge_1.drop(columns = ['Head of state', 'Basis of executive legitimacy'], inplace = True)
df_merge_1.head()

Unnamed: 0,Constitutional form,incarcerationRatesByCountry_ratePer100k,incarcerationRatesByCountry_totIncarcerated,incarcerationRatesByCountry_percMale,incarcerationRatesByCountry_percFemale
Afghanistan,Provisional,77,28240,0.97,0.03
Albania,Republic,179,5042,0.98,0.02
Algeria,Republic,153,65000,0.99,0.02
Andorra,Constitutional monarchy,78,61,0.88,0.12
Angola,Republic,89,26000,0.97,0.03


# Classification Dataframe 2

In [41]:
#Reading in second goverment classification dataframe
#https://www.democracymatrix.com/ranking
df2 = pd.read_csv('../../Datasets/Original_Data//gov_classification_2.csv')

In [42]:
df2.head()

Unnamed: 0,Rank,Country,Total Value Index,Classification
0,1,Denmark,0.958,Working Democracy
1,2,Norway,0.956,Working Democracy
2,3,Finland,0.946,Working Democracy
3,4,Sweden,0.946,Working Democracy
4,5,Germany,0.944,Working Democracy


In [43]:
#Alphabetical order by country
df2 = df2.sort_values(by = 'Country', ascending = True)
df2.head()

Unnamed: 0,Rank,Country,Total Value Index,Classification
124,125,Afghanistan,0.358,Moderate Autocracy
78,79,Albania,0.583,Deficient Democracy
133,134,Algeria,0.319,Moderate Autocracy
119,120,Angola,0.387,Moderate Autocracy
43,44,Argentina,0.763,Deficient Democracy


In [44]:
#Indexed by country
df2.set_index('Country', inplace = True)
df2.head()

Unnamed: 0_level_0,Rank,Total Value Index,Classification
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Afghanistan,125,0.358,Moderate Autocracy
Albania,79,0.583,Deficient Democracy
Algeria,134,0.319,Moderate Autocracy
Angola,120,0.387,Moderate Autocracy
Argentina,44,0.763,Deficient Democracy


In [45]:
#Merge df2 with df_prison
df_merge_2 = df2.join(df_prison, how = 'inner')
df_merge_2.head()

Unnamed: 0,Rank,Total Value Index,Classification,incarcerationRatesByCountry_ratePer100k,incarcerationRatesByCountry_totIncarcerated,incarcerationRatesByCountry_percMale,incarcerationRatesByCountry_percFemale
Afghanistan,125,0.358,Moderate Autocracy,77,28240,0.97,0.03
Albania,79,0.583,Deficient Democracy,179,5042,0.98,0.02
Algeria,134,0.319,Moderate Autocracy,153,65000,0.99,0.02
Angola,120,0.387,Moderate Autocracy,89,26000,0.97,0.03
Argentina,44,0.763,Deficient Democracy,243,109405,0.96,0.04


In [46]:
#Deleting unneccesary columns
df_merge_2.drop(columns = ['Total Value Index', 'Rank'], inplace = True)
df_merge_2.head()

Unnamed: 0,Classification,incarcerationRatesByCountry_ratePer100k,incarcerationRatesByCountry_totIncarcerated,incarcerationRatesByCountry_percMale,incarcerationRatesByCountry_percFemale
Afghanistan,Moderate Autocracy,77,28240,0.97,0.03
Albania,Deficient Democracy,179,5042,0.98,0.02
Algeria,Moderate Autocracy,153,65000,0.99,0.02
Angola,Moderate Autocracy,89,26000,0.97,0.03
Argentina,Deficient Democracy,243,109405,0.96,0.04


# Classification Dataframe 3

In [47]:
#Reading in second goverment classification dataframe
#https://worldpopulationreview.com/country-rankings/democracy-countries
df3 = pd.read_csv('../../Datasets/Original_Data//gov_classification_3.csv')

In [48]:
df3.head()

Unnamed: 0,country,democracyCountries_category,democracyCountries_score,democracyCountries_elecProcess,democracyCountries_functioningOfGovt,democracyCountries_politParticipation,democracyCountries_politCulture,democracyCountries_civilLiberties
0,Norway,Full Democracy,9.81,10.0,9.64,10.0,10,9.41
1,Iceland,Full Democracy,9.37,10.0,8.57,8.0,10,9.41
2,Sweden,Full Democracy,9.26,9.0,9.29,8.0,10,9.12
3,New Zealand,Full Democracy,9.25,10.0,8.93,8.0,8,9.71
4,Canada,Full Democracy,9.24,9.0,8.93,8.0,9,9.41


In [49]:
#Keeping necessary columns, dropping rest
#https://saturncloud.io/blog/how-to-delete-a-column-in-pandas-dataframe-based-on-a-condition/#:~:text=Using%20the%20drop()%20function&text=The%20loc%5B%5D%20function%20is,based%20on%20labels%20or%20conditions.&text=In%20this%20example%2C%20we%20create,C'%20is%20greater%20than%208.
df3 = df3.loc[:, ['country', 'democracyCountries_category']]
df3.head()

Unnamed: 0,country,democracyCountries_category
0,Norway,Full Democracy
1,Iceland,Full Democracy
2,Sweden,Full Democracy
3,New Zealand,Full Democracy
4,Canada,Full Democracy


In [50]:
#Alphabetical order by country
df3 = df3.sort_values(by = 'country', ascending = True)
df3.head()

Unnamed: 0,country,democracyCountries_category
135,Afghanistan,Authoritarian Regime
69,Albania,Flawed Democracy
112,Algeria,Authoritarian Regime
114,Angola,Authoritarian Regime
46,Argentina,Flawed Democracy


In [51]:
#Indexed by country
df3.set_index('country', inplace = True)
df3.head()

Unnamed: 0_level_0,democracyCountries_category
country,Unnamed: 1_level_1
Afghanistan,Authoritarian Regime
Albania,Flawed Democracy
Algeria,Authoritarian Regime
Angola,Authoritarian Regime
Argentina,Flawed Democracy


In [52]:
#Merge df3 with df_prison
df_merge_3 = df3.join(df_prison, how = 'inner')
df_merge_3.head()

Unnamed: 0_level_0,democracyCountries_category,incarcerationRatesByCountry_ratePer100k,incarcerationRatesByCountry_totIncarcerated,incarcerationRatesByCountry_percMale,incarcerationRatesByCountry_percFemale
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Afghanistan,Authoritarian Regime,77,28240,0.97,0.03
Albania,Flawed Democracy,179,5042,0.98,0.02
Algeria,Authoritarian Regime,153,65000,0.99,0.02
Angola,Authoritarian Regime,89,26000,0.97,0.03
Argentina,Flawed Democracy,243,109405,0.96,0.04


# Modelling

In [53]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

In this section, we will attempt to create various models for our 3 dataframes and see how well we can predict a given government type based on prison stats.

In [54]:
df_merge_1.shape, df_merge_2.shape, df_merge_3.shape

((181, 5), (159, 5), (160, 5))

In [55]:
df_merge_1['Constitutional form'].value_counts()

Republic                   130
Constitutional monarchy     36
Provisional                 11
Absolute monarchy            4
Name: Constitutional form, dtype: int64

In [56]:
df_merge_1[df_merge_1['Constitutional form'] == 'Absolute monarchy']

Unnamed: 0,Constitutional form,incarcerationRatesByCountry_ratePer100k,incarcerationRatesByCountry_totIncarcerated,incarcerationRatesByCountry_percMale,incarcerationRatesByCountry_percFemale
Brunei,Absolute monarchy,186,841,0.88,0.12
Eswatini,Absolute monarchy,277,3796,0.97,0.03
Oman,Absolute monarchy,45,1960,0.96,0.05
Saudi Arabia,Absolute monarchy,207,68056,0.98,0.02


In [57]:
df_merge_2['Classification'].value_counts()

Deficient Democracy    45
Hybrid Regime          35
Working Democracy      33
Moderate Autocracy     30
Hard Autocracy         16
Name: Classification, dtype: int64

In [58]:
df_merge_3['democracyCountries_category'].value_counts()

Authoritarian Regime    53
Flawed Democracy        51
Hybrid Regime           34
Full Democracy          22
Name: democracyCountries_category, dtype: int64

In [59]:
df_merge_3.loc['El Salvador']

democracyCountries_category                    Hybrid Regime
incarcerationRatesByCountry_ratePer100k                  564
incarcerationRatesByCountry_totIncarcerated            36663
incarcerationRatesByCountry_percMale                    0.93
incarcerationRatesByCountry_percFemale                  0.07
Name: El Salvador, dtype: object

### df_merge_1 Modelling

In [60]:
df_merge_1.head()

Unnamed: 0,Constitutional form,incarcerationRatesByCountry_ratePer100k,incarcerationRatesByCountry_totIncarcerated,incarcerationRatesByCountry_percMale,incarcerationRatesByCountry_percFemale
Afghanistan,Provisional,77,28240,0.97,0.03
Albania,Republic,179,5042,0.98,0.02
Algeria,Republic,153,65000,0.99,0.02
Andorra,Constitutional monarchy,78,61,0.88,0.12
Angola,Republic,89,26000,0.97,0.03


In [61]:
df_merge_1['Constitutional form'].value_counts()

Republic                   130
Constitutional monarchy     36
Provisional                 11
Absolute monarchy            4
Name: Constitutional form, dtype: int64

In [62]:
df_merge_1.tail(35)

Unnamed: 0,Constitutional form,incarcerationRatesByCountry_ratePer100k,incarcerationRatesByCountry_totIncarcerated,incarcerationRatesByCountry_percMale,incarcerationRatesByCountry_percFemale
Slovenia,Republic,64,1360,0.95,0.05
Solomon Islands,Constitutional monarchy,79,500,0.98,0.02
South Africa,Republic,248,147922,0.98,0.02
South Korea,Republic,105,53920,0.93,0.07
South Sudan,Republic,50,7000,0.89,0.11
Spain,Constitutional monarchy,113,55433,0.93,0.07
Sri Lanka,Republic,135,28915,0.95,0.05
Sudan,Provisional,52,21000,0.98,0.02
Suriname,Republic,185,1000,0.97,0.03
Sweden,Constitutional monarchy,73,7607,0.94,0.06


In [63]:
df_merge_1.dropna(inplace = True)

In [64]:
y = df_merge_1['Constitutional form']
X = df_merge_1.drop(columns = 'Constitutional form')

In [65]:
#Logistic Regression
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)
lr = LogisticRegression()
lr.fit(X_train, y_train)
print(lr.score(X_train, y_train), lr.score(X_test, y_test))

0.7342657342657343 0.6388888888888888


In [66]:
#KNN
ss = StandardScaler()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)
X_train_sc = ss.fit_transform(X_train)
X_test_sc = ss.transform(X_test)
knn = KNeighborsClassifier()
knn.fit(X_train_sc, y_train)
print(knn.score(X_train, y_train), knn.score(X_test, y_test))

0.7342657342657343 0.6388888888888888




In [67]:
#Decision tree
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)
dt = DecisionTreeClassifier()
dt.fit(X_train, y_train)
print(dt.score(X_train, y_train), dt.score(X_test, y_test))

1.0 0.6666666666666666


In [68]:
#Bagged trees
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)
bt = BaggingClassifier()
bt.fit(X_train, y_train)
print(bt.score(X_train, y_train), bt.score(X_test, y_test))

0.958041958041958 0.5833333333333334


In [69]:
#Random Forest
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)
rf = RandomForestClassifier()
rf.fit(X_train, y_train)
print(rf.score(X_train, y_train), rf.score(X_test, y_test))

1.0 0.5833333333333334


In [70]:
#Adaboost
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)
ada = AdaBoostClassifier()
ada.fit(X_train, y_train)
print(ada.score(X_train, y_train), ada.score(X_test, y_test))

0.3916083916083916 0.4444444444444444


In [71]:
#Support Vector Model
ss = StandardScaler()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)
X_train_scc = ss.fit_transform(X_train)
X_test_scc = ss.transform(X_test)
svc = SVC()
svc.fit(X_train_scc, y_train)
print(svc.score(X_train, y_train), svc.score(X_test, y_test))

0.7342657342657343 0.6388888888888888




### df_merge_2 Modelling

In [72]:
df_merge_2.head()

Unnamed: 0,Classification,incarcerationRatesByCountry_ratePer100k,incarcerationRatesByCountry_totIncarcerated,incarcerationRatesByCountry_percMale,incarcerationRatesByCountry_percFemale
Afghanistan,Moderate Autocracy,77,28240,0.97,0.03
Albania,Deficient Democracy,179,5042,0.98,0.02
Algeria,Moderate Autocracy,153,65000,0.99,0.02
Angola,Moderate Autocracy,89,26000,0.97,0.03
Argentina,Deficient Democracy,243,109405,0.96,0.04


In [73]:
df_merge_2.dropna(inplace = True)

In [74]:
y = df_merge_2['Classification']
X = df_merge_2.drop(columns = 'Classification')

In [75]:
#Logistic Regression
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)
lr = LogisticRegression()
lr.fit(X_train, y_train)
print(lr.score(X_train, y_train), lr.score(X_test, y_test))

0.32 0.28125


In [76]:
#KNN
ss = StandardScaler()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)
X_train_sc = ss.fit_transform(X_train)
X_test_sc = ss.transform(X_test)
knn = KNeighborsClassifier()
knn.fit(X_train_sc, y_train)
print(knn.score(X_train, y_train), knn.score(X_test, y_test))

0.2 0.09375




In [77]:
#Decision tree
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)
dt = DecisionTreeClassifier()
dt.fit(X_train, y_train)
print(dt.score(X_train, y_train), dt.score(X_test, y_test))

1.0 0.21875


In [78]:
#Bagged trees
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)
bt = BaggingClassifier()
bt.fit(X_train, y_train)
print(bt.score(X_train, y_train), bt.score(X_test, y_test))

0.952 0.34375


In [79]:
#Random Forest
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)
rf = RandomForestClassifier()
rf.fit(X_train, y_train)
print(rf.score(X_train, y_train), rf.score(X_test, y_test))

1.0 0.28125


In [80]:
#Adaboost
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)
ada = AdaBoostClassifier()
ada.fit(X_train, y_train)
print(ada.score(X_train, y_train), ada.score(X_test, y_test))

0.504 0.3125


In [81]:
#Support Vector Model
ss = StandardScaler()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)
X_train_scc = ss.fit_transform(X_train)
X_test_scc = ss.transform(X_test)
svc = SVC()
svc.fit(X_train_scc, y_train)
print(svc.score(X_train, y_train), svc.score(X_test, y_test))

0.2 0.3125




### df_merge_3 Modelling

In [82]:
df_merge_3.head()

Unnamed: 0_level_0,democracyCountries_category,incarcerationRatesByCountry_ratePer100k,incarcerationRatesByCountry_totIncarcerated,incarcerationRatesByCountry_percMale,incarcerationRatesByCountry_percFemale
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Afghanistan,Authoritarian Regime,77,28240,0.97,0.03
Albania,Flawed Democracy,179,5042,0.98,0.02
Algeria,Authoritarian Regime,153,65000,0.99,0.02
Angola,Authoritarian Regime,89,26000,0.97,0.03
Argentina,Flawed Democracy,243,109405,0.96,0.04


In [83]:
df_merge_3.dropna(inplace = True)

In [84]:
y = df_merge_3['democracyCountries_category']
X = df_merge_3.drop(columns = 'democracyCountries_category')

In [85]:
#Logistic Regression
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)
lr = LogisticRegression()
lr.fit(X_train, y_train)
print(lr.score(X_train, y_train), lr.score(X_test, y_test))

0.3412698412698413 0.25


In [86]:
#KNN
ss = StandardScaler()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)
X_train_sc = ss.fit_transform(X_train)
X_test_sc = ss.transform(X_test)
knn = KNeighborsClassifier()
knn.fit(X_train_sc, y_train)
print(knn.score(X_train, y_train), knn.score(X_test, y_test))

0.3412698412698413 0.25




In [87]:
#Decision tree
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)
dt = DecisionTreeClassifier()
dt.fit(X_train, y_train)
print(dt.score(X_train, y_train), dt.score(X_test, y_test))

1.0 0.28125


In [88]:
#Bagged trees
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)
bt = BaggingClassifier()
bt.fit(X_train, y_train)
print(bt.score(X_train, y_train), bt.score(X_test, y_test))

0.9603174603174603 0.28125


In [89]:
#Random Forest
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)
rf = RandomForestClassifier()
rf.fit(X_train, y_train)
print(rf.score(X_train, y_train), rf.score(X_test, y_test))

1.0 0.375


In [90]:
#Adaboost
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)
ada = AdaBoostClassifier()
ada.fit(X_train, y_train)
print(ada.score(X_train, y_train), ada.score(X_test, y_test))

0.6349206349206349 0.375


In [91]:
#Support Vector Model
ss = StandardScaler()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)
X_train_scc = ss.fit_transform(X_train)
X_test_scc = ss.transform(X_test)
svc = SVC()
svc.fit(X_train_scc, y_train)
print(svc.score(X_train, y_train), svc.score(X_test, y_test))

0.31746031746031744 0.34375




In [92]:
df_merge_3['democracyCountries_category'].value_counts(normalize = True)

Authoritarian Regime    0.322785
Flawed Democracy        0.322785
Hybrid Regime           0.215190
Full Democracy          0.139241
Name: democracyCountries_category, dtype: float64

In [93]:
df_merge_3['democracyCountries_category'].value_counts()

Authoritarian Regime    51
Flawed Democracy        51
Hybrid Regime           34
Full Democracy          22
Name: democracyCountries_category, dtype: int64