# EDA and Cleaning Freedom House

In [103]:
# Importing libraries 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [104]:
# Load the 'ratings' sheet of the Freedom House country workbook
freedom_house = pd.read_excel(
    './aid_data/freedom_house/fh_countries.xlsx',
    sheet_name='ratings',
)

About Freedom House ratings:

"The political rights and civil liberties ratings range from 1 to 7, with 1 representing the
most free and 7 the least free. The status designation of Free, Partly Free, or Not Free, which
is determined by the average of the political rights and civil liberties ratings, indicates the
general state of freedom in a country or territory." (https://freedomhouse.org/sites/default/files/2020-02/FreedomintheWorld2018COMPLETEBOOK.pdf)

In [105]:
# Preview the first rows of the raw sheet: one row per country, with PR / CL / Status columns per year
freedom_house.head()

Unnamed: 0,Country,2000 PR,2000 CL,2000 Status,2001 PR,2001 CL,2001 Status,2002 PR,2002 CL,2002 Status,...,2011 Status,2012 PR,2012 CL,2012 Status,2013 PR,2013 CL,2013 Status,2014 PR,2014 CL,2014 Status
0,Algeria,6,5,NF,6,5,NF,6,5,NF,...,NF,6,5,NF,6,5,NF,6,5,NF
1,Angola,6,6,NF,6,6,NF,6,5,NF,...,NF,6,5,NF,6,5,NF,6,5,NF
2,Bahrain,7,6,NF,6,5,NF,5,5,PF,...,NF,6,6,NF,6,6,NF,7,6,NF
3,Benin,2,2,F,3,2,F,3,2,F,...,F,2,2,F,2,2,F,2,2,F
4,Botswana,2,2,F,2,2,F,2,2,F,...,F,3,2,F,3,2,F,3,2,F


In [106]:
# Use the country name as the row index so the remaining columns are only the per-year ratings
freedom_house = freedom_house.set_index('Country')

In [107]:
# Reshape wide -> long: every 'YEAR TYPE' column becomes its own row under the country.
# dropna=False keeps entries with missing values instead of discarding them.
# NOTE(review): newer pandas releases deprecate the `dropna` argument of stack() -
# confirm against the installed pandas version before upgrading.
freedom_house = freedom_house.stack(dropna=False)

In [108]:
# Inspect the stacked Series: one value per (country, 'YEAR TYPE' label) pair
freedom_house

Country                                  
Algeria                       2000 PR         6
                              2000 CL         5
                              2000 Status    NF
                              2001 PR         6
                              2001 CL         5
                                             ..
Israeli-Occupied Territories  2013 CL         -
                              2013 Status     -
                              2014 PR         -
                               2014 CL        -
                              2014 Status     -
Length: 3060, dtype: object

In [109]:
# Convert the stacked Series into a single-column DataFrame (the column is named 0)
freedom_house = freedom_house.to_frame()

In [110]:
# Preview the frame: rows are keyed by (Country, label) and the values sit in column 0
freedom_house.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,0
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Algeria,2000 PR,6
Algeria,2000 CL,5
Algeria,2000 Status,NF
Algeria,2001 PR,6
Algeria,2001 CL,5


In [111]:
# Write the long-format frame to CSV; index=True stores both index levels
# (country and 'YEAR TYPE' label) as columns in the file
freedom_house.to_csv('./aid_data/freedom_house/freedom_house.csv', index=True)

In [112]:
# Read the CSV back in so the former index levels become ordinary columns
# (the unnamed label level comes back as 'Unnamed: 1')
freedom_house = pd.read_csv('./aid_data/freedom_house/freedom_house.csv')

In [113]:
# Preview the reloaded frame: Country, the 'YEAR TYPE' label, and the value
freedom_house.head()

Unnamed: 0,Country,Unnamed: 1,0
0,Algeria,2000 PR,6
1,Algeria,2000 CL,5
2,Algeria,2000 Status,NF
3,Algeria,2001 PR,6
4,Algeria,2001 CL,5


In [114]:
# Check the dtype and non-null count of each column
freedom_house.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3060 entries, 0 to 3059
Data columns (total 3 columns):
Country       3060 non-null object
Unnamed: 1    3060 non-null object
0             3060 non-null object
dtypes: object(3)
memory usage: 71.8+ KB


In [115]:
# Split each 'YEAR TYPE' label on whitespace into a separate frame (year, rating type)
rating_labels = freedom_house['Unnamed: 1']
freedom_house2 = rating_labels.str.split(expand=True)

In [116]:
# Preview the split labels: column 0 holds the year, column 1 the rating type
freedom_house2.head()

Unnamed: 0,0,1
0,2000,PR
1,2000,CL
2,2000,Status
3,2001,PR
4,2001,CL


In [117]:
# Attach the split year / rating-type columns to the values, matching rows by index
freedom_house = pd.merge(
    freedom_house,
    freedom_house2,
    left_index=True,
    right_index=True,
)

In [118]:
# Preview the combined frame: original columns plus the split year and rating type
freedom_house.head()

Unnamed: 0,Country,Unnamed: 1,0,0.1,1
0,Algeria,2000 PR,6,2000,PR
1,Algeria,2000 CL,5,2000,CL
2,Algeria,2000 Status,NF,2000,Status
3,Algeria,2001 PR,6,2001,PR
4,Algeria,2001 CL,5,2001,CL


In [119]:
# Remove the combined 'YEAR TYPE' label column now that it has been split into two columns
freedom_house = freedom_house.drop(labels=['Unnamed: 1'], axis='columns')

In [120]:
# Preview the frame after dropping the combined label column
freedom_house.head()

Unnamed: 0,Country,0,0.1,1
0,Algeria,6,2000,PR
1,Algeria,5,2000,CL
2,Algeria,NF,2000,Status
3,Algeria,6,2001,PR
4,Algeria,5,2001,CL


In [121]:
# Overwrite the intermediate CSV with the split-out frame (the row index is not written)
freedom_house.to_csv('./aid_data/freedom_house/freedom_house.csv', index=False)

In [122]:
# Read the intermediate CSV back into the working data frame.
# NOTE(review): later cells address the columns by the strings '0', '0.1' and '1';
# those names appear to come from this CSV round-trip (string headers, with the
# duplicate '0' renamed to '0.1') - confirm before removing the save/reload step.
freedom_house = pd.read_csv('./aid_data/freedom_house/freedom_house.csv')

In [123]:
# Preview the reloaded frame: country, value, year, rating type
freedom_house.head()

Unnamed: 0,Country,0,0.1,1
0,Algeria,6,2000,PR
1,Algeria,5,2000,CL
2,Algeria,NF,2000,Status
3,Algeria,6,2001,PR
4,Algeria,5,2001,CL


In [124]:
# Political rights: keep only the rows whose rating type is 'PR'
is_pr_row = freedom_house['1'] == 'PR'
freedom_house1 = freedom_house[is_pr_row]

In [125]:
# Civil liberties: keep only the rows whose rating type is 'CL'
is_cl_row = freedom_house['1'] == 'CL'
freedom_house2 = freedom_house[is_cl_row]

In [126]:
# Status: keep only the rows whose rating type is 'Status'
is_status_row = freedom_house['1'] == 'Status'
freedom_house3 = freedom_house[is_status_row]

In [127]:
# Looking at one of the data frames (political rights rows only)
freedom_house1.head()

Unnamed: 0,Country,0,0.1,1
0,Algeria,6,2000,PR
3,Algeria,6,2001,PR
6,Algeria,6,2002,PR
9,Algeria,6,2003,PR
12,Algeria,6,2004,PR


In [128]:
# Drop the rating-type column; the value column itself gets named as the political rights score
freedom_house1 = freedom_house1.drop(labels=['1'], axis='columns')

In [129]:
# Give the political rights columns descriptive names.
# rename() returns a new frame; assigning it back (instead of inplace=True) avoids
# mutating in place a frame that was derived from a filtered slice.
freedom_house1 = freedom_house1.rename(
    columns={'Country': 'country', '0': 'pr_score', '0.1': 'year'}
)

In [130]:
# Preview the political rights frame with its renamed columns
freedom_house1.head()

Unnamed: 0,country,pr_score,year
0,Algeria,6,2000
3,Algeria,6,2001
6,Algeria,6,2002
9,Algeria,6,2003
12,Algeria,6,2004


In [131]:
# Preview the civil liberties frame before cleaning its columns
freedom_house2.head()

Unnamed: 0,Country,0,0.1,1
1,Algeria,5,2000,CL
4,Algeria,5,2001,CL
7,Algeria,5,2002,CL
10,Algeria,5,2003,CL
13,Algeria,5,2004,CL


In [132]:
# Drop the rating-type column, which is no longer needed in the civil liberties frame
freedom_house2 = freedom_house2.drop(labels=['1'], axis='columns')

In [133]:
# Give the civil liberties columns descriptive names.
# rename() returns a new frame; assigning it back (instead of inplace=True) avoids
# mutating in place a frame that was derived from a filtered slice.
freedom_house2 = freedom_house2.rename(
    columns={'Country': 'country', '0': 'cl_score', '0.1': 'year'}
)

In [134]:
# Preview the civil liberties frame with its renamed columns
freedom_house2.head()

Unnamed: 0,country,cl_score,year
1,Algeria,5,2000
4,Algeria,5,2001
7,Algeria,5,2002
10,Algeria,5,2003
13,Algeria,5,2004


In [135]:
# Preview the status frame before cleaning its columns
freedom_house3.head()

Unnamed: 0,Country,0,0.1,1
2,Algeria,NF,2000,Status
5,Algeria,NF,2001,Status
8,Algeria,NF,2002,Status
11,Algeria,NF,2003,Status
14,Algeria,NF,2004,Status


In [136]:
# Drop the rating-type column, which is no longer needed in the status frame
freedom_house3 = freedom_house3.drop(labels=['1'], axis='columns')

In [137]:
# Give the status columns descriptive names.
# rename() returns a new frame; assigning it back (instead of inplace=True) avoids
# mutating in place a frame that was derived from a filtered slice.
freedom_house3 = freedom_house3.rename(
    columns={'Country': 'country', '0': 'fh_status', '0.1': 'year'}
)

In [138]:
# Preview the status frame with its renamed columns
freedom_house3.head()

Unnamed: 0,country,fh_status,year
2,Algeria,NF,2000
5,Algeria,NF,2001
8,Algeria,NF,2002
11,Algeria,NF,2003
14,Algeria,NF,2004


In [139]:
# Join political rights and civil liberties scores on (country, year), keeping unmatched rows from either side
freedom_house = freedom_house1.merge(
    freedom_house2,
    how='outer',
    on=['country', 'year'],
)

In [140]:
# Check that the two frames combined properly: one row per country-year with both scores
freedom_house.head()

Unnamed: 0,country,pr_score,year,cl_score
0,Algeria,6,2000,5
1,Algeria,6,2001,5
2,Algeria,6,2002,5
3,Algeria,6,2003,5
4,Algeria,6,2004,5


In [141]:
# Add the status column by joining on (country, year), again keeping unmatched rows from either side
freedom_house = freedom_house.merge(
    freedom_house3,
    how='outer',
    on=['country', 'year'],
)

In [142]:
# Put the identifying columns first, followed by the two scores and the status
column_order = ['country', 'year', 'pr_score', 'cl_score', 'fh_status']
freedom_house = freedom_house[column_order]

In [143]:
# Preview the combined, reordered data frame
freedom_house.head()

Unnamed: 0,country,year,pr_score,cl_score,fh_status
0,Algeria,2000,6,5,NF
1,Algeria,2001,6,5,NF
2,Algeria,2002,6,5,NF
3,Algeria,2003,6,5,NF
4,Algeria,2004,6,5,NF


In [144]:
# Check null counts and dtypes; pr_score and cl_score are stored as object and need to be numeric
freedom_house.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1020 entries, 0 to 1019
Data columns (total 5 columns):
country      1020 non-null object
year         1020 non-null int64
pr_score     1020 non-null object
cl_score     1020 non-null object
fh_status    1020 non-null object
dtypes: int64(1), object(4)
memory usage: 47.8+ KB


In [145]:
# Convert the PR score to numeric; values that cannot be parsed become NaN
pr_numeric = pd.to_numeric(freedom_house['pr_score'], errors='coerce')
freedom_house['pr_score'] = pr_numeric

In [146]:
# Convert the CL score to numeric; values that cannot be parsed become NaN
cl_numeric = pd.to_numeric(freedom_house['cl_score'], errors='coerce')
freedom_house['cl_score'] = cl_numeric

In [147]:
# Re-check the data types and null counts after the numeric conversion.
# The coercion introduced null values in pr_score and cl_score that still have to be dealt with.
freedom_house.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1020 entries, 0 to 1019
Data columns (total 5 columns):
country      1020 non-null object
year         1020 non-null int64
pr_score     1004 non-null float64
cl_score     1004 non-null float64
fh_status    1020 non-null object
dtypes: float64(2), int64(1), object(2)
memory usage: 47.8+ KB


In [148]:
# Dropping rows with null values.
# dropna() returns a new frame; assigning it back (instead of inplace=True) avoids
# mutating the frame in place.
freedom_house = freedom_house.dropna()

In [149]:
# Preview the data frame after removing rows with null values
freedom_house.head()

Unnamed: 0,country,year,pr_score,cl_score,fh_status
0,Algeria,2000,6.0,5.0,NF
1,Algeria,2001,6.0,5.0,NF
2,Algeria,2002,6.0,5.0,NF
3,Algeria,2003,6.0,5.0,NF
4,Algeria,2004,6.0,5.0,NF


In [150]:
# List the distinct country names to spot spellings that need to be harmonised
freedom_house['country'].unique()

array(['Algeria', 'Angola', 'Bahrain', 'Benin', 'Botswana',
       'Burkina Faso', 'Burundi', 'Cameroon', 'Cape Verde',
       'Central African Republic', 'Chad', 'Comoros',
       'Congo (Brazzaville)', 'Congo (Kinshasa)', "Cote d'Ivoire",
       'Djibouti', 'Egypt', 'Equatorial Guinea', 'Eritrea', 'Ethiopia',
       'Gabon', 'Ghana', 'Guinea', 'Guinea-Bissau', 'Iran', 'Iraq',
       'Israel', 'Jordan', 'Kenya', 'Kuwait', 'Lebanon', 'Lesotho',
       'Liberia', 'Libya', 'Madagascar', 'Malawi', 'Mali', 'Mauritania',
       'Mauritius', 'Morocco', 'Mozambique', 'Namibia', 'Niger',
       'Nigeria', 'Oman', 'Qatar', 'Rwanda', 'Sao Tome and Principe',
       'Saudi Arabia', 'Senegal', 'Sierra Leone', 'Somalia',
       'South Africa', 'South Sudan', 'Sudan', 'Swaziland', 'Syria',
       'Tanzania', 'The Gambia', 'Togo', 'Tunisia', 'Turkey', 'Uganda',
       'United Arab Emirates', 'Yemen', 'Zambia', 'Zimbabwe',
       'Israeli-Occupied Territories'], dtype=object)

In [151]:
# Changing country names for consistency.
# The mapping applies only to the 'country' column (nested-dict form of replace).
# replace() returns a new frame; assigning it back (instead of inplace=True) avoids
# mutating the frame in place.
country_renames = {
    "Cote d'Ivoire": "Cote D'Ivoire",
    'Congo (Brazzaville)': 'Congo',
    'Congo (Kinshasa)': 'Democratic Republic of Congo',
    'The Gambia': 'Gambia',
    'Israeli-Occupied Territories': 'Palestine',
    'Cape Verde': 'Cabo Verde',
}
freedom_house = freedom_house.replace({'country': country_renames})

In [152]:
# List the distinct country names again to confirm the replacements were applied
freedom_house['country'].unique()

array(['Algeria', 'Angola', 'Bahrain', 'Benin', 'Botswana',
       'Burkina Faso', 'Burundi', 'Cameroon', 'Cabo Verde',
       'Central African Republic', 'Chad', 'Comoros', 'Congo',
       'Democratic Republic of Congo', "Cote D'Ivoire", 'Djibouti',
       'Egypt', 'Equatorial Guinea', 'Eritrea', 'Ethiopia', 'Gabon',
       'Ghana', 'Guinea', 'Guinea-Bissau', 'Iran', 'Iraq', 'Israel',
       'Jordan', 'Kenya', 'Kuwait', 'Lebanon', 'Lesotho', 'Liberia',
       'Libya', 'Madagascar', 'Malawi', 'Mali', 'Mauritania', 'Mauritius',
       'Morocco', 'Mozambique', 'Namibia', 'Niger', 'Nigeria', 'Oman',
       'Qatar', 'Rwanda', 'Sao Tome and Principe', 'Saudi Arabia',
       'Senegal', 'Sierra Leone', 'Somalia', 'South Africa',
       'South Sudan', 'Sudan', 'Swaziland', 'Syria', 'Tanzania', 'Gambia',
       'Togo', 'Tunisia', 'Turkey', 'Uganda', 'United Arab Emirates',
       'Yemen', 'Zambia', 'Zimbabwe', 'Palestine'], dtype=object)

In [153]:
# Write the cleaned data frame to its own CSV file (the row index is not written)
freedom_house.to_csv('./aid_data/freedom_house/freedom_house_clean.csv', index=False)