In [1]:
import pandas as pd 
import numpy as np
from matplotlib import pyplot as plt 
import seaborn as sns

### 1. Exploration of data

In [149]:
# Average daily income [$ american dollars]
average_daily_income = pd.read_csv('./data/average_daily_income.csv', sep=';')
average_daily_income.shape

(195, 223)

In [150]:
average_daily_income.duplicated().sum()

0

In [151]:
average_daily_income.head()

Unnamed: 0,country,1800,1801,1802,1803,1804,1805,1806,1807,1808,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Afghanistan,919.0,919.0,919.0,919.0,919.0,919.0,919.0,919.0,919.0,...,2.83,2.89,2.87,2.82,2.81,2.81,2.77,2.82,2.62,2.66
1,Angola,2.27,2.27,2.28,2.29,2.3,2.31,2.32,2.33,2.35,...,3.73,3.76,3.78,3.67,3.55,3.37,3.12,2.99,2.79,2.72
2,Albania,1.22,1.22,1.22,1.22,1.22,1.22,1.22,1.22,1.22,...,7.99,8.2,8.52,8.62,8.79,8.73,9.04,9.35,9.04,9.5
3,Andorra,1.48,1.49,1.49,1.49,1.49,1.5,1.5,1.5,1.51,...,52.0,54.2,55.7,57.7,59.7,61.7,63.8,66.0,68.3,70.6
4,United Arab Emirates,2.9,2.91,2.92,2.93,2.94,2.95,2.96,2.97,2.98,...,70.4,73.6,83.7,69.1,71.2,78.7,84.1,94.0,85.8,85.8


In [152]:
average_daily_income.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 195 entries, 0 to 194
Columns: 223 entries, country to 2021
dtypes: float64(222), object(1)
memory usage: 339.9+ KB


In [153]:
average_daily_income.isnull().sum()

country    0
1800       0
1801       0
1802       0
1803       0
          ..
2017       0
2018       0
2019       0
2020       0
2021       0
Length: 223, dtype: int64

In [154]:
# to check if there are any null values in the df.
average_daily_income.isnull().any().any()

False

In [155]:
continents = pd.read_csv('./data/continents.csv', sep=';')
continents.head()

Unnamed: 0,continent,country
0,Africa,Algeria
1,Africa,Angola
2,Africa,Benin
3,Africa,Botswana
4,Africa,Burkina


In [156]:
continents.duplicated().sum()

0

In [157]:
continents.shape

(194, 2)

In [158]:
continents.isnull().any().any()

False

In [159]:
# Democracy Index [0 to 100]
democracy = pd.read_csv('./data/democracy.csv')
democracy.head()

Unnamed: 0,country,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,Afghanistan,30.6,30.4,30.2,27.5,24.8,24.8,24.8,24.8,27.7,27.7,25.5,25.5,29.7,28.5,28.5
1,Angola,24.1,28.8,33.5,33.4,33.2,33.2,33.5,33.5,33.5,33.5,34.0,36.2,36.2,37.2,36.6
2,Albania,59.1,59.1,59.1,58.9,58.6,58.1,56.7,56.7,56.7,59.1,59.1,59.8,59.8,58.9,60.8
3,United Arab Emirates,24.2,25.1,26.0,25.6,25.2,25.8,25.8,25.2,26.4,27.5,27.5,26.9,27.6,27.6,27.0
4,Argentina,66.3,66.3,66.3,67.3,68.4,68.4,68.4,68.4,68.4,70.2,69.6,69.6,70.2,70.2,69.5


The Democracy Index is based on 60 indicators, grouped into five categories: electoral process and pluralism, civil liberties, functioning of government, political participation and political culture.

In [160]:
democracy.duplicated().sum()

0

In [161]:
democracy.shape

(167, 16)

In [162]:
democracy.isnull().any().any()

True

In [163]:
democracy.isnull().sum()

country    0
2006       0
2007       0
2008       0
2009       0
2010       0
2011       0
2012       0
2013       0
2014       0
2015       0
2016       0
2017       0
2018       0
2019       0
2020       4
dtype: int64

In [164]:
democracy[democracy['2020'].isnull()]

Unnamed: 0,country,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
42,Algeria,31.7,32.5,33.2,33.8,34.4,34.4,38.3,38.3,38.3,39.5,35.6,35.6,35.0,40.1,
71,Iran,29.3,28.8,28.3,23.9,19.4,19.8,19.8,19.8,19.8,21.6,23.4,24.5,24.5,23.8,
91,Lithuania,74.3,74.0,73.6,73.0,72.4,72.4,72.4,75.4,75.4,75.4,74.7,74.1,75.0,75.0,
157,Ukraine,69.4,69.4,69.4,66.2,63.0,59.4,59.1,58.4,54.2,57.0,57.0,56.9,56.9,59.0,


In [165]:
# Index of functioning government (EIU) [0 to 100]
functioning_government = pd.read_csv('./data/functioning_government.csv')
functioning_government.head()

Unnamed: 0,country,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,Afghanistan,0.0,3.95,7.9,7.9,7.9,7.9,7.9,7.9,11.4,11.4,11.4,11.4,11.4,6.4,6.4
1,Angola,21.4,26.8,32.1,32.1,32.1,32.1,32.1,32.1,32.1,32.1,32.1,28.6,28.6,28.6,28.6
2,Albania,50.7,50.7,50.7,50.7,50.7,47.1,40.0,40.0,40.0,43.6,43.6,47.1,47.1,53.6,53.6
3,United Arab Emirates,30.7,35.0,39.3,37.5,35.7,35.7,35.7,35.7,35.7,35.7,35.7,35.7,39.3,39.3,39.3
4,Argentina,50.0,50.0,50.0,53.5,57.1,57.1,57.1,57.1,57.1,50.0,50.0,50.0,53.6,53.6,53.6


Functioning government
The variable captures the extent to which citizens have a functioning government that acts on their behalf.

It ranges from 0 to 10 (most effective).

It matches the variable GovernmentindexEIU in gapminder, with a few missing countries for 2020 and all for 2021 added.

Source
Economist Intelligence Unit (2023), Economist Intelligence Unit (2021) via gapminder – processed by Our World in Data
Links
http://www.gapm.io/dxlsdemocrix (for years 2006-2020)
https://www.eiu.com/n/campaigns/democracy-index-2021/ (for year 2021)
https://www.eiu.com/n/campaigns/democracy-index-2022/ (for year 2022)

In [166]:
functioning_government.duplicated().sum()

0

In [167]:
functioning_government.shape

(167, 16)

In [168]:
functioning_government.isnull().any().any()

True

In [169]:
functioning_government.isnull().sum()

country    0
2006       0
2007       0
2008       0
2009       0
2010       0
2011       0
2012       0
2013       0
2014       0
2015       0
2016       0
2017       0
2018       0
2019       0
2020       4
dtype: int64

In [170]:
functioning_government[functioning_government['2020'].isnull()]

Unnamed: 0,country,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
42,Algeria,22.1,22.1,22.1,22.1,22.1,22.1,22.1,22.1,22.1,22.1,22.1,22.1,22.1,28.6,
71,Iran,35.7,32.1,28.6,30.4,32.1,28.6,28.6,28.6,28.6,28.6,32.1,32.1,32.1,28.6,
91,Lithuania,64.3,60.7,57.1,57.1,57.1,57.1,57.1,60.7,60.7,60.7,57.1,57.1,64.3,64.3,
157,Ukraine,57.1,55.4,53.6,51.8,50.0,46.4,46.4,42.9,39.3,39.3,39.3,32.1,32.1,27.1,


In [171]:
# Happiness score (from the Happiness world report)[0 to 100]
happiness_score = pd.read_csv('./data/happiness_score_whr.csv')
happiness_score.head()

Unnamed: 0,country,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,Afghanistan,,,,37.2,44.0,47.6,38.3,37.8,35.7,31.3,39.8,42.2,26.6,26.9,25.7
1,Angola,,,,,,,55.9,43.6,39.4,37.9,,,,,
2,Albania,,,46.3,,54.9,52.7,58.7,55.1,45.5,48.1,46.1,45.1,46.4,50.0,48.8
3,United Arab Emirates,,67.3,,,68.7,71.0,71.2,72.2,66.2,65.4,65.7,68.3,70.4,66.0,67.9
4,Argentina,,63.1,60.7,59.6,64.2,64.4,67.8,64.7,65.8,66.7,67.0,64.3,60.4,57.9,59.7


In [172]:
happiness_score.duplicated().sum()

0

In [173]:
happiness_score.shape

(163, 16)

In [174]:
happiness_score.isnull().sum().sum()

608

In [175]:
missing_values_happiness = happiness_score[happiness_score['2019'].isnull()]
missing_values_happiness

Unnamed: 0,country,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
1,Angola,,,,,,,55.9,43.6,39.4,37.9,,,,,
18,Belize,,,64.5,,,,,,,59.6,,,,,
21,Bhutan,,,,,,,,,55.7,49.4,50.8,,,,
35,Cuba,,54.2,,,,,,,,,,,,,
39,Djibouti,,,,50.1,49.1,50.1,43.7,,,,,,,,
58,Guyana,,,59.9,,,,,,,,,,,,
114,Oman,,,,,,,68.5,,,,,,,,
123,Qatar,,,,,64.2,68.5,65.9,66.1,,,63.7,,,,
128,Sudan,,,,,44.5,44.4,43.1,45.5,,41.4,,,,,
133,Somalia,,,,,49.9,46.6,49.3,50.6,,55.3,53.5,46.7,,,


In [29]:
# Income Inequality= [Gini coefficient from 0 to 100:= complete inequality]
# Gini coefficient, 0 = complete equality; 1 = complete inequality
income_inequality = pd.read_csv('./data/income-inequality.csv', sep=';')
income_inequality.head()

Unnamed: 0,country,1800,1801,1802,1803,1804,1805,1806,1807,1808,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Afghanistan,30.5,30.5,30.5,30.5,30.5,30.5,30.5,30.5,30.5,...,37.7,37.7,37.7,37.7,37.7,37.7,37.7,37.7,38.3,39.0
1,Angola,57.7,57.7,57.7,57.7,57.7,57.7,57.7,57.7,57.7,...,46.0,46.8,47.7,48.5,49.4,50.3,51.3,51.3,52.1,53.0
2,Albania,39.9,39.9,39.9,39.9,39.9,39.9,39.9,39.9,39.9,...,29.0,31.6,34.6,32.9,33.7,33.2,33.2,33.2,33.8,34.5
3,Andorra,42.5,42.5,42.5,42.5,42.5,42.5,42.5,42.5,42.5,...,35.5,35.4,35.4,35.3,35.2,35.2,35.1,35.0,35.0,35.0
4,United Arab Emirates,39.8,39.8,39.8,39.8,39.8,39.8,39.8,39.7,39.7,...,32.6,32.5,31.1,29.7,28.4,27.2,26.0,26.0,26.0,26.7


In [29]:
# Income Inequality= [Gini coefficient from 0 to 100:= complete inequality]
# Gini coefficient, 0 = complete equality; 1 = complete inequality
income_inequality = pd.read_csv('./data/income-inequality.csv', sep=';')
income_inequality.head()

Unnamed: 0,country,1800,1801,1802,1803,1804,1805,1806,1807,1808,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Afghanistan,30.5,30.5,30.5,30.5,30.5,30.5,30.5,30.5,30.5,...,37.7,37.7,37.7,37.7,37.7,37.7,37.7,37.7,38.3,39.0
1,Angola,57.7,57.7,57.7,57.7,57.7,57.7,57.7,57.7,57.7,...,46.0,46.8,47.7,48.5,49.4,50.3,51.3,51.3,52.1,53.0
2,Albania,39.9,39.9,39.9,39.9,39.9,39.9,39.9,39.9,39.9,...,29.0,31.6,34.6,32.9,33.7,33.2,33.2,33.2,33.8,34.5
3,Andorra,42.5,42.5,42.5,42.5,42.5,42.5,42.5,42.5,42.5,...,35.5,35.4,35.4,35.3,35.2,35.2,35.1,35.0,35.0,35.0
4,United Arab Emirates,39.8,39.8,39.8,39.8,39.8,39.8,39.8,39.7,39.7,...,32.6,32.5,31.1,29.7,28.4,27.2,26.0,26.0,26.0,26.7


In [30]:
income_inequality.duplicated().sum()

0

In [31]:
income_inequality.shape

(197, 223)

In [32]:
income_inequality.isnull().any().any()

False

In [33]:
# Population [# of people]
population = pd.read_csv('./data/population_wb.csv')
population.head()

Unnamed: 0,Country Name,Country Code,1960,1961,1962,1963,1964,1965,1966,1967,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,Aruba,ABW,54208.0,55434.0,56234.0,56699.0,57029.0,57357.0,57702.0,58044.0,...,102050.0,102565.0,103165.0,103776.0,104339.0,104865.0,105361.0,105846.0,106310.0,106766.0
1,Africa Eastern and Southern,AFE,130836765.0,134159786.0,137614644.0,141202036.0,144920186.0,148769974.0,152752671.0,156876454.0,...,532760424.0,547482863.0,562601578.0,578075373.0,593871847.0,609978946.0,626392880.0,643090131.0,660046272.0,677243299.0
2,Afghanistan,AFG,8996967.0,9169406.0,9351442.0,9543200.0,9744772.0,9956318.0,10174840.0,10399936.0,...,30117411.0,31161378.0,32269592.0,33370804.0,34413603.0,35383028.0,36296111.0,37171922.0,38041757.0,38928341.0
3,Africa Western and Central,AFW,96396419.0,98407221.0,100506960.0,102691339.0,104953470.0,107289875.0,109701811.0,112195950.0,...,360285439.0,370243017.0,380437896.0,390882979.0,401586651.0,412551299.0,423769930.0,435229381.0,446911598.0,458803476.0
4,Angola,AGO,5454938.0,5531451.0,5608499.0,5679409.0,5734995.0,5770573.0,5781305.0,5774440.0,...,24220660.0,25107925.0,26015786.0,26941773.0,27884380.0,28842482.0,29816769.0,30809787.0,31825299.0,32866268.0


In [34]:
population.duplicated().sum()

0

In [35]:
population.shape

(266, 63)

In [36]:
population.isnull().any().any()

True

In [37]:
population.isnull().sum().sum()

103

In [38]:
# In which specific column are there any missing values
population.loc[:, population.isnull().any()].columns

Index(['1960', '1961', '1962', '1963', '1964', '1965', '1966', '1967', '1968',
       '1969', '1970', '1971', '1972', '1973', '1974', '1975', '1976', '1977',
       '1978', '1979', '1980', '1981', '1982', '1983', '1984', '1985', '1986',
       '1987', '1988', '1989', '1990', '1991', '1992', '1993', '1994', '1995',
       '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004',
       '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013',
       '2014', '2015', '2016', '2017', '2018', '2019', '2020'],
      dtype='object')

In [39]:
# changing name of column Country Name and Country Code
population.columns = population.columns.str.lower()
population.head()

Unnamed: 0,country name,country code,1960,1961,1962,1963,1964,1965,1966,1967,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,Aruba,ABW,54208.0,55434.0,56234.0,56699.0,57029.0,57357.0,57702.0,58044.0,...,102050.0,102565.0,103165.0,103776.0,104339.0,104865.0,105361.0,105846.0,106310.0,106766.0
1,Africa Eastern and Southern,AFE,130836765.0,134159786.0,137614644.0,141202036.0,144920186.0,148769974.0,152752671.0,156876454.0,...,532760424.0,547482863.0,562601578.0,578075373.0,593871847.0,609978946.0,626392880.0,643090131.0,660046272.0,677243299.0
2,Afghanistan,AFG,8996967.0,9169406.0,9351442.0,9543200.0,9744772.0,9956318.0,10174840.0,10399936.0,...,30117411.0,31161378.0,32269592.0,33370804.0,34413603.0,35383028.0,36296111.0,37171922.0,38041757.0,38928341.0
3,Africa Western and Central,AFW,96396419.0,98407221.0,100506960.0,102691339.0,104953470.0,107289875.0,109701811.0,112195950.0,...,360285439.0,370243017.0,380437896.0,390882979.0,401586651.0,412551299.0,423769930.0,435229381.0,446911598.0,458803476.0
4,Angola,AGO,5454938.0,5531451.0,5608499.0,5679409.0,5734995.0,5770573.0,5781305.0,5774440.0,...,24220660.0,25107925.0,26015786.0,26941773.0,27884380.0,28842482.0,29816769.0,30809787.0,31825299.0,32866268.0


In [40]:
population.rename(columns = {'country name': 'country', 'country code': 'country_code'}, inplace = True)
population.head(3)

Unnamed: 0,country,country_code,1960,1961,1962,1963,1964,1965,1966,1967,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,Aruba,ABW,54208.0,55434.0,56234.0,56699.0,57029.0,57357.0,57702.0,58044.0,...,102050.0,102565.0,103165.0,103776.0,104339.0,104865.0,105361.0,105846.0,106310.0,106766.0
1,Africa Eastern and Southern,AFE,130836765.0,134159786.0,137614644.0,141202036.0,144920186.0,148769974.0,152752671.0,156876454.0,...,532760424.0,547482863.0,562601578.0,578075373.0,593871847.0,609978946.0,626392880.0,643090131.0,660046272.0,677243299.0
2,Afghanistan,AFG,8996967.0,9169406.0,9351442.0,9543200.0,9744772.0,9956318.0,10174840.0,10399936.0,...,30117411.0,31161378.0,32269592.0,33370804.0,34413603.0,35383028.0,36296111.0,37171922.0,38041757.0,38928341.0


In [41]:
# Women participation in parliament [%]
women_parliament = pd.read_csv('./data/women_in_parliament.csv')
women_parliament.head()

Unnamed: 0,country,1945,1946,1947,1948,1949,1950,1951,1952,1953,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,Afghanistan,,,,,,,,,,...,27.4,27.4,27.4,27.4,27.4,27.7,27.8,27.9,27.2,27.2
1,Angola,,,,,,,,,,...,34.9,34.1,33.5,32.8,32.2,31.6,30.9,30.4,30.0,30.0
2,Albania,3.66,6.46,8.81,10.8,12.5,14.1,13.5,12.9,12.4,...,16.1,15.9,15.7,18.8,21.8,24.8,27.9,28.6,29.5,29.5
3,Andorra,,,,,,,,,,...,51.8,50.0,47.6,44.3,39.3,41.1,42.9,44.6,46.4,46.4
4,United Arab Emirates,,,,,,,,,,...,5.0,17.5,19.2,20.8,22.5,29.4,36.3,43.1,50.0,50.0


In [42]:
women_parliament.duplicated().sum()

0

In [43]:
women_parliament.shape

(193, 77)

In [44]:
women_parliament.isnull().any().any()

True

In [45]:
women_parliament.isnull().sum().sum()

4599

In [46]:
# In which specific column are there any missing values
women_parliament.loc[:, women_parliament.isnull().any()].columns

Index(['1945', '1946', '1947', '1948', '1949', '1950', '1951', '1952', '1953',
       '1954', '1955', '1956', '1957', '1958', '1959', '1960', '1961', '1962',
       '1963', '1964', '1965', '1966', '1967', '1968', '1969', '1970', '1971',
       '1972', '1973', '1974', '1975', '1976', '1977', '1978', '1979', '1980',
       '1981', '1982', '1983', '1984', '1985', '1986', '1987', '1988', '1989',
       '1990', '1991', '1992', '1993', '1994', '1995', '1996', '1997', '1998',
       '1999', '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007',
       '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016',
       '2017', '2018', '2019', '2020'],
      dtype='object')

In [47]:
# making a new df with merged with country code
# usable later on for scatter_map or other map visualisation.
countries = pd.merge(continents, population[['country','country_code']],on='country', how='left')

In [48]:
countries.isnull().sum().sum()

27

In [49]:
#complete the missing values 
countries[countries['country_code'].isnull()]

Unnamed: 0,continent,country,country_code
4,Africa,Burkina,
7,Africa,Cape Verde,
11,Africa,Congo,
12,Africa,"Congo, Democratic Republic of",
14,Africa,Egypt,
19,Africa,Gambia,
23,Africa,Ivory Coast,
47,Africa,Swaziland,
58,Asia,Brunei,
59,Asia,Burma (Myanmar),


In [50]:
dict_country_code = {
    'Burkina': 'BFA', 'Cape Verde': 'CPV', 'Congo': 'COG', 'Congo, Democratic Republic of': 'COD',
    'Egypt': 'EGY', 'Gambia': 'GMB', 'Ivory Coast': 'CIV', 'Swaziland': 'SWZ',
    'Brunei': 'BRN', 'Burma (Myanmar)': 'MMR', 'East Timor': 'TLS', 'Iran': 'IRN',
    'Korea, North': 'PRK', 'Korea, South': 'KOR', 'Kyrgyzstan': 'KGZ', 'Laos': 'LAO',
    'Syria': 'SYR', 'Yemen': 'YEM', 'Macedonia': 'MKD', 'Slovakia': 'SVK',
    'Vatican City': 'VAT', 'Bahamas': 'BHS', 'Saint Kitts and Nevis': 'KNA', 'Saint Lucia': 'LCA',
    'Saint Vincent and the Grenadines': 'VCT', 'Micronesia': 'FSM', 'Venezuela': 'VEN'
}

countries['country_code'] = countries['country_code'].fillna(countries['country'].map(dict_country_code),)

In [51]:
countries.isnull().sum()

continent       0
country         0
country_code    0
dtype: int64

In [52]:
countries.shape

(194, 3)

In [53]:
countries.to_csv('./data/countries.csv', index=False)

In [54]:
democracy.head(50)

Unnamed: 0,country,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,Afghanistan,30.6,30.4,30.2,27.5,24.8,24.8,24.8,24.8,27.7,27.7,25.5,25.5,29.7,28.5,28.5
1,Angola,24.1,28.8,33.5,33.4,33.2,33.2,33.5,33.5,33.5,33.5,34.0,36.2,36.2,37.2,36.6
2,Albania,59.1,59.1,59.1,58.9,58.6,58.1,56.7,56.7,56.7,59.1,59.1,59.8,59.8,58.9,60.8
3,United Arab Emirates,24.2,25.1,26.0,25.6,25.2,25.8,25.8,25.2,26.4,27.5,27.5,26.9,27.6,27.6,27.0
4,Argentina,66.3,66.3,66.3,67.3,68.4,68.4,68.4,68.4,68.4,70.2,69.6,69.6,70.2,70.2,69.5
5,Armenia,41.5,41.2,40.9,40.9,40.9,40.9,40.9,40.2,41.3,40.0,38.8,41.1,47.9,55.4,53.5
6,Australia,90.9,90.9,90.9,91.5,92.2,92.2,92.2,91.3,90.1,90.1,90.1,90.9,90.9,90.9,89.6
7,Austria,86.9,85.9,84.9,84.9,84.9,84.9,86.2,84.8,85.4,85.4,84.1,84.2,82.9,82.9,81.6
8,Azerbaijan,33.1,32.5,31.9,31.7,31.5,31.5,31.5,30.6,28.3,27.1,26.5,26.5,26.5,27.5,26.8
9,Burundi,45.1,45.1,45.1,42.6,40.1,40.1,36.0,34.1,33.3,24.9,24.0,23.3,23.3,21.5,21.4


In [55]:
democracy_miss = pd.merge(democracy,countries[['country','country_code', 'continent']],on='country', how='outer')
democracy_miss.head(30)

Unnamed: 0,country,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,country_code,continent
0,Afghanistan,30.6,30.4,30.2,27.5,24.8,24.8,24.8,24.8,27.7,27.7,25.5,25.5,29.7,28.5,28.5,AFG,Asia
1,Albania,59.1,59.1,59.1,58.9,58.6,58.1,56.7,56.7,56.7,59.1,59.1,59.8,59.8,58.9,60.8,ALB,Europe
2,Algeria,31.7,32.5,33.2,33.8,34.4,34.4,38.3,38.3,38.3,39.5,35.6,35.6,35.0,40.1,,DZA,Africa
3,Andorra,,,,,,,,,,,,,,,,AND,Europe
4,Angola,24.1,28.8,33.5,33.4,33.2,33.2,33.5,33.5,33.5,33.5,34.0,36.2,36.2,37.2,36.6,AGO,Africa
5,Antigua and Barbuda,,,,,,,,,,,,,,,,ATG,North America
6,Argentina,66.3,66.3,66.3,67.3,68.4,68.4,68.4,68.4,68.4,70.2,69.6,69.6,70.2,70.2,69.5,ARG,South America
7,Armenia,41.5,41.2,40.9,40.9,40.9,40.9,40.9,40.2,41.3,40.0,38.8,41.1,47.9,55.4,53.5,ARM,Europe
8,Australia,90.9,90.9,90.9,91.5,92.2,92.2,92.2,91.3,90.1,90.1,90.1,90.9,90.9,90.9,89.6,AUS,Australia and Oceania
9,Austria,86.9,85.9,84.9,84.9,84.9,84.9,86.2,84.8,85.4,85.4,84.1,84.2,82.9,82.9,81.6,AUT,Europe


In [56]:
democracy_miss[democracy_miss['continent'].isnull()]

Unnamed: 0,country,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,country_code,continent
27,Burkina Faso,37.2,36.6,36.0,36.0,35.9,35.9,35.2,41.5,40.9,47.0,47.0,47.5,47.5,40.4,37.3,,
41,"Congo, Dem. Rep.",27.6,25.2,22.8,22.1,21.5,21.5,19.2,18.3,17.5,21.1,19.3,16.1,14.9,11.3,11.3,,
43,"Congo, Rep.",31.9,30.6,29.4,29.1,28.9,28.9,28.9,28.9,28.9,29.1,29.1,32.5,33.1,31.1,31.1,,
45,Cote d'Ivoire,33.8,33.3,32.7,31.4,30.2,30.8,32.5,32.5,35.3,33.1,38.1,39.3,41.5,40.5,41.1,,
61,Eswatini,29.3,29.9,30.4,29.7,29.0,32.6,32.0,32.0,30.9,30.9,30.3,30.3,30.3,31.4,30.8,,
79,"Hong Kong, China",60.3,59.4,58.5,58.9,59.2,59.2,64.2,64.2,64.6,65.0,64.2,63.1,61.5,60.2,55.7,,
99,Kyrgyz Republic,40.8,40.6,40.5,41.8,43.1,43.4,46.9,46.9,52.4,53.3,49.3,51.1,51.1,48.9,42.1,,
101,Lao,21.0,21.0,21.0,21.0,21.0,21.0,23.2,22.1,22.1,22.1,23.7,23.7,23.7,21.4,17.7,,
129,Myanmar,17.7,17.7,17.7,17.7,17.7,17.7,23.5,27.6,30.5,41.4,42.0,38.3,38.3,35.5,30.4,,
138,North Korea,10.3,9.45,8.6,9.7,10.8,10.8,10.8,10.8,10.8,10.8,10.8,10.8,10.8,10.8,10.8,,


In [72]:
democracy_miss['country_code'] = democracy_miss['country_code'].fillna(democracy_miss['country'].map(dict_country_code_2))
democracy_miss['continent'] = democracy_miss['continent'].fillna(democracy_miss['country'].map(dict_continent))

In [73]:
democracy_miss['country'] = democracy_miss['country'].replace(rename_dict)
democracy_miss['nan_count'] = democracy_miss.isnull().sum(axis=1)
democracy_miss = democracy_miss.sort_values(by=['country_code', 'nan_count'])
democracy_miss

Unnamed: 0,country,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,country_code,continent,nan_count
0,Afghanistan,30.6,30.4,30.2,27.5,24.8,24.8,24.8,24.8,27.7,27.7,25.5,25.5,29.7,28.5,28.5,AFG,Asia,0
4,Angola,24.1,28.8,33.5,33.4,33.2,33.2,33.5,33.5,33.5,33.5,34.0,36.2,36.2,37.2,36.6,AGO,Africa,0
1,Albania,59.1,59.1,59.1,58.9,58.6,58.1,56.7,56.7,56.7,59.1,59.1,59.8,59.8,58.9,60.8,ALB,Europe,0
3,Andorra,,,,,,,,,,,,,,,,AND,Europe,15
199,United Arab Emirates,24.2,25.1,26.0,25.6,25.2,25.8,25.8,25.2,26.4,27.5,27.5,26.9,27.6,27.6,27.0,ARE,Asia,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
160,Samoa,,,,,,,,,,,,,,,,WSM,Australia and Oceania,15
208,Yemen,29.8,29.6,29.5,27.9,26.4,25.7,31.2,27.9,27.9,22.4,20.7,20.7,19.5,19.5,19.5,YEM,Asia,0
174,South Africa,79.1,79.1,79.1,78.5,77.9,77.9,77.9,79.0,78.2,75.6,74.1,72.4,72.4,72.4,70.5,ZAF,Africa,0
209,Zambia,52.5,52.5,52.5,54.6,56.8,61.9,62.6,62.6,63.9,62.8,59.9,56.8,56.1,50.9,48.6,ZMB,Africa,0


In [74]:
democracy_miss = democracy_miss.drop_duplicates(subset='country_code', keep='first')
democracy_miss.head(20)

Unnamed: 0,country,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,country_code,continent,nan_count
0,Afghanistan,30.6,30.4,30.2,27.5,24.8,24.8,24.8,24.8,27.7,27.7,25.5,25.5,29.7,28.5,28.5,AFG,Asia,0
4,Angola,24.1,28.8,33.5,33.4,33.2,33.2,33.5,33.5,33.5,33.5,34.0,36.2,36.2,37.2,36.6,AGO,Africa,0
1,Albania,59.1,59.1,59.1,58.9,58.6,58.1,56.7,56.7,56.7,59.1,59.1,59.8,59.8,58.9,60.8,ALB,Europe,0
3,Andorra,,,,,,,,,,,,,,,,AND,Europe,15
199,United Arab Emirates,24.2,25.1,26.0,25.6,25.2,25.8,25.8,25.2,26.4,27.5,27.5,26.9,27.6,27.6,27.0,ARE,Asia,0
6,Argentina,66.3,66.3,66.3,67.3,68.4,68.4,68.4,68.4,68.4,70.2,69.6,69.6,70.2,70.2,69.5,ARG,South America,0
7,Armenia,41.5,41.2,40.9,40.9,40.9,40.9,40.9,40.2,41.3,40.0,38.8,41.1,47.9,55.4,53.5,ARM,Europe,0
5,Antigua and Barbuda,,,,,,,,,,,,,,,,ATG,North America,15
8,Australia,90.9,90.9,90.9,91.5,92.2,92.2,92.2,91.3,90.1,90.1,90.1,90.9,90.9,90.9,89.6,AUS,Australia and Oceania,0
9,Austria,86.9,85.9,84.9,84.9,84.9,84.9,86.2,84.8,85.4,85.4,84.1,84.2,82.9,82.9,81.6,AUT,Europe,0


In [75]:
democracy_miss.drop('nan_count', axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  democracy_miss.drop('nan_count', axis=1, inplace=True)


In [76]:
democracy_miss

Unnamed: 0,country,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,country_code,continent
0,Afghanistan,30.6,30.4,30.2,27.5,24.8,24.8,24.8,24.8,27.7,27.7,25.5,25.5,29.7,28.5,28.5,AFG,Asia
4,Angola,24.1,28.8,33.5,33.4,33.2,33.2,33.5,33.5,33.5,33.5,34.0,36.2,36.2,37.2,36.6,AGO,Africa
1,Albania,59.1,59.1,59.1,58.9,58.6,58.1,56.7,56.7,56.7,59.1,59.1,59.8,59.8,58.9,60.8,ALB,Europe
3,Andorra,,,,,,,,,,,,,,,,AND,Europe
199,United Arab Emirates,24.2,25.1,26.0,25.6,25.2,25.8,25.8,25.2,26.4,27.5,27.5,26.9,27.6,27.6,27.0,ARE,Asia
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
160,Samoa,,,,,,,,,,,,,,,,WSM,Australia and Oceania
208,Yemen,29.8,29.6,29.5,27.9,26.4,25.7,31.2,27.9,27.9,22.4,20.7,20.7,19.5,19.5,19.5,YEM,Asia
174,South Africa,79.1,79.1,79.1,78.5,77.9,77.9,77.9,79.0,78.2,75.6,74.1,72.4,72.4,72.4,70.5,ZAF,Africa
209,Zambia,52.5,52.5,52.5,54.6,56.8,61.9,62.6,62.6,63.9,62.8,59.9,56.8,56.1,50.9,48.6,ZMB,Africa


In [189]:

democracy_miss.to_csv('./data/democracy_full.csv', index=False)

In [78]:
government_miss = pd.merge(functioning_government,countries[['country','country_code','continent']],on='country', how='outer')
government_miss.shape

(211, 18)

In [79]:
government_miss['country_code'] = government_miss['country_code'].fillna(government_miss['country'].map(dict_country_code_2))
government_miss['continent'] = government_miss['continent'].fillna(government_miss['country'].map(dict_continent))
government_miss['country'] = government_miss['country'].replace(rename_dict)
government_miss['nan_count'] = government_miss.isnull().sum(axis=1)
government_miss = government_miss.sort_values(by=['country_code', 'nan_count'])
government_miss

Unnamed: 0,country,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,country_code,continent,nan_count
0,Afghanistan,0.0,3.95,7.9,7.9,7.9,7.9,7.9,7.9,11.4,11.4,11.4,11.4,11.4,6.4,6.4,AFG,Asia,0
4,Angola,21.4,26.80,32.1,32.1,32.1,32.1,32.1,32.1,32.1,32.1,32.1,28.6,28.6,28.6,28.6,AGO,Africa,0
1,Albania,50.7,50.70,50.7,50.7,50.7,47.1,40.0,40.0,40.0,43.6,43.6,47.1,47.1,53.6,53.6,ALB,Europe,0
3,Andorra,,,,,,,,,,,,,,,,AND,Europe,15
199,United Arab Emirates,30.7,35.00,39.3,37.5,35.7,35.7,35.7,35.7,35.7,35.7,35.7,35.7,39.3,39.3,39.3,ARE,Asia,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
160,Samoa,,,,,,,,,,,,,,,,WSM,Australia and Oceania,15
208,Yemen,27.1,26.10,25.0,21.4,17.9,3.6,14.3,14.3,14.3,3.6,0.0,0.0,0.0,0.0,0.0,YEM,Asia,0
174,South Africa,78.6,78.60,78.6,80.3,82.1,82.1,82.1,82.1,82.1,82.1,78.6,75.0,75.0,75.0,71.4,ZAF,Africa,0
209,Zambia,46.4,46.40,46.4,50.0,53.6,50.0,53.6,53.6,53.6,53.6,53.6,50.0,46.4,29.3,29.3,ZMB,Africa,0


In [191]:
government_miss = government_miss.drop_duplicates(subset='country_code', keep='first')
government_miss

Unnamed: 0,country,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,country_code,continent
0,Afghanistan,0.0,3.95,7.9,7.9,7.9,7.9,7.9,7.9,11.4,11.4,11.4,11.4,11.4,6.4,6.4,AFG,Asia
4,Angola,21.4,26.80,32.1,32.1,32.1,32.1,32.1,32.1,32.1,32.1,32.1,28.6,28.6,28.6,28.6,AGO,Africa
1,Albania,50.7,50.70,50.7,50.7,50.7,47.1,40.0,40.0,40.0,43.6,43.6,47.1,47.1,53.6,53.6,ALB,Europe
3,Andorra,,,,,,,,,,,,,,,,AND,Europe
199,United Arab Emirates,30.7,35.00,39.3,37.5,35.7,35.7,35.7,35.7,35.7,35.7,35.7,35.7,39.3,39.3,39.3,ARE,Asia
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
160,Samoa,,,,,,,,,,,,,,,,WSM,Australia and Oceania
208,Yemen,27.1,26.10,25.0,21.4,17.9,3.6,14.3,14.3,14.3,3.6,0.0,0.0,0.0,0.0,0.0,YEM,Asia
174,South Africa,78.6,78.60,78.6,80.3,82.1,82.1,82.1,82.1,82.1,82.1,78.6,75.0,75.0,75.0,71.4,ZAF,Africa
209,Zambia,46.4,46.40,46.4,50.0,53.6,50.0,53.6,53.6,53.6,53.6,53.6,50.0,46.4,29.3,29.3,ZMB,Africa


In [192]:
government_miss.drop('nan_count', axis=1, inplace=True)

KeyError: "['nan_count'] not found in axis"

In [193]:
government_miss.to_csv('./data/government_full.csv', index=False)

In [82]:
happiness_score.head(50)

Unnamed: 0,country,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,Afghanistan,,,,37.2,44.0,47.6,38.3,37.8,35.7,31.3,39.8,42.2,26.6,26.9,25.7
1,Angola,,,,,,,55.9,43.6,39.4,37.9,,,,,
2,Albania,,,46.3,,54.9,52.7,58.7,55.1,45.5,48.1,46.1,45.1,46.4,50.0,48.8
3,United Arab Emirates,,67.3,,,68.7,71.0,71.2,72.2,66.2,65.4,65.7,68.3,70.4,66.0,67.9
4,Argentina,,63.1,60.7,59.6,64.2,64.4,67.8,64.7,65.8,66.7,67.0,64.3,60.4,57.9,59.7
5,Armenia,,42.9,48.8,46.5,41.8,43.7,42.6,43.2,42.8,44.5,43.5,43.3,42.9,50.6,46.8
6,Australia,73.4,,72.9,72.5,,74.5,74.1,72.0,73.6,72.9,73.1,72.5,72.6,71.8,72.2
7,Austria,,71.2,,71.8,,73.0,74.7,74.0,75.0,69.5,70.8,70.5,72.9,74.0,72.9
8,Azerbaijan,,47.3,45.7,48.2,45.7,42.2,46.8,49.1,54.8,52.5,51.5,53.0,51.5,51.7,51.6
9,Burundi,,,,35.6,37.9,,37.1,,,29.0,,,,37.8,37.8


In [176]:
happiness_miss = pd.merge(happiness_score,countries[['country','country_code', 'continent']],on='country', how='outer')
happiness_miss.shape

(209, 18)

In [177]:
countries['continent'].unique()

array(['Africa', 'Asia', 'Europe', 'North America',
       'Australia and Oceania', 'South America'], dtype=object)

In [178]:
# different name thatn the tbale countries, so adding manually those country_code and continent
happiness_miss[happiness_miss['continent'].isnull()]

Unnamed: 0,country,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,country_code,continent
27,Burkina Faso,,38.0,40.2,38.5,,40.4,47.9,39.6,33.3,34.8,44.2,42.1,46.5,49.3,47.7,,
41,"Congo, Dem. Rep.",,,,,39.8,,45.2,46.4,45.0,44.1,39.0,45.2,43.1,,43.1,,
43,"Congo, Rep.",,,,38.2,,,45.1,39.2,39.5,40.6,46.9,41.2,48.8,54.9,51.9,,
45,Cote d'Ivoire,,,,,42.0,,,,37.4,35.7,44.5,45.4,50.4,52.7,52.3,,
61,Eswatini,,,,,,,48.7,,,,,,,42.1,43.1,,
79,"Hong Kong, China",,55.1,,51.4,54.0,56.4,54.7,54.8,,54.6,,55.0,53.6,,55.1,,
99,Kyrgyz Republic,,46.4,47.0,47.4,50.7,50.0,49.2,52.1,54.0,52.5,49.1,48.6,56.3,53.0,55.4,,
101,Lao,,50.8,53.6,50.4,,,47.0,48.8,,,,,46.2,48.6,48.9,,
129,Myanmar,,,,,,,,44.4,41.8,47.9,42.2,46.2,41.5,44.1,43.1,,
138,North Macedonia,,,44.9,,44.3,41.8,49.0,46.4,51.9,52.0,49.8,53.5,52.3,52.4,51.6,,


In [179]:
dict_country_code_2 = {
    "Burkina Faso": "BFA",
    "Congo, Dem. Rep.": "COD",
    "Congo, Rep.": "COG",
    "Cote d'Ivoire": "CIV",
    "Eswatini": "SWZ",
    "Holy See": "VAT",
    "Hong Kong, China": "HKG",
    "Kyrgyz Republic": "KGZ",
    "Lao": "LAO",
    "Micronesia, Fed. Sts.": "FSM",
    "Myanmar": "MMR",
    "North Korea": "PRK",
    "North Macedonia": "MKD",
    "Palestine": "PSE",
    "Russia": "RUS",
    "Slovak Republic": "SVK",
    "South Korea": "KOR",
    "St. Kitts and Nevis": "KNA",
    "St. Lucia": "LCA",
    "St. Vincent and the Grenadines": "VCT",
    "Taiwan": "TWN",
    "Timor-Leste": "TLS"
}

happiness_miss['country_code'] = happiness_miss['country_code'].fillna(happiness_miss['country'].map(dict_country_code_2))

In [180]:
dict_continent = {
    "Burkina Faso": "Africa",
    "Congo, Dem. Rep.": "Africa",
    "Congo, Rep.": "Africa",
    "Cote d'Ivoire": "Africa",
    "Eswatini": "Africa",
    "Hong Kong, China": "Asia",
    "Kyrgyz Republic": "Asia",
    "Lao": "Asia",
    "Micronesia, Fed. Sts.": "Australia and Oceania",
    "Myanmar": "Asia",
    "North Korea": "Asia",
    "North Macedonia": "Europe",
    "Palestine": "Asia",
    "Russia": "Europe, Asia", 
    "Slovak Republic": "Europe",
    "South Korea": "Asia",
    "St. Kitts and Nevis": "North America",
    "St. Lucia": "North America",
    "St. Vincent and the Grenadines": "North America",
    "Taiwan": "Asia",
    "Timor-Leste": "Asia"
}

happiness_miss['continent'] = happiness_miss['continent'].fillna(happiness_miss['country'].map(dict_continent))

In [181]:
duplicates_happ = happiness_miss[happiness_miss['country_code'].duplicated(keep=False)]
duplicates_happ.sort_values(by='country_code')

Unnamed: 0,country,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,country_code,continent
26,Burkina,,,,,,,,,,,,,,,,BFA,Africa
27,Burkina Faso,,38.0,40.2,38.5,,40.4,47.9,39.6,33.3,34.8,44.2,42.1,46.5,49.3,47.7,BFA,Africa
45,Cote d'Ivoire,,,,,42.0,,,,37.4,35.7,44.5,45.4,50.4,52.7,52.3,CIV,Africa
89,Ivory Coast,,,,,,,,,,,,,,,,CIV,Africa
41,"Congo, Dem. Rep.",,,,,39.8,,45.2,46.4,45.0,44.1,39.0,45.2,43.1,,43.1,COD,Africa
42,"Congo, Democratic Republic of",,,,,,,,,,,,,,,,COD,Africa
40,Congo,,,,,,,,,,,,,,,,COG,Africa
43,"Congo, Rep.",,,,38.2,,,45.1,39.2,39.5,40.6,46.9,41.2,48.8,54.9,51.9,COG,Africa
100,Kyrgyzstan,,,,,,,,,,,,,,,,KGZ,Asia
99,Kyrgyz Republic,,46.4,47.0,47.4,50.7,50.0,49.2,52.1,54.0,52.5,49.1,48.6,56.3,53.0,55.4,KGZ,Asia


In [182]:
rename_dict = {
    'Burkina Faso': 'Burkina',
    "Cote d'Ivoire": 'Ivory Coast',
    "Congo, Dem. Rep.": "Congo, Democratic Republic of",
    "Congo, Rep.": "Congo",
    "Kyrgyz Republic": "Kyrgyzstan",
    "South Korea": "Korea, South",
    "Lao": "Laos",
    "North Macedonia": "Macedonia",
    "Myanmar": "Burma (Myanmar)",
    "Russian Federation": "Russia",
    "Slovak Republic": "Slovakia",
    "Swaziland": "Eswatini"
}

happiness_miss['country'] = happiness_miss['country'].replace(rename_dict)
happiness_miss.head(50)

Unnamed: 0,country,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,country_code,continent
0,Afghanistan,,,,37.2,44.0,47.6,38.3,37.8,35.7,31.3,39.8,42.2,26.6,26.9,25.7,AFG,Asia
1,Albania,,,46.3,,54.9,52.7,58.7,55.1,45.5,48.1,46.1,45.1,46.4,50.0,48.8,ALB,Europe
2,Algeria,,,,,,54.6,53.2,56.0,,63.5,,53.4,52.5,50.4,50.1,DZA,Africa
3,Andorra,,,,,,,,,,,,,,,,AND,Europe
4,Angola,,,,,,,55.9,43.6,39.4,37.9,,,,,,AGO,Africa
5,Antigua and Barbuda,,,,,,,,,,,,,,,,ATG,North America
6,Argentina,,63.1,60.7,59.6,64.2,64.4,67.8,64.7,65.8,66.7,67.0,64.3,60.4,57.9,59.7,ARG,South America
7,Armenia,,42.9,48.8,46.5,41.8,43.7,42.6,43.2,42.8,44.5,43.5,43.3,42.9,50.6,46.8,ARM,Europe
8,Australia,73.4,,72.9,72.5,,74.5,74.1,72.0,73.6,72.9,73.1,72.5,72.6,71.8,72.2,AUS,Australia and Oceania
9,Austria,,71.2,,71.8,,73.0,74.7,74.0,75.0,69.5,70.8,70.5,72.9,74.0,72.9,AUT,Europe


In [183]:
# to drop the duplicate row with all null values 
happiness_miss['nan_count'] = happiness_miss.isnull().sum(axis=1)
happiness_miss = happiness_miss.sort_values(by=['country_code', 'nan_count'])
happiness_miss

Unnamed: 0,country,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,country_code,continent,nan_count
0,Afghanistan,,,,37.2,44.0,47.6,38.3,37.8,35.7,31.3,39.8,42.2,26.6,26.9,25.7,AFG,Asia,3
4,Angola,,,,,,,55.9,43.6,39.4,37.9,,,,,,AGO,Africa,11
1,Albania,,,46.3,,54.9,52.7,58.7,55.1,45.5,48.1,46.1,45.1,46.4,50.0,48.8,ALB,Europe,3
3,Andorra,,,,,,,,,,,,,,,,AND,Europe,15
197,United Arab Emirates,,67.3,,,68.7,71.0,71.2,72.2,66.2,65.4,65.7,68.3,70.4,66.0,67.9,ARE,Asia,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159,Samoa,,,,,,,,,,,,,,,,WSM,Australia and Oceania,15
206,Yemen,,,44.8,,48.1,43.5,37.5,40.6,42.2,39.7,29.8,38.3,32.5,30.6,35.3,YEM,Asia,3
173,South Africa,,50.8,52.0,53.5,52.2,46.5,49.3,51.3,36.6,48.3,48.9,47.7,45.1,48.8,48.1,ZAF,Africa,1
207,Zambia,,48.2,40.0,47.3,52.6,,50.0,50.1,52.4,43.5,48.4,43.5,39.3,40.4,37.6,ZMB,Africa,2


In [184]:
happiness_miss = happiness_miss.drop_duplicates(subset='country_code', keep='first')
happiness_miss.head(20)

Unnamed: 0,country,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,country_code,continent,nan_count
0,Afghanistan,,,,37.2,44.0,47.6,38.3,37.8,35.7,31.3,39.8,42.2,26.6,26.9,25.7,AFG,Asia,3
4,Angola,,,,,,,55.9,43.6,39.4,37.9,,,,,,AGO,Africa,11
1,Albania,,,46.3,,54.9,52.7,58.7,55.1,45.5,48.1,46.1,45.1,46.4,50.0,48.8,ALB,Europe,3
3,Andorra,,,,,,,,,,,,,,,,AND,Europe,15
197,United Arab Emirates,,67.3,,,68.7,71.0,71.2,72.2,66.2,65.4,65.7,68.3,70.4,66.0,67.9,ARE,Asia,3
6,Argentina,,63.1,60.7,59.6,64.2,64.4,67.8,64.7,65.8,66.7,67.0,64.3,60.4,57.9,59.7,ARG,South America,1
7,Armenia,,42.9,48.8,46.5,41.8,43.7,42.6,43.2,42.8,44.5,43.5,43.3,42.9,50.6,46.8,ARM,Europe,1
5,Antigua and Barbuda,,,,,,,,,,,,,,,,ATG,North America,15
8,Australia,73.4,,72.9,72.5,,74.5,74.1,72.0,73.6,72.9,73.1,72.5,72.6,71.8,72.2,AUS,Australia and Oceania,2
9,Austria,,71.2,,71.8,,73.0,74.7,74.0,75.0,69.5,70.8,70.5,72.9,74.0,72.9,AUT,Europe,3


In [185]:
happiness_miss.drop('nan_count', axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  happiness_miss.drop('nan_count', axis=1, inplace=True)


In [186]:
happiness_miss.to_csv('./data/happiness_score_full.csv', index=False)

In [187]:
#Where those countries have already missing values in 2019 before the merge
missing_values_happiness

Unnamed: 0,country,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
1,Angola,,,,,,,55.9,43.6,39.4,37.9,,,,,
18,Belize,,,64.5,,,,,,,59.6,,,,,
21,Bhutan,,,,,,,,,55.7,49.4,50.8,,,,
35,Cuba,,54.2,,,,,,,,,,,,,
39,Djibouti,,,,50.1,49.1,50.1,43.7,,,,,,,,
58,Guyana,,,59.9,,,,,,,,,,,,
114,Oman,,,,,,,68.5,,,,,,,,
123,Qatar,,,,,64.2,68.5,65.9,66.1,,,63.7,,,,
128,Sudan,,,,,44.5,44.4,43.1,45.5,,41.4,,,,,
133,Somalia,,,,,49.9,46.6,49.3,50.6,,55.3,53.5,46.7,,,


In [188]:
#All the countries that are missing in the df 'happiness_score'
missing_countries_happiness = happiness_miss[happiness_miss['2019'].isnull()]
missing_countries_happiness['country'].count()

#58-12= 46 countries are absent from the happiness score df

46

In [91]:
#again, renaming countries?
extra_population = pd.merge(countries, population, on='country_code', how='outer')
extra_population

Unnamed: 0,continent,country_x,country_code,country_y,1960,1961,1962,1963,1964,1965,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,,,ABW,Aruba,54208.0,55434.0,56234.0,56699.0,57029.0,57357.0,...,102050.0,102565.0,103165.0,103776.0,104339.0,104865.0,105361.0,105846.0,106310.0,106766.0
1,,,AFE,Africa Eastern and Southern,130836765.0,134159786.0,137614644.0,141202036.0,144920186.0,148769974.0,...,532760424.0,547482863.0,562601578.0,578075373.0,593871847.0,609978946.0,626392880.0,643090131.0,660046272.0,677243299.0
2,Asia,Afghanistan,AFG,Afghanistan,8996967.0,9169406.0,9351442.0,9543200.0,9744772.0,9956318.0,...,30117411.0,31161378.0,32269592.0,33370804.0,34413603.0,35383028.0,36296111.0,37171922.0,38041757.0,38928341.0
3,,,AFW,Africa Western and Central,96396419.0,98407221.0,100506960.0,102691339.0,104953470.0,107289875.0,...,360285439.0,370243017.0,380437896.0,390882979.0,401586651.0,412551299.0,423769930.0,435229381.0,446911598.0,458803476.0
4,Africa,Angola,AGO,Angola,5454938.0,5531451.0,5608499.0,5679409.0,5734995.0,5770573.0,...,24220660.0,25107925.0,26015786.0,26941773.0,27884380.0,28842482.0,29816769.0,30809787.0,31825299.0,32866268.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
262,,,XKX,Kosovo,947000.0,966000.0,994000.0,1022000.0,1050000.0,1078000.0,...,1791000.0,1807106.0,1818117.0,1812771.0,1788196.0,1777557.0,1791003.0,1797085.0,1788878.0,1790133.0
263,Asia,Yemen,YEM,"Yemen, Rep.",5315351.0,5393034.0,5473671.0,5556767.0,5641598.0,5727745.0,...,23807586.0,24473176.0,25147112.0,25823488.0,26497881.0,27168210.0,27834811.0,28498683.0,29161922.0,29825968.0
264,Africa,South Africa,ZAF,South Africa,17099836.0,17524533.0,17965733.0,18423157.0,18896303.0,19384838.0,...,52003759.0,52832659.0,53687125.0,54544184.0,55386369.0,56207649.0,57009751.0,57792520.0,58558267.0,59308690.0
265,Africa,Zambia,ZMB,Zambia,3070780.0,3164330.0,3260645.0,3360099.0,3463211.0,3570466.0,...,14023199.0,14465148.0,14926551.0,15399793.0,15879370.0,16363449.0,16853608.0,17351714.0,17861034.0,18383956.0


In [92]:
population.isnull().sum().sum()

103

In [93]:
extra_population[extra_population['country_y'].isnull()]

Unnamed: 0,continent,country_x,country_code,country_y,1960,1961,1962,1963,1964,1965,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
253,Europe,Vatican City,VAT,,,,,,,,...,,,,,,,,,,


In [94]:
dict_vat = {'Vatican City': 'Vatican City'}
extra_population['country_y'] = extra_population['country_y'].fillna(extra_population['country_x'].map(dict_vat))

In [95]:
extra_population.drop(['country_x'], inplace=True, axis=1)

In [96]:
population = extra_population.rename(columns = {'country_y': 'country'}, inplace = True)
population = extra_population

In [97]:
population[population['continent'].isnull()]

Unnamed: 0,continent,country_code,country,1960,1961,1962,1963,1964,1965,1966,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,,ABW,Aruba,5.420800e+04,5.543400e+04,5.623400e+04,5.669900e+04,5.702900e+04,5.735700e+04,5.770200e+04,...,1.020500e+05,1.025650e+05,1.031650e+05,1.037760e+05,1.043390e+05,1.048650e+05,1.053610e+05,1.058460e+05,1.063100e+05,1.067660e+05
1,,AFE,Africa Eastern and Southern,1.308368e+08,1.341598e+08,1.376146e+08,1.412020e+08,1.449202e+08,1.487700e+08,1.527527e+08,...,5.327604e+08,5.474829e+08,5.626016e+08,5.780754e+08,5.938718e+08,6.099789e+08,6.263929e+08,6.430901e+08,6.600463e+08,6.772433e+08
3,,AFW,Africa Western and Central,9.639642e+07,9.840722e+07,1.005070e+08,1.026913e+08,1.049535e+08,1.072899e+08,1.097018e+08,...,3.602854e+08,3.702430e+08,3.804379e+08,3.908830e+08,4.015867e+08,4.125513e+08,4.237699e+08,4.352294e+08,4.469116e+08,4.588035e+08
7,,ARB,Arab World,9.219772e+07,9.472454e+07,9.733444e+07,1.000342e+08,1.028328e+08,1.057364e+08,1.087586e+08,...,3.631568e+08,3.714376e+08,3.796965e+08,3.878998e+08,3.960283e+08,4.040429e+08,4.119428e+08,4.198520e+08,4.278703e+08,4.360807e+08
11,,ASM,American Samoa,2.012700e+04,2.060500e+04,2.124600e+04,2.202900e+04,2.285000e+04,2.367500e+04,2.447300e+04,...,5.575500e+04,5.566900e+04,5.571700e+04,5.579100e+04,5.580600e+04,5.573900e+04,5.561700e+04,5.546100e+04,5.531200e+04,5.519700e+04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
249,,UMC,Upper middle income,1.136464e+09,1.140114e+09,1.156187e+09,1.183604e+09,1.210649e+09,1.238613e+09,1.269459e+09,...,2.364103e+09,2.383978e+09,2.404063e+09,2.423763e+09,2.442626e+09,2.461213e+09,2.480025e+09,2.496446e+09,2.510773e+09,2.522438e+09
256,,VGB,British Virgin Islands,8.053000e+03,8.164000e+03,8.319000e+03,8.469000e+03,8.644000e+03,8.836000e+03,9.022000e+03,...,2.832600e+04,2.865400e+04,2.885000e+04,2.898500e+04,2.914800e+04,2.935500e+04,2.956700e+04,2.979500e+04,3.003300e+04,3.023700e+04
257,,VIR,Virgin Islands (U.S.),3.250000e+04,3.430000e+04,3.500000e+04,3.980000e+04,4.080000e+04,4.350000e+04,4.620000e+04,...,1.082900e+05,1.081880e+05,1.080410e+05,1.078820e+05,1.077120e+05,1.075160e+05,1.072810e+05,1.070010e+05,1.066690e+05,1.062900e+05
260,,WLD,World,3.032156e+09,3.071596e+09,3.124561e+09,3.189656e+09,3.255146e+09,3.322047e+09,3.392098e+09,...,7.003760e+09,7.089255e+09,7.175500e+09,7.261847e+09,7.347679e+09,7.433569e+09,7.519183e+09,7.602433e+09,7.683438e+09,7.763499e+09


In [98]:
#rename countries 
rename_dict = {
    'Burkina Faso': 'Burkina',
    "Cote d'Ivoire": 'Ivory Coast',
    "Congo, Dem. Rep.": "Congo, Democratic Republic of",
    "Congo, Rep.": "Congo",
    "Kyrgyz Republic": "Kyrgyzstan",
    "South Korea": "Korea, South",
    "Lao": "Laos",
    "North Macedonia": "Macedonia",
    "Myanmar": "Burma (Myanmar)",
    "Russian Federation": "Russia",
    "Slovak Republic": "Slovakia",
    "Swaziland": "Eswatini"
}

population['country'] = population['country'].replace(rename_dict)
population.head(50)

Unnamed: 0,continent,country_code,country,1960,1961,1962,1963,1964,1965,1966,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,,ABW,Aruba,54208.0,55434.0,56234.0,56699.0,57029.0,57357.0,57702.0,...,102050.0,102565.0,103165.0,103776.0,104339.0,104865.0,105361.0,105846.0,106310.0,106766.0
1,,AFE,Africa Eastern and Southern,130836765.0,134159786.0,137614644.0,141202036.0,144920186.0,148769974.0,152752671.0,...,532760400.0,547482900.0,562601600.0,578075400.0,593871800.0,609978900.0,626392900.0,643090100.0,660046300.0,677243300.0
2,Asia,AFG,Afghanistan,8996967.0,9169406.0,9351442.0,9543200.0,9744772.0,9956318.0,10174840.0,...,30117410.0,31161380.0,32269590.0,33370800.0,34413600.0,35383030.0,36296110.0,37171920.0,38041760.0,38928340.0
3,,AFW,Africa Western and Central,96396419.0,98407221.0,100506960.0,102691339.0,104953470.0,107289875.0,109701811.0,...,360285400.0,370243000.0,380437900.0,390883000.0,401586700.0,412551300.0,423769900.0,435229400.0,446911600.0,458803500.0
4,Africa,AGO,Angola,5454938.0,5531451.0,5608499.0,5679409.0,5734995.0,5770573.0,5781305.0,...,24220660.0,25107920.0,26015790.0,26941770.0,27884380.0,28842480.0,29816770.0,30809790.0,31825300.0,32866270.0
5,Europe,ALB,Albania,1608800.0,1659800.0,1711319.0,1762621.0,1814135.0,1864791.0,1914573.0,...,2905195.0,2900401.0,2895092.0,2889104.0,2880703.0,2876101.0,2873457.0,2866376.0,2854191.0,2837849.0
6,Europe,AND,Andorra,13410.0,14378.0,15379.0,16407.0,17466.0,18542.0,19646.0,...,83748.0,82427.0,80770.0,79213.0,77993.0,77295.0,76997.0,77008.0,77146.0,77265.0
7,,ARB,Arab World,92197715.0,94724540.0,97334438.0,100034191.0,102832792.0,105736428.0,108758634.0,...,363156800.0,371437600.0,379696500.0,387899800.0,396028300.0,404042900.0,411942800.0,419852000.0,427870300.0,436080700.0
8,Asia,ARE,United Arab Emirates,92417.0,100801.0,112112.0,125130.0,138049.0,149855.0,159979.0,...,8946778.0,9141598.0,9197908.0,9214182.0,9262896.0,9360975.0,9487206.0,9630966.0,9770526.0,9890400.0
9,South America,ARG,Argentina,20481781.0,20817270.0,21153042.0,21488916.0,21824427.0,22159644.0,22494031.0,...,41261490.0,41733270.0,42202940.0,42669500.0,43131970.0,43590370.0,44044810.0,44494500.0,44938710.0,45376760.0


In [99]:
# fill contiennt null values
population['continent'] = population['continent'].fillna(population['country'].map(dict_continent))

In [100]:
population.to_csv('./data/population_full.csv', index=False)

In [101]:
population[population['continent'].isnull()]

Unnamed: 0,continent,country_code,country,1960,1961,1962,1963,1964,1965,1966,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,,ABW,Aruba,5.420800e+04,5.543400e+04,5.623400e+04,5.669900e+04,5.702900e+04,5.735700e+04,5.770200e+04,...,1.020500e+05,1.025650e+05,1.031650e+05,1.037760e+05,1.043390e+05,1.048650e+05,1.053610e+05,1.058460e+05,1.063100e+05,1.067660e+05
1,,AFE,Africa Eastern and Southern,1.308368e+08,1.341598e+08,1.376146e+08,1.412020e+08,1.449202e+08,1.487700e+08,1.527527e+08,...,5.327604e+08,5.474829e+08,5.626016e+08,5.780754e+08,5.938718e+08,6.099789e+08,6.263929e+08,6.430901e+08,6.600463e+08,6.772433e+08
3,,AFW,Africa Western and Central,9.639642e+07,9.840722e+07,1.005070e+08,1.026913e+08,1.049535e+08,1.072899e+08,1.097018e+08,...,3.602854e+08,3.702430e+08,3.804379e+08,3.908830e+08,4.015867e+08,4.125513e+08,4.237699e+08,4.352294e+08,4.469116e+08,4.588035e+08
7,,ARB,Arab World,9.219772e+07,9.472454e+07,9.733444e+07,1.000342e+08,1.028328e+08,1.057364e+08,1.087586e+08,...,3.631568e+08,3.714376e+08,3.796965e+08,3.878998e+08,3.960283e+08,4.040429e+08,4.119428e+08,4.198520e+08,4.278703e+08,4.360807e+08
11,,ASM,American Samoa,2.012700e+04,2.060500e+04,2.124600e+04,2.202900e+04,2.285000e+04,2.367500e+04,2.447300e+04,...,5.575500e+04,5.566900e+04,5.571700e+04,5.579100e+04,5.580600e+04,5.573900e+04,5.561700e+04,5.546100e+04,5.531200e+04,5.519700e+04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
249,,UMC,Upper middle income,1.136464e+09,1.140114e+09,1.156187e+09,1.183604e+09,1.210649e+09,1.238613e+09,1.269459e+09,...,2.364103e+09,2.383978e+09,2.404063e+09,2.423763e+09,2.442626e+09,2.461213e+09,2.480025e+09,2.496446e+09,2.510773e+09,2.522438e+09
256,,VGB,British Virgin Islands,8.053000e+03,8.164000e+03,8.319000e+03,8.469000e+03,8.644000e+03,8.836000e+03,9.022000e+03,...,2.832600e+04,2.865400e+04,2.885000e+04,2.898500e+04,2.914800e+04,2.935500e+04,2.956700e+04,2.979500e+04,3.003300e+04,3.023700e+04
257,,VIR,Virgin Islands (U.S.),3.250000e+04,3.430000e+04,3.500000e+04,3.980000e+04,4.080000e+04,4.350000e+04,4.620000e+04,...,1.082900e+05,1.081880e+05,1.080410e+05,1.078820e+05,1.077120e+05,1.075160e+05,1.072810e+05,1.070010e+05,1.066690e+05,1.062900e+05
260,,WLD,World,3.032156e+09,3.071596e+09,3.124561e+09,3.189656e+09,3.255146e+09,3.322047e+09,3.392098e+09,...,7.003760e+09,7.089255e+09,7.175500e+09,7.261847e+09,7.347679e+09,7.433569e+09,7.519183e+09,7.602433e+09,7.683438e+09,7.763499e+09


#### About these extra rows in population 

73 row extra

'Arab World'
'World'
'Virgin Islands' (US)
'Africa Eastern and Southern'

Not countries but extra groups of population

In [102]:
extra_population.to_csv('./data/extra_population.csv', index=False)

In [103]:
#income inequality, what extra countries, extra rows compares to the base countries
extra_income_ineq = pd.merge(countries, income_inequality, on='country', how='outer')
extra_income_ineq.sort_values(by='country').head(50)

Unnamed: 0,continent,country,country_code,1800,1801,1802,1803,1804,1805,1806,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Asia,Afghanistan,AFG,30.5,30.5,30.5,30.5,30.5,30.5,30.5,...,37.7,37.7,37.7,37.7,37.7,37.7,37.7,37.7,38.3,39.0
1,Europe,Albania,ALB,39.9,39.9,39.9,39.9,39.9,39.9,39.9,...,29.0,31.6,34.6,32.9,33.7,33.2,33.2,33.2,33.8,34.5
2,Africa,Algeria,DZA,56.2,56.2,56.2,56.2,56.2,56.2,56.2,...,27.4,27.3,27.3,27.3,27.3,27.3,27.3,27.3,27.9,28.6
3,Europe,Andorra,AND,42.5,42.5,42.5,42.5,42.5,42.5,42.5,...,35.5,35.4,35.4,35.3,35.2,35.2,35.1,35.0,35.0,35.0
4,Africa,Angola,AGO,57.7,57.7,57.7,57.7,57.7,57.7,57.7,...,46.0,46.8,47.7,48.5,49.4,50.3,51.3,51.3,52.1,53.0
5,North America,Antigua and Barbuda,ATG,43.4,43.4,43.4,43.4,43.4,43.4,43.4,...,37.4,37.4,37.4,37.3,37.3,37.2,37.1,37.0,37.0,37.0
6,South America,Argentina,ARG,47.7,47.7,47.7,47.7,47.7,47.7,47.7,...,41.3,41.0,41.6,41.8,42.0,41.1,41.3,42.9,43.6,44.6
7,Europe,Armenia,ARM,31.5,31.5,31.5,31.5,31.5,31.5,31.5,...,29.6,30.6,31.5,32.4,32.5,33.6,34.4,29.9,30.5,31.2
8,Australia and Oceania,Australia,AUS,38.7,38.7,38.7,38.7,38.7,38.7,38.7,...,34.5,34.5,34.4,34.4,34.4,34.4,34.4,34.4,34.4,34.4
9,Europe,Austria,AUT,53.4,53.4,53.4,53.4,53.4,53.4,53.4,...,30.5,30.8,30.5,30.5,30.8,29.7,30.9,30.9,31.4,32.2


In [104]:
extra_income_ineq['country_code'] = extra_income_ineq['country_code'].fillna(extra_income_ineq['country'].map(dict_country_code_2))
extra_income_ineq['country'] = extra_income_ineq['country'].replace(rename_dict)
extra_income_ineq.sort_values(by='country').head(50)

Unnamed: 0,continent,country,country_code,1800,1801,1802,1803,1804,1805,1806,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Asia,Afghanistan,AFG,30.5,30.5,30.5,30.5,30.5,30.5,30.5,...,37.7,37.7,37.7,37.7,37.7,37.7,37.7,37.7,38.3,39.0
1,Europe,Albania,ALB,39.9,39.9,39.9,39.9,39.9,39.9,39.9,...,29.0,31.6,34.6,32.9,33.7,33.2,33.2,33.2,33.8,34.5
2,Africa,Algeria,DZA,56.2,56.2,56.2,56.2,56.2,56.2,56.2,...,27.4,27.3,27.3,27.3,27.3,27.3,27.3,27.3,27.9,28.6
3,Europe,Andorra,AND,42.5,42.5,42.5,42.5,42.5,42.5,42.5,...,35.5,35.4,35.4,35.3,35.2,35.2,35.1,35.0,35.0,35.0
4,Africa,Angola,AGO,57.7,57.7,57.7,57.7,57.7,57.7,57.7,...,46.0,46.8,47.7,48.5,49.4,50.3,51.3,51.3,52.1,53.0
5,North America,Antigua and Barbuda,ATG,43.4,43.4,43.4,43.4,43.4,43.4,43.4,...,37.4,37.4,37.4,37.3,37.3,37.2,37.1,37.0,37.0,37.0
6,South America,Argentina,ARG,47.7,47.7,47.7,47.7,47.7,47.7,47.7,...,41.3,41.0,41.6,41.8,42.0,41.1,41.3,42.9,43.6,44.6
7,Europe,Armenia,ARM,31.5,31.5,31.5,31.5,31.5,31.5,31.5,...,29.6,30.6,31.5,32.4,32.5,33.6,34.4,29.9,30.5,31.2
8,Australia and Oceania,Australia,AUS,38.7,38.7,38.7,38.7,38.7,38.7,38.7,...,34.5,34.5,34.4,34.4,34.4,34.4,34.4,34.4,34.4,34.4
9,Europe,Austria,AUT,53.4,53.4,53.4,53.4,53.4,53.4,53.4,...,30.5,30.8,30.5,30.5,30.8,29.7,30.9,30.9,31.4,32.2


In [105]:
extra_income_ineq['nan_count'] = extra_income_ineq.isnull().sum(axis=1)
extra_income_ineq = extra_income_ineq.sort_values(by=['country_code', 'nan_count'])
extra_income_ineq.head(30)

Unnamed: 0,continent,country,country_code,1800,1801,1802,1803,1804,1805,1806,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,nan_count
0,Asia,Afghanistan,AFG,30.5,30.5,30.5,30.5,30.5,30.5,30.5,...,37.7,37.7,37.7,37.7,37.7,37.7,37.7,38.3,39.0,0
4,Africa,Angola,AGO,57.7,57.7,57.7,57.7,57.7,57.7,57.7,...,46.8,47.7,48.5,49.4,50.3,51.3,51.3,52.1,53.0,0
1,Europe,Albania,ALB,39.9,39.9,39.9,39.9,39.9,39.9,39.9,...,31.6,34.6,32.9,33.7,33.2,33.2,33.2,33.8,34.5,0
3,Europe,Andorra,AND,42.5,42.5,42.5,42.5,42.5,42.5,42.5,...,35.4,35.4,35.3,35.2,35.2,35.1,35.0,35.0,35.0,0
204,Asia,United Arab Emirates,ARE,39.8,39.8,39.8,39.8,39.8,39.8,39.8,...,32.5,31.1,29.7,28.4,27.2,26.0,26.0,26.0,26.7,0
6,South America,Argentina,ARG,47.7,47.7,47.7,47.7,47.7,47.7,47.7,...,41.0,41.6,41.8,42.0,41.1,41.3,42.9,43.6,44.6,0
7,Europe,Armenia,ARM,31.5,31.5,31.5,31.5,31.5,31.5,31.5,...,30.6,31.5,32.4,32.5,33.6,34.4,29.9,30.5,31.2,0
5,North America,Antigua and Barbuda,ATG,43.4,43.4,43.4,43.4,43.4,43.4,43.4,...,37.4,37.4,37.3,37.3,37.2,37.1,37.0,37.0,37.0,0
8,Australia and Oceania,Australia,AUS,38.7,38.7,38.7,38.7,38.7,38.7,38.7,...,34.5,34.4,34.4,34.4,34.4,34.4,34.4,34.4,34.4,0
9,Europe,Austria,AUT,53.4,53.4,53.4,53.4,53.4,53.4,53.4,...,30.8,30.5,30.5,30.8,29.7,30.9,30.9,31.4,32.2,0


In [106]:
extra_income_ineq = extra_income_ineq.drop_duplicates(subset='country_code', keep='first')
extra_income_ineq.head(20)

Unnamed: 0,continent,country,country_code,1800,1801,1802,1803,1804,1805,1806,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,nan_count
0,Asia,Afghanistan,AFG,30.5,30.5,30.5,30.5,30.5,30.5,30.5,...,37.7,37.7,37.7,37.7,37.7,37.7,37.7,38.3,39.0,0
4,Africa,Angola,AGO,57.7,57.7,57.7,57.7,57.7,57.7,57.7,...,46.8,47.7,48.5,49.4,50.3,51.3,51.3,52.1,53.0,0
1,Europe,Albania,ALB,39.9,39.9,39.9,39.9,39.9,39.9,39.9,...,31.6,34.6,32.9,33.7,33.2,33.2,33.2,33.8,34.5,0
3,Europe,Andorra,AND,42.5,42.5,42.5,42.5,42.5,42.5,42.5,...,35.4,35.4,35.3,35.2,35.2,35.1,35.0,35.0,35.0,0
204,Asia,United Arab Emirates,ARE,39.8,39.8,39.8,39.8,39.8,39.8,39.8,...,32.5,31.1,29.7,28.4,27.2,26.0,26.0,26.0,26.7,0
6,South America,Argentina,ARG,47.7,47.7,47.7,47.7,47.7,47.7,47.7,...,41.0,41.6,41.8,42.0,41.1,41.3,42.9,43.6,44.6,0
7,Europe,Armenia,ARM,31.5,31.5,31.5,31.5,31.5,31.5,31.5,...,30.6,31.5,32.4,32.5,33.6,34.4,29.9,30.5,31.2,0
5,North America,Antigua and Barbuda,ATG,43.4,43.4,43.4,43.4,43.4,43.4,43.4,...,37.4,37.4,37.3,37.3,37.2,37.1,37.0,37.0,37.0,0
8,Australia and Oceania,Australia,AUS,38.7,38.7,38.7,38.7,38.7,38.7,38.7,...,34.5,34.4,34.4,34.4,34.4,34.4,34.4,34.4,34.4,0
9,Europe,Austria,AUT,53.4,53.4,53.4,53.4,53.4,53.4,53.4,...,30.8,30.5,30.5,30.8,29.7,30.9,30.9,31.4,32.2,0


In [107]:
extra_income_ineq.drop('nan_count', axis=1, inplace=True)
extra_income_ineq.to_csv('./data/income_inequality_full.csv', index=False)

In [108]:
extra_income_ineq.isnull().sum().sum()

22

In [109]:
income_inequality.isnull().sum().sum()

0

In [110]:
dif_country_name = extra_income_ineq[extra_income_ineq['continent'].isnull()]
dif_country_name.head()

Unnamed: 0,continent,country,country_code,1800,1801,1802,1803,1804,1805,1806,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
27,,Burkina,BFA,37.6,37.6,37.6,37.6,37.6,37.6,37.6,...,37.0,36.1,35.3,35.1,35.1,35.1,35.1,35.1,35.8,36.6
45,,Ivory Coast,CIV,39.9,39.9,39.9,39.9,39.9,39.9,39.9,...,42.2,42.0,41.7,41.5,41.5,41.5,41.5,41.5,42.2,43.0
41,,"Congo, Democratic Republic of",COD,40.4,40.4,40.4,40.4,40.4,40.4,40.4,...,42.1,42.1,42.1,42.1,42.1,42.1,42.1,42.1,43.3,44.7
43,,Congo,COG,56.6,56.6,56.6,56.6,56.6,56.6,56.6,...,48.9,48.9,48.9,48.9,48.9,48.9,48.9,48.9,49.7,50.7
124,,"Micronesia, Fed. Sts.",FSM,46.1,46.1,46.1,46.1,46.1,46.1,46.1,...,40.3,40.1,40.7,40.6,40.6,40.6,40.6,40.6,40.6,40.6


In [111]:
dif_country_name['country']

27                            Burkina
45                        Ivory Coast
41      Congo, Democratic Republic of
43                              Congo
124             Micronesia, Fed. Sts.
80                   Hong Kong, China
100                        Kyrgyzstan
181               St. Kitts and Nevis
177                      Korea, South
102                              Laos
182                         St. Lucia
141                         Macedonia
131                   Burma (Myanmar)
140                       North Korea
146                         Palestine
156                            Russia
171                          Slovakia
61                           Eswatini
194                       Timor-Leste
190                            Taiwan
78                           Holy See
183    St. Vincent and the Grenadines
Name: country, dtype: object

In [112]:
dict_country_code_2 = {
    "Burkina Faso": "BFA",
    "Congo, Dem. Rep.": "COD",
    "Congo, Rep.": "COG",
    "Cote d'Ivoire": "CIV",
    "Eswatini": "SWZ",
    "Holy See": "VAT",
    "Hong Kong, China": "HKG",
    "Kyrgyz Republic": "KGZ",
    "Lao": "LAO",
    "Micronesia, Fed. Sts.": "FSM",
    "Myanmar": "MMR",
    "North Korea": "PRK",
    "North Macedonia": "MKD",
    "Palestine": "PSE",
    "Russia": "RUS",
    "Slovak Republic": "SVK",
    "South Korea": "KOR",
    "St. Kitts and Nevis": "KNA",
    "St. Lucia": "LCA",
    "St. Vincent and the Grenadines": "VCT",
    "Taiwan": "TWN",
    "Timor-Leste": "TLS"
}

dif_country_name['country_code'] = dif_country_name['country_code'].fillna(dif_country_name['country'].map(dict_country_code_2))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dif_country_name['country_code'] = dif_country_name['country_code'].fillna(dif_country_name['country'].map(dict_country_code_2))


In [113]:
dif_name = dif_country_name[dif_country_name['continent'].isnull()]
dif_name.sort_values(by='country_code')

Unnamed: 0,continent,country,country_code,1800,1801,1802,1803,1804,1805,1806,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
27,,Burkina,BFA,37.6,37.6,37.6,37.6,37.6,37.6,37.6,...,37.0,36.1,35.3,35.1,35.1,35.1,35.1,35.1,35.8,36.6
45,,Ivory Coast,CIV,39.9,39.9,39.9,39.9,39.9,39.9,39.9,...,42.2,42.0,41.7,41.5,41.5,41.5,41.5,41.5,42.2,43.0
41,,"Congo, Democratic Republic of",COD,40.4,40.4,40.4,40.4,40.4,40.4,40.4,...,42.1,42.1,42.1,42.1,42.1,42.1,42.1,42.1,43.3,44.7
43,,Congo,COG,56.6,56.6,56.6,56.6,56.6,56.6,56.6,...,48.9,48.9,48.9,48.9,48.9,48.9,48.9,48.9,49.7,50.7
124,,"Micronesia, Fed. Sts.",FSM,46.1,46.1,46.1,46.1,46.1,46.1,46.1,...,40.3,40.1,40.7,40.6,40.6,40.6,40.6,40.6,40.6,40.6
80,,"Hong Kong, China",HKG,48.7,48.7,48.7,48.7,48.7,48.7,48.7,...,53.7,53.7,53.7,53.7,53.7,53.7,53.7,53.7,53.7,53.7
100,,Kyrgyzstan,KGZ,46.0,46.0,46.0,46.0,46.0,46.0,46.0,...,27.4,28.8,26.8,29.0,26.8,27.3,27.7,29.7,30.3,31.0
181,,St. Kitts and Nevis,KNA,42.6,42.6,42.6,42.6,42.6,42.6,42.6,...,36.9,36.8,36.7,36.7,36.7,36.7,36.7,36.6,36.6,36.6
177,,"Korea, South",KOR,32.1,32.1,32.1,32.1,32.1,32.1,32.1,...,31.6,31.4,31.2,31.3,31.4,31.4,31.4,31.4,31.4,32.1
102,,Laos,LAO,32.6,32.6,32.6,32.6,32.6,32.6,32.6,...,36.0,36.4,36.9,37.4,37.8,38.3,38.8,38.8,38.8,38.8


In [114]:
countries_name_temp = pd.merge(countries[['country', 'country_code']], dif_country_name, on='country_code', how='inner')
countries_name_temp.drop('country_y', inplace=True, axis=1)
countries_name_temp.sort_values(by='country_code')

Unnamed: 0,country_x,country_code,continent,1800,1801,1802,1803,1804,1805,1806,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Burkina,BFA,,37.6,37.6,37.6,37.6,37.6,37.6,37.6,...,37.0,36.1,35.3,35.1,35.1,35.1,35.1,35.1,35.8,36.6
3,Ivory Coast,CIV,,39.9,39.9,39.9,39.9,39.9,39.9,39.9,...,42.2,42.0,41.7,41.5,41.5,41.5,41.5,41.5,42.2,43.0
2,"Congo, Democratic Republic of",COD,,40.4,40.4,40.4,40.4,40.4,40.4,40.4,...,42.1,42.1,42.1,42.1,42.1,42.1,42.1,42.1,43.3,44.7
1,Congo,COG,,56.6,56.6,56.6,56.6,56.6,56.6,56.6,...,48.9,48.9,48.9,48.9,48.9,48.9,48.9,48.9,49.7,50.7
18,Micronesia,FSM,,46.1,46.1,46.1,46.1,46.1,46.1,46.1,...,40.3,40.1,40.7,40.6,40.6,40.6,40.6,40.6,40.6,40.6
9,Kyrgyzstan,KGZ,,46.0,46.0,46.0,46.0,46.0,46.0,46.0,...,27.4,28.8,26.8,29.0,26.8,27.3,27.7,29.7,30.3,31.0
15,Saint Kitts and Nevis,KNA,,42.6,42.6,42.6,42.6,42.6,42.6,42.6,...,36.9,36.8,36.7,36.7,36.7,36.7,36.7,36.6,36.6,36.6
8,"Korea, South",KOR,,32.1,32.1,32.1,32.1,32.1,32.1,32.1,...,31.6,31.4,31.2,31.3,31.4,31.4,31.4,31.4,31.4,32.1
10,Laos,LAO,,32.6,32.6,32.6,32.6,32.6,32.6,32.6,...,36.0,36.4,36.9,37.4,37.8,38.3,38.8,38.8,38.8,38.8
16,Saint Lucia,LCA,,42.6,42.6,42.6,42.6,42.6,42.6,42.6,...,49.5,49.9,50.3,50.8,51.2,51.2,51.2,51.2,51.2,51.2


In [115]:
countries_name_temp.rename(columns = {'country_x' : 'country'}, inplace=True)
countries_name_temp.sort_values(by='country_code')

Unnamed: 0,country,country_code,continent,1800,1801,1802,1803,1804,1805,1806,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Burkina,BFA,,37.6,37.6,37.6,37.6,37.6,37.6,37.6,...,37.0,36.1,35.3,35.1,35.1,35.1,35.1,35.1,35.8,36.6
3,Ivory Coast,CIV,,39.9,39.9,39.9,39.9,39.9,39.9,39.9,...,42.2,42.0,41.7,41.5,41.5,41.5,41.5,41.5,42.2,43.0
2,"Congo, Democratic Republic of",COD,,40.4,40.4,40.4,40.4,40.4,40.4,40.4,...,42.1,42.1,42.1,42.1,42.1,42.1,42.1,42.1,43.3,44.7
1,Congo,COG,,56.6,56.6,56.6,56.6,56.6,56.6,56.6,...,48.9,48.9,48.9,48.9,48.9,48.9,48.9,48.9,49.7,50.7
18,Micronesia,FSM,,46.1,46.1,46.1,46.1,46.1,46.1,46.1,...,40.3,40.1,40.7,40.6,40.6,40.6,40.6,40.6,40.6,40.6
9,Kyrgyzstan,KGZ,,46.0,46.0,46.0,46.0,46.0,46.0,46.0,...,27.4,28.8,26.8,29.0,26.8,27.3,27.7,29.7,30.3,31.0
15,Saint Kitts and Nevis,KNA,,42.6,42.6,42.6,42.6,42.6,42.6,42.6,...,36.9,36.8,36.7,36.7,36.7,36.7,36.7,36.6,36.6,36.6
8,"Korea, South",KOR,,32.1,32.1,32.1,32.1,32.1,32.1,32.1,...,31.6,31.4,31.2,31.3,31.4,31.4,31.4,31.4,31.4,32.1
10,Laos,LAO,,32.6,32.6,32.6,32.6,32.6,32.6,32.6,...,36.0,36.4,36.9,37.4,37.8,38.3,38.8,38.8,38.8,38.8
16,Saint Lucia,LCA,,42.6,42.6,42.6,42.6,42.6,42.6,42.6,...,49.5,49.9,50.3,50.8,51.2,51.2,51.2,51.2,51.2,51.2


In [116]:
income_ineq = pd.concat([extra_income_ineq, 

SyntaxError: incomplete input (568348416.py, line 1)

In [117]:
#  dropping the old rows with old country names : 

extra_income_ineq.dropna(subset=['continent'], inplace=True)

In [118]:
income_ineq = pd.concat([extra_income_ineq, 

SyntaxError: incomplete input (568348416.py, line 1)

In [119]:
income_ineq[income_ineq['country'] == 'Burkina']

NameError: name 'income_ineq' is not defined

In [120]:
income_ineq = income_ineq.dropna(subset=['2010'])
income_ineq

NameError: name 'income_ineq' is not defined

In [121]:
income_ineq.drop('continent',axis=1, inplace=True)

NameError: name 'income_ineq' is not defined

In [122]:
income_ineq.shape

NameError: name 'income_ineq' is not defined

In [123]:
#Checking for average daily income the name of countries 

average_daily_income[average_daily_income['country'] == 'Burkina Faso']

Unnamed: 0,country,1800,1801,1802,1803,1804,1805,1806,1807,1808,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
14,Burkina Faso,695.0,695.0,695.0,695.0,695.0,695.0,695.0,695.0,695.0,...,2.69,2.75,2.78,2.81,2.89,2.98,3.09,3.18,3.11,3.15


In [124]:
#income inequality, what extra countries, extra rows compares to the base countries
extra_income_avg = pd.merge(countries, average_daily_income, on='country', how='outer')
extra_income_avg

Unnamed: 0,continent,country,country_code,1800,1801,1802,1803,1804,1805,1806,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Asia,Afghanistan,AFG,919.00,919.00,919.00,919.00,919.00,919.00,919.00,...,2.83,2.89,2.87,2.82,2.81,2.81,2.77,2.82,2.62,2.66
1,Europe,Albania,ALB,1.22,1.22,1.22,1.22,1.22,1.22,1.22,...,7.99,8.20,8.52,8.62,8.79,8.73,9.04,9.35,9.04,9.50
2,Africa,Algeria,DZA,716.00,716.00,716.00,716.00,716.00,716.00,716.00,...,8.53,8.78,8.98,9.14,9.25,9.23,9.30,9.31,8.60,8.69
3,Europe,Andorra,AND,1.48,1.49,1.49,1.49,1.49,1.50,1.50,...,52.00,54.20,55.70,57.70,59.70,61.70,63.80,66.00,68.30,70.60
4,Africa,Angola,AGO,2.27,2.27,2.28,2.29,2.30,2.31,2.32,...,3.73,3.76,3.78,3.67,3.55,3.37,3.12,2.99,2.79,2.72
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
210,South America,Venezuela,VEN,1.05,1.05,1.04,1.04,1.03,1.03,1.02,...,13.20,13.60,13.10,12.10,9.78,8.28,6.89,4.65,3.24,2.95
211,Asia,Vietnam,VNM,1.16,1.16,1.16,1.16,1.16,1.16,1.16,...,8.04,8.25,8.51,9.24,9.83,10.40,10.90,11.60,11.90,12.50
212,Asia,Yemen,YEM,1.22,1.23,1.23,1.23,1.24,1.24,1.24,...,4.23,4.35,4.18,2.93,2.59,2.40,2.36,2.36,2.18,2.14
213,Africa,Zambia,ZMB,1.26,1.26,1.27,1.27,1.27,1.27,1.28,...,2.78,2.85,2.88,2.91,2.93,2.94,2.97,2.93,2.74,2.68


In [125]:
extra_income_avg['country_code'] = extra_income_avg['country_code'].fillna(extra_income_avg['country'].map(dict_country_code_2))
extra_income_avg['country'] = extra_income_avg['country'].replace(rename_dict)
extra_income_avg.head(50)

Unnamed: 0,continent,country,country_code,1800,1801,1802,1803,1804,1805,1806,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Asia,Afghanistan,AFG,919.0,919.0,919.0,919.0,919.0,919.0,919.0,...,2.83,2.89,2.87,2.82,2.81,2.81,2.77,2.82,2.62,2.66
1,Europe,Albania,ALB,1.22,1.22,1.22,1.22,1.22,1.22,1.22,...,7.99,8.2,8.52,8.62,8.79,8.73,9.04,9.35,9.04,9.5
2,Africa,Algeria,DZA,716.0,716.0,716.0,716.0,716.0,716.0,716.0,...,8.53,8.78,8.98,9.14,9.25,9.23,9.3,9.31,8.6,8.69
3,Europe,Andorra,AND,1.48,1.49,1.49,1.49,1.49,1.5,1.5,...,52.0,54.2,55.7,57.7,59.7,61.7,63.8,66.0,68.3,70.6
4,Africa,Angola,AGO,2.27,2.27,2.28,2.29,2.3,2.31,2.32,...,3.73,3.76,3.78,3.67,3.55,3.37,3.12,2.99,2.79,2.72
5,North America,Antigua and Barbuda,ATG,883.0,883.0,883.0,883.0,883.0,883.0,883.0,...,18.7,18.4,18.9,19.4,20.3,20.7,22.0,22.9,18.6,17.8
6,South America,Argentina,ARG,2.4,2.41,2.42,2.43,2.44,2.45,2.46,...,20.5,20.6,19.3,20.2,20.2,21.2,19.1,18.7,16.6,17.4
7,Europe,Armenia,ARM,795.0,795.0,795.0,795.0,795.0,795.0,795.0,...,6.68,6.93,7.11,7.62,8.03,7.81,7.81,6.99,6.46,6.53
8,Australia and Oceania,Australia,AUS,1.19,1.2,1.2,1.21,1.22,1.23,1.23,...,58.5,58.5,59.2,59.8,60.5,60.8,61.6,61.7,59.8,62.4
9,Europe,Austria,AUT,2.77,2.78,2.79,2.81,2.82,2.83,2.84,...,54.2,56.9,56.0,55.8,58.3,57.6,58.2,58.4,54.3,55.9


In [126]:
extra_income_avg['nan_count'] = extra_income_avg.isnull().sum(axis=1)
extra_income_avg = extra_income_avg.sort_values(by=['country_code', 'nan_count'])
extra_income_avg

Unnamed: 0,continent,country,country_code,1800,1801,1802,1803,1804,1805,1806,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,nan_count
0,Asia,Afghanistan,AFG,919.00,919.00,919.00,919.00,919.00,919.00,919.00,...,2.89,2.87,2.82,2.81,2.81,2.77,2.82,2.62,2.66,0
4,Africa,Angola,AGO,2.27,2.27,2.28,2.29,2.30,2.31,2.32,...,3.76,3.78,3.67,3.55,3.37,3.12,2.99,2.79,2.72,0
1,Europe,Albania,ALB,1.22,1.22,1.22,1.22,1.22,1.22,1.22,...,8.20,8.52,8.62,8.79,8.73,9.04,9.35,9.04,9.50,0
3,Europe,Andorra,AND,1.48,1.49,1.49,1.49,1.49,1.50,1.50,...,54.20,55.70,57.70,59.70,61.70,63.80,66.00,68.30,70.60,0
203,Asia,United Arab Emirates,ARE,2.90,2.91,2.92,2.93,2.94,2.95,2.96,...,73.60,83.70,69.10,71.20,78.70,84.10,94.00,85.80,85.80,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
161,Australia and Oceania,Samoa,WSM,2.76,2.76,2.76,2.76,2.76,2.76,2.76,...,9.50,9.44,9.78,10.50,10.60,10.30,10.60,10.20,9.33,0
212,Asia,Yemen,YEM,1.22,1.23,1.23,1.23,1.24,1.24,1.24,...,4.35,4.18,2.93,2.59,2.40,2.36,2.36,2.18,2.14,0
175,Africa,South Africa,ZAF,1.34,1.32,1.31,1.29,1.28,1.21,1.21,...,11.80,11.60,11.30,11.20,11.20,11.10,11.00,10.10,10.30,0
213,Africa,Zambia,ZMB,1.26,1.26,1.27,1.27,1.27,1.27,1.28,...,2.85,2.88,2.91,2.93,2.94,2.97,2.93,2.74,2.68,0


In [127]:
extra_income_avg = extra_income_avg.drop_duplicates(subset='country_code', keep='first')
extra_income_avg.head(20)

Unnamed: 0,continent,country,country_code,1800,1801,1802,1803,1804,1805,1806,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,nan_count
0,Asia,Afghanistan,AFG,919.0,919.0,919.0,919.0,919.0,919.0,919.0,...,2.89,2.87,2.82,2.81,2.81,2.77,2.82,2.62,2.66,0
4,Africa,Angola,AGO,2.27,2.27,2.28,2.29,2.3,2.31,2.32,...,3.76,3.78,3.67,3.55,3.37,3.12,2.99,2.79,2.72,0
1,Europe,Albania,ALB,1.22,1.22,1.22,1.22,1.22,1.22,1.22,...,8.2,8.52,8.62,8.79,8.73,9.04,9.35,9.04,9.5,0
3,Europe,Andorra,AND,1.48,1.49,1.49,1.49,1.49,1.5,1.5,...,54.2,55.7,57.7,59.7,61.7,63.8,66.0,68.3,70.6,0
203,Asia,United Arab Emirates,ARE,2.9,2.91,2.92,2.93,2.94,2.95,2.96,...,73.6,83.7,69.1,71.2,78.7,84.1,94.0,85.8,85.8,0
6,South America,Argentina,ARG,2.4,2.41,2.42,2.43,2.44,2.45,2.46,...,20.6,19.3,20.2,20.2,21.2,19.1,18.7,16.6,17.4,0
7,Europe,Armenia,ARM,795.0,795.0,795.0,795.0,795.0,795.0,795.0,...,6.93,7.11,7.62,8.03,7.81,7.81,6.99,6.46,6.53,0
5,North America,Antigua and Barbuda,ATG,883.0,883.0,883.0,883.0,883.0,883.0,883.0,...,18.4,18.9,19.4,20.3,20.7,22.0,22.9,18.6,17.8,0
8,Australia and Oceania,Australia,AUS,1.19,1.2,1.2,1.21,1.22,1.23,1.23,...,58.5,59.2,59.8,60.5,60.8,61.6,61.7,59.8,62.4,0
9,Europe,Austria,AUT,2.77,2.78,2.79,2.81,2.82,2.83,2.84,...,56.9,56.0,55.8,58.3,57.6,58.2,58.4,54.3,55.9,0


In [128]:
extra_income_avg.drop('nan_count', axis=1, inplace=True)
extra_income_avg.to_csv('./data/income_avg_full.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  extra_income_avg.drop('nan_count', axis=1, inplace=True)


In [129]:
dif_country_name_2 = extra_income_avg[extra_income_avg['continent'].isnull()]
dif_country_name_2.sort_values(by='country_code')

Unnamed: 0,continent,country,country_code,1800,1801,1802,1803,1804,1805,1806,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
27,,Burkina,BFA,695.0,695.0,695.0,695.0,695.0,695.0,695.0,...,2.69,2.75,2.78,2.81,2.89,2.98,3.09,3.18,3.11,3.15
45,,Ivory Coast,CIV,1.87,1.87,1.87,1.87,1.87,1.87,1.87,...,3.47,3.57,3.67,3.77,3.94,4.13,4.3,4.45,4.44,4.58
41,,"Congo, Democratic Republic of",COD,534.0,536.0,539.0,541.0,543.0,545.0,547.0,...,1.44,1.57,1.66,1.72,1.7,1.71,1.75,1.77,1.71,1.72
43,,Congo,COG,389.0,389.0,0.39,392.0,393.0,394.0,395.0,...,3.96,3.84,3.97,3.74,3.28,3.05,2.79,2.62,2.36,2.3
123,,"Micronesia, Fed. Sts.",FSM,1.22,1.22,1.22,1.22,1.22,1.22,1.22,...,5.7,5.16,4.97,5.13,5.12,5.19,5.15,5.21,5.11,4.92
79,,"Hong Kong, China",HKG,1.26,1.27,1.28,1.29,1.3,1.31,1.32,...,59.9,61.5,62.8,63.7,64.7,66.7,68.0,66.7,63.0,65.3
99,,Kyrgyzstan,KGZ,757.0,757.0,757.0,757.0,757.0,757.0,759.0,...,5.58,5.5,5.75,5.66,5.77,5.81,6.21,6.58,5.93,6.16
180,,St. Kitts and Nevis,KNA,918.0,0.92,0.92,0.92,0.92,0.92,0.92,...,16.4,17.1,18.0,18.0,18.4,17.9,18.3,18.7,15.0,14.6
176,,"Korea, South",KOR,1.34,1.34,1.34,1.34,1.34,1.34,1.34,...,39.8,40.7,41.7,42.5,43.5,44.6,45.8,46.4,45.9,47.5
101,,Laos,LAO,1.67,1.67,1.67,1.67,1.67,1.67,1.67,...,4.39,4.53,4.68,4.83,4.98,5.14,5.28,5.42,5.32,5.49


In [130]:
dif_country_name_2['country']

27                            Burkina
45                        Ivory Coast
41      Congo, Democratic Republic of
43                              Congo
123             Micronesia, Fed. Sts.
79                   Hong Kong, China
99                         Kyrgyzstan
180               St. Kitts and Nevis
176                      Korea, South
101                              Laos
181                         St. Lucia
140                         Macedonia
130                   Burma (Myanmar)
139                       North Korea
145                         Palestine
155                            Russia
170                          Slovakia
61                           Eswatini
193                       Timor-Leste
189                            Taiwan
182    St. Vincent and the Grenadines
Name: country, dtype: object

In [131]:
dict_country_code_3 = {
    "Burkina Faso": "BFA",
    "Congo, Dem. Rep.": "COD",
    "Congo, Rep.": "COG",
    "Cote d'Ivoire": "CIV",
    "Eswatini": "SWZ",
    "Hong Kong, China": "HKG",
    "Kyrgyz Republic": "KGZ",
    "Lao": "LAO",
    "Micronesia, Fed. Sts.": "FSM",
    "Myanmar": "MMR",
    "North Korea": "PRK",
    "North Macedonia": "MKD",
    "Palestine": "PSE",
    "Russia": "RUS",
    "Slovak Republic": "SVK",
    "South Korea": "KOR",
    "St. Kitts and Nevis": "KNA",
    "St. Lucia": "LCA",
    "St. Vincent and the Grenadines": "VCT",
    "Taiwan": "TWN",
    "Timor-Leste": "TLS"
}


dif_country_name_2['country_code'] = dif_country_name_2['country_code'].fillna(dif_country_name_2['country'].map(dict_country_code_3))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dif_country_name_2['country_code'] = dif_country_name_2['country_code'].fillna(dif_country_name_2['country'].map(dict_country_code_3))


In [132]:
dif_country_name_2.sort_values(by='country_code')

Unnamed: 0,continent,country,country_code,1800,1801,1802,1803,1804,1805,1806,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
27,,Burkina,BFA,695.0,695.0,695.0,695.0,695.0,695.0,695.0,...,2.69,2.75,2.78,2.81,2.89,2.98,3.09,3.18,3.11,3.15
45,,Ivory Coast,CIV,1.87,1.87,1.87,1.87,1.87,1.87,1.87,...,3.47,3.57,3.67,3.77,3.94,4.13,4.3,4.45,4.44,4.58
41,,"Congo, Democratic Republic of",COD,534.0,536.0,539.0,541.0,543.0,545.0,547.0,...,1.44,1.57,1.66,1.72,1.7,1.71,1.75,1.77,1.71,1.72
43,,Congo,COG,389.0,389.0,0.39,392.0,393.0,394.0,395.0,...,3.96,3.84,3.97,3.74,3.28,3.05,2.79,2.62,2.36,2.3
123,,"Micronesia, Fed. Sts.",FSM,1.22,1.22,1.22,1.22,1.22,1.22,1.22,...,5.7,5.16,4.97,5.13,5.12,5.19,5.15,5.21,5.11,4.92
79,,"Hong Kong, China",HKG,1.26,1.27,1.28,1.29,1.3,1.31,1.32,...,59.9,61.5,62.8,63.7,64.7,66.7,68.0,66.7,63.0,65.3
99,,Kyrgyzstan,KGZ,757.0,757.0,757.0,757.0,757.0,757.0,759.0,...,5.58,5.5,5.75,5.66,5.77,5.81,6.21,6.58,5.93,6.16
180,,St. Kitts and Nevis,KNA,918.0,0.92,0.92,0.92,0.92,0.92,0.92,...,16.4,17.1,18.0,18.0,18.4,17.9,18.3,18.7,15.0,14.6
176,,"Korea, South",KOR,1.34,1.34,1.34,1.34,1.34,1.34,1.34,...,39.8,40.7,41.7,42.5,43.5,44.6,45.8,46.4,45.9,47.5
101,,Laos,LAO,1.67,1.67,1.67,1.67,1.67,1.67,1.67,...,4.39,4.53,4.68,4.83,4.98,5.14,5.28,5.42,5.32,5.49


In [133]:
countries_name_temp_2 = pd.merge(countries[['country', 'country_code']], dif_country_name_2, on='country_code', how='inner')
countries_name_temp_2.sort_values(by='country_code')

Unnamed: 0,country_x,country_code,continent,country_y,1800,1801,1802,1803,1804,1805,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Burkina,BFA,,Burkina,695.0,695.0,695.0,695.0,695.0,695.0,...,2.69,2.75,2.78,2.81,2.89,2.98,3.09,3.18,3.11,3.15
3,Ivory Coast,CIV,,Ivory Coast,1.87,1.87,1.87,1.87,1.87,1.87,...,3.47,3.57,3.67,3.77,3.94,4.13,4.3,4.45,4.44,4.58
2,"Congo, Democratic Republic of",COD,,"Congo, Democratic Republic of",534.0,536.0,539.0,541.0,543.0,545.0,...,1.44,1.57,1.66,1.72,1.7,1.71,1.75,1.77,1.71,1.72
1,Congo,COG,,Congo,389.0,389.0,0.39,392.0,393.0,394.0,...,3.96,3.84,3.97,3.74,3.28,3.05,2.79,2.62,2.36,2.3
17,Micronesia,FSM,,"Micronesia, Fed. Sts.",1.22,1.22,1.22,1.22,1.22,1.22,...,5.7,5.16,4.97,5.13,5.12,5.19,5.15,5.21,5.11,4.92
9,Kyrgyzstan,KGZ,,Kyrgyzstan,757.0,757.0,757.0,757.0,757.0,757.0,...,5.58,5.5,5.75,5.66,5.77,5.81,6.21,6.58,5.93,6.16
14,Saint Kitts and Nevis,KNA,,St. Kitts and Nevis,918.0,0.92,0.92,0.92,0.92,0.92,...,16.4,17.1,18.0,18.0,18.4,17.9,18.3,18.7,15.0,14.6
8,"Korea, South",KOR,,"Korea, South",1.34,1.34,1.34,1.34,1.34,1.34,...,39.8,40.7,41.7,42.5,43.5,44.6,45.8,46.4,45.9,47.5
10,Laos,LAO,,Laos,1.67,1.67,1.67,1.67,1.67,1.67,...,4.39,4.53,4.68,4.83,4.98,5.14,5.28,5.42,5.32,5.49
15,Saint Lucia,LCA,,St. Lucia,318.0,318.0,318.0,318.0,318.0,318.0,...,16.5,16.2,17.6,15.7,19.3,19.9,20.3,20.5,16.6,17.0


In [134]:
countries_name_temp_2.drop('country_y', inplace=True, axis=1)

In [135]:
countries_name_temp_2.rename(columns = {'country_x' : 'country'}, inplace=True)
countries_name_temp_2

Unnamed: 0,country,country_code,continent,1800,1801,1802,1803,1804,1805,1806,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Burkina,BFA,,695.0,695.0,695.0,695.0,695.0,695.0,695.0,...,2.69,2.75,2.78,2.81,2.89,2.98,3.09,3.18,3.11,3.15
1,Congo,COG,,389.0,389.0,0.39,392.0,393.0,394.0,395.0,...,3.96,3.84,3.97,3.74,3.28,3.05,2.79,2.62,2.36,2.3
2,"Congo, Democratic Republic of",COD,,534.0,536.0,539.0,541.0,543.0,545.0,547.0,...,1.44,1.57,1.66,1.72,1.7,1.71,1.75,1.77,1.71,1.72
3,Ivory Coast,CIV,,1.87,1.87,1.87,1.87,1.87,1.87,1.87,...,3.47,3.57,3.67,3.77,3.94,4.13,4.3,4.45,4.44,4.58
4,Swaziland,SWZ,,135.0,135.0,135.0,135.0,135.0,135.0,135.0,...,5.05,5.52,5.54,5.75,5.78,5.86,5.94,6.01,5.75,5.77
5,Burma (Myanmar),MMR,,976.0,976.0,976.0,976.0,976.0,976.0,976.0,...,5.09,5.47,5.86,6.22,6.29,6.38,6.77,6.92,7.09,6.42
6,East Timor,TLS,,665.0,668.0,669.0,0.67,671.0,672.0,673.0,...,2.86,3.01,3.19,3.09,3.17,3.2,3.19,3.24,2.93,2.65
7,"Korea, North",PRK,,411.0,411.0,411.0,411.0,0.41,0.41,0.41,...,3.0,3.02,3.03,2.99,2.95,2.84,2.7,2.63,2.55,2.45
8,"Korea, South",KOR,,1.34,1.34,1.34,1.34,1.34,1.34,1.34,...,39.8,40.7,41.7,42.5,43.5,44.6,45.8,46.4,45.9,47.5
9,Kyrgyzstan,KGZ,,757.0,757.0,757.0,757.0,757.0,757.0,759.0,...,5.58,5.5,5.75,5.66,5.77,5.81,6.21,6.58,5.93,6.16


In [136]:
extra_income_avg.dropna(subset=['continent'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  extra_income_avg.dropna(subset=['continent'], inplace=True)


In [137]:
extra_income_avg

Unnamed: 0,continent,country,country_code,1800,1801,1802,1803,1804,1805,1806,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Asia,Afghanistan,AFG,919.00,919.00,919.00,919.00,919.00,919.00,919.00,...,2.83,2.89,2.87,2.82,2.81,2.81,2.77,2.82,2.62,2.66
4,Africa,Angola,AGO,2.27,2.27,2.28,2.29,2.30,2.31,2.32,...,3.73,3.76,3.78,3.67,3.55,3.37,3.12,2.99,2.79,2.72
1,Europe,Albania,ALB,1.22,1.22,1.22,1.22,1.22,1.22,1.22,...,7.99,8.20,8.52,8.62,8.79,8.73,9.04,9.35,9.04,9.50
3,Europe,Andorra,AND,1.48,1.49,1.49,1.49,1.49,1.50,1.50,...,52.00,54.20,55.70,57.70,59.70,61.70,63.80,66.00,68.30,70.60
203,Asia,United Arab Emirates,ARE,2.90,2.91,2.92,2.93,2.94,2.95,2.96,...,70.40,73.60,83.70,69.10,71.20,78.70,84.10,94.00,85.80,85.80
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
161,Australia and Oceania,Samoa,WSM,2.76,2.76,2.76,2.76,2.76,2.76,2.76,...,9.58,9.50,9.44,9.78,10.50,10.60,10.30,10.60,10.20,9.33
212,Asia,Yemen,YEM,1.22,1.23,1.23,1.23,1.24,1.24,1.24,...,4.23,4.35,4.18,2.93,2.59,2.40,2.36,2.36,2.18,2.14
175,Africa,South Africa,ZAF,1.34,1.32,1.31,1.29,1.28,1.21,1.21,...,11.90,11.80,11.60,11.30,11.20,11.20,11.10,11.00,10.10,10.30
213,Africa,Zambia,ZMB,1.26,1.26,1.27,1.27,1.27,1.27,1.28,...,2.78,2.85,2.88,2.91,2.93,2.94,2.97,2.93,2.74,2.68


In [138]:
income_avg = pd.concat([extra_income_avg, countries_name_temp_2], ignore_index=True)
income_avg

Unnamed: 0,continent,country,country_code,1800,1801,1802,1803,1804,1805,1806,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Asia,Afghanistan,AFG,919.00,919.00,919.00,919.00,919.00,919.00,919.00,...,2.83,2.89,2.87,2.82,2.81,2.81,2.77,2.82,2.62,2.66
1,Africa,Angola,AGO,2.27,2.27,2.28,2.29,2.30,2.31,2.32,...,3.73,3.76,3.78,3.67,3.55,3.37,3.12,2.99,2.79,2.72
2,Europe,Albania,ALB,1.22,1.22,1.22,1.22,1.22,1.22,1.22,...,7.99,8.20,8.52,8.62,8.79,8.73,9.04,9.35,9.04,9.50
3,Europe,Andorra,AND,1.48,1.49,1.49,1.49,1.49,1.50,1.50,...,52.00,54.20,55.70,57.70,59.70,61.70,63.80,66.00,68.30,70.60
4,Asia,United Arab Emirates,ARE,2.90,2.91,2.92,2.93,2.94,2.95,2.96,...,70.40,73.60,83.70,69.10,71.20,78.70,84.10,94.00,85.80,85.80
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
189,,Slovakia,SVK,2.11,2.11,2.11,2.11,2.11,2.11,2.11,...,22.60,23.20,22.60,23.10,23.40,24.40,25.50,26.00,24.60,25.70
190,,Saint Kitts and Nevis,KNA,918.00,0.92,0.92,0.92,0.92,0.92,0.92,...,16.40,17.10,18.00,18.00,18.40,17.90,18.30,18.70,15.00,14.60
191,,Saint Lucia,LCA,318.00,318.00,318.00,318.00,318.00,318.00,318.00,...,16.50,16.20,17.60,15.70,19.30,19.90,20.30,20.50,16.60,17.00
192,,Saint Vincent and the Grenadines,VCT,1.08,1.08,1.08,1.08,1.08,1.08,1.08,...,13.30,13.50,13.60,13.80,14.00,14.10,14.30,14.40,13.70,13.70


In [139]:
income_avg.shape

(194, 225)

In [140]:
income_avg = income_avg.dropna(subset=['2010'])
income_avg.shape

(192, 225)

In [141]:
income_avg.to_csv('./data/income_avg.csv', index=False)

FOR RENAMING THE COUNTRIES AND CONTINENTS

extra_income_ineq['country_code'] = extra_income_ineq['country_code'].fillna(extra_income_ineq['country'].map(dict_country_code_2))
extra_income_ineq['country'] = extra_income_ineq['country'].replace(rename_dict)
extra_income_ineq.head(50)
extra_income_ineq['nan_count'] = extra_income_ineq.isnull().sum(axis=1)
extra_income_ineq = extra_income_ineq.sort_values(by=['country_code', 'nan_count'])
extra_income_ineq
extra_income_ineq = extra_income_ineq.drop_duplicates(subset='country_code', keep='first')
extra_income_ineq.head(20)
extra_income_ineq.drop('nan_count', axis=1, inplace=True)
extra_income_ineq.to_csv('./data/happiness_score_full.csv', index=False)

In [143]:
women_parliament_full = pd.merge(countries, women_parliament, on='country', how='outer')

In [144]:
women_parliament_full['country_code'] = women_parliament_full['country_code'].fillna(women_parliament_full['country'].map(dict_country_code_2))
women_parliament_full['country'] = women_parliament_full['country'].replace(rename_dict)
women_parliament_full.head(50)

Unnamed: 0,continent,country,country_code,1945,1946,1947,1948,1949,1950,1951,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,Asia,Afghanistan,AFG,,,,,,,,...,27.4,27.4,27.4,27.4,27.4,27.7,27.8,27.9,27.2,27.2
1,Europe,Albania,ALB,3.66,6.46,8.81,10.8,12.5,14.1,13.5,...,16.1,15.9,15.7,18.8,21.8,24.8,27.9,28.6,29.5,29.5
2,Africa,Algeria,DZA,,,,,,,,...,23.7,25.8,23.8,19.7,6.94,21.3,25.8,6.82,21.6,21.1
3,Europe,Andorra,AND,,,,,,,,...,51.8,50.0,47.6,44.3,39.3,41.1,42.9,44.6,46.4,46.4
4,Africa,Angola,AGO,,,,,,,,...,34.9,34.1,33.5,32.8,32.2,31.6,30.9,30.4,30.0,30.0
5,North America,Antigua and Barbuda,ATG,,,,,,,,...,20.1,20.5,20.8,21.2,23.9,26.5,29.0,31.4,31.4,31.4
6,South America,Argentina,ARG,,0.0,0.0,0.0,5.99,11.4,16.2,...,37.7,37.7,37.7,37.4,37.1,38.0,38.9,39.4,39.9,40.7
7,Europe,Armenia,ARM,,,,,,,,...,10.2,10.7,11.8,12.9,14.2,15.6,17.1,24.2,23.5,23.5
8,Australia and Oceania,Australia,AUS,,3.6,3.21,2.94,2.73,2.46,2.19,...,30.4,31.4,32.2,31.2,30.3,29.5,31.1,33.4,36.6,37.0
9,Europe,Austria,AUT,4.21,0.0,0.0,0.0,4.23,2.08,2.08,...,29.0,29.0,30.5,29.0,29.0,29.5,33.1,35.8,38.5,38.5


In [145]:
women_parliament_full['nan_count'] = women_parliament_full.isnull().sum(axis=1)
women_parliament_full = women_parliament_full.sort_values(by=['country_code', 'nan_count'])
women_parliament_full

Unnamed: 0,continent,country,country_code,1945,1946,1947,1948,1949,1950,1951,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,nan_count
0,Asia,Afghanistan,AFG,,,,,,,,...,27.40,27.40,27.40,27.40,27.70,27.80,27.90,27.20,27.20,20
4,Africa,Angola,AGO,,,,,,,,...,34.10,33.50,32.80,32.20,31.60,30.90,30.40,30.00,30.00,35
1,Europe,Albania,ALB,3.66,6.46,8.81,10.80,12.50,14.10,13.50,...,15.90,15.70,18.80,21.80,24.80,27.90,28.60,29.50,29.50,0
3,Europe,Andorra,AND,,,,,,,,...,50.00,47.60,44.30,39.30,41.10,42.90,44.60,46.40,46.40,48
200,Asia,United Arab Emirates,ARE,,,,,,,,...,17.50,19.20,20.80,22.50,29.40,36.30,43.10,50.00,50.00,26
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159,Australia and Oceania,Samoa,WSM,,,,,,,,...,5.28,6.48,7.66,8.84,10.00,10.00,10.00,10.00,10.00,19
209,Asia,Yemen,YEM,,,,,,,,...,0.74,0.78,0.81,0.85,0.88,0.91,0.94,0.97,0.97,45
173,Africa,South Africa,ZAF,,,,1.31,1.43,1.54,1.66,...,41.70,41.20,40.80,41.30,41.90,42.80,43.90,45.30,45.40,3
210,Africa,Zambia,ZMB,,,,,,,,...,12.50,13.50,14.30,14.90,15.40,15.90,16.70,18.00,16.80,19


In [146]:
women_parliament_full = women_parliament_full.drop_duplicates(subset='country_code', keep='first')
women_parliament_full.head(20)

Unnamed: 0,continent,country,country_code,1945,1946,1947,1948,1949,1950,1951,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,nan_count
0,Asia,Afghanistan,AFG,,,,,,,,...,27.4,27.4,27.4,27.4,27.7,27.8,27.9,27.2,27.2,20
4,Africa,Angola,AGO,,,,,,,,...,34.1,33.5,32.8,32.2,31.6,30.9,30.4,30.0,30.0,35
1,Europe,Albania,ALB,3.66,6.46,8.81,10.8,12.5,14.1,13.5,...,15.9,15.7,18.8,21.8,24.8,27.9,28.6,29.5,29.5,0
3,Europe,Andorra,AND,,,,,,,,...,50.0,47.6,44.3,39.3,41.1,42.9,44.6,46.4,46.4,48
200,Asia,United Arab Emirates,ARE,,,,,,,,...,17.5,19.2,20.8,22.5,29.4,36.3,43.1,50.0,50.0,26
6,South America,Argentina,ARG,,0.0,0.0,0.0,5.99,11.4,16.2,...,37.7,37.7,37.4,37.1,38.0,38.9,39.4,39.9,40.7,1
7,Europe,Armenia,ARM,,,,,,,,...,10.7,11.8,12.9,14.2,15.6,17.1,24.2,23.5,23.5,45
5,North America,Antigua and Barbuda,ATG,,,,,,,,...,20.5,20.8,21.2,23.9,26.5,29.0,31.4,31.4,31.4,39
8,Australia and Oceania,Australia,AUS,,3.6,3.21,2.94,2.73,2.46,2.19,...,31.4,32.2,31.2,30.3,29.5,31.1,33.4,36.6,37.0,1
9,Europe,Austria,AUT,4.21,0.0,0.0,0.0,4.23,2.08,2.08,...,29.0,30.5,29.0,29.0,29.5,33.1,35.8,38.5,38.5,0


In [147]:
women_parliament_full.drop('nan_count', axis=1, inplace=True)
women_parliament_full.to_csv('./data/women_parliament_full.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  women_parliament_full.drop('nan_count', axis=1, inplace=True)


In [148]:
women_parliament_full

Unnamed: 0,continent,country,country_code,1945,1946,1947,1948,1949,1950,1951,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
0,Asia,Afghanistan,AFG,,,,,,,,...,27.40,27.40,27.40,27.40,27.40,27.70,27.80,27.90,27.20,27.20
4,Africa,Angola,AGO,,,,,,,,...,34.90,34.10,33.50,32.80,32.20,31.60,30.90,30.40,30.00,30.00
1,Europe,Albania,ALB,3.66,6.46,8.81,10.80,12.50,14.10,13.50,...,16.10,15.90,15.70,18.80,21.80,24.80,27.90,28.60,29.50,29.50
3,Europe,Andorra,AND,,,,,,,,...,51.80,50.00,47.60,44.30,39.30,41.10,42.90,44.60,46.40,46.40
200,Asia,United Arab Emirates,ARE,,,,,,,,...,5.00,17.50,19.20,20.80,22.50,29.40,36.30,43.10,50.00,50.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159,Australia and Oceania,Samoa,WSM,,,,,,,,...,4.08,5.28,6.48,7.66,8.84,10.00,10.00,10.00,10.00,10.00
209,Asia,Yemen,YEM,,,,,,,,...,0.70,0.74,0.78,0.81,0.85,0.88,0.91,0.94,0.97,0.97
173,Africa,South Africa,ZAF,,,,1.31,1.43,1.54,1.66,...,42.10,41.70,41.20,40.80,41.30,41.90,42.80,43.90,45.30,45.40
210,Africa,Zambia,ZMB,,,,,,,,...,11.00,12.50,13.50,14.30,14.90,15.40,15.90,16.70,18.00,16.80
