# Population growth, fertility, life expectancy and mortality

# Import Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings

# Options and Settings

In [2]:
%matplotlib inline
plt.style.use('ggplot')
plt.rcParams['figure.autolayout'] = True
plt.rcParams['font.size'] = 12
path = os.getcwd()                                         # get current working directory
warnings.simplefilter('ignore')

# Import Data

In [3]:
# Build the path to the indicators CSV inside the local 'datasets' folder, then load it.
csv_name = 'Population Growth, Fertility and Mortality Indicators.csv'
filepath = os.path.join('datasets', csv_name)
df = pd.read_csv(filepath)

# Head and Tail

In [4]:
# Show the raw frame; pandas truncates the display to the first and last rows.
df

Unnamed: 0,Region/Country/Area,Unnamed: 1,Year,Series,Value,Footnotes,Source,Unnamed: 7
0,1,"Total, all countries or areas",2010,Population annual rate of increase (percent),1.2,Data refers to a 5-year period preceding the r...,"United Nations Population Division, New York, ...",
1,1,"Total, all countries or areas",2010,Total fertility rate (children per women),2.6,Data refers to a 5-year period preceding the r...,"United Nations Population Division, New York, ...",
2,1,"Total, all countries or areas",2010,"Infant mortality for both sexes (per 1,000 liv...",41.0,Data refers to a 5-year period preceding the r...,"United Nations Statistics Division, New York, ...",
3,1,"Total, all countries or areas",2010,"Maternal mortality ratio (deaths per 100,000 p...",248.0,,"World Health Organization (WHO), the United Na...",
4,1,"Total, all countries or areas",2010,Life expectancy at birth for both sexes (years),68.9,Data refers to a 5-year period preceding the r...,"United Nations Population Division, New York, ...",
...,...,...,...,...,...,...,...,...
4894,716,Zimbabwe,2020,Total fertility rate (children per women),3.6,Data refers to a 5-year period preceding the r...,"United Nations Population Division, New York, ...",
4895,716,Zimbabwe,2020,"Infant mortality for both sexes (per 1,000 liv...",38.7,Data refers to a 5-year period preceding the r...,"United Nations Statistics Division, New York, ...",
4896,716,Zimbabwe,2020,Life expectancy at birth for both sexes (years),60.8,Data refers to a 5-year period preceding the r...,"United Nations Population Division, New York, ...",
4897,716,Zimbabwe,2020,Life expectancy at birth for males (years),59.2,Data refers to a 5-year period preceding the r...,"United Nations Population Division, New York, ...",


In [5]:
# (rows, columns) of the raw frame.
df.shape

(4899, 8)

In [6]:
# Column dtypes and non-null counts of the raw frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4899 entries, 0 to 4898
Data columns (total 8 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Region/Country/Area  4899 non-null   int64  
 1   Unnamed: 1           4899 non-null   object 
 2   Year                 4899 non-null   int64  
 3   Series               4899 non-null   object 
 4   Value                4899 non-null   float64
 5   Footnotes            4263 non-null   object 
 6   Source               4899 non-null   object 
 7   Unnamed: 7           294 non-null    object 
dtypes: float64(1), int64(2), object(5)
memory usage: 306.3+ KB


In [7]:
# Give the two identifier columns meaningful names: the integer code column
# (loaded as 'Region/Country/Area') and the text label column (loaded as 'Unnamed: 1').
# The result is rebound to `df` rather than renamed with inplace=True, so the cell is a
# plain transformation and no object is mutated behind an earlier cell's output.
df = df.rename(columns={
    'Region/Country/Area': 'Code',
    'Unnamed: 1': 'RegionCountryArea',
})

df

Unnamed: 0,Code,RegionCountryArea,Year,Series,Value,Footnotes,Source,Unnamed: 7
0,1,"Total, all countries or areas",2010,Population annual rate of increase (percent),1.2,Data refers to a 5-year period preceding the r...,"United Nations Population Division, New York, ...",
1,1,"Total, all countries or areas",2010,Total fertility rate (children per women),2.6,Data refers to a 5-year period preceding the r...,"United Nations Population Division, New York, ...",
2,1,"Total, all countries or areas",2010,"Infant mortality for both sexes (per 1,000 liv...",41.0,Data refers to a 5-year period preceding the r...,"United Nations Statistics Division, New York, ...",
3,1,"Total, all countries or areas",2010,"Maternal mortality ratio (deaths per 100,000 p...",248.0,,"World Health Organization (WHO), the United Na...",
4,1,"Total, all countries or areas",2010,Life expectancy at birth for both sexes (years),68.9,Data refers to a 5-year period preceding the r...,"United Nations Population Division, New York, ...",
...,...,...,...,...,...,...,...,...
4894,716,Zimbabwe,2020,Total fertility rate (children per women),3.6,Data refers to a 5-year period preceding the r...,"United Nations Population Division, New York, ...",
4895,716,Zimbabwe,2020,"Infant mortality for both sexes (per 1,000 liv...",38.7,Data refers to a 5-year period preceding the r...,"United Nations Statistics Division, New York, ...",
4896,716,Zimbabwe,2020,Life expectancy at birth for both sexes (years),60.8,Data refers to a 5-year period preceding the r...,"United Nations Population Division, New York, ...",
4897,716,Zimbabwe,2020,Life expectancy at birth for males (years),59.2,Data refers to a 5-year period preceding the r...,"United Nations Population Division, New York, ...",


In [8]:
# Keep only the columns used in the analysis; Footnotes, Source and the
# trailing unnamed column are left behind in `df`.
keep_cols = ['Code', 'RegionCountryArea', 'Year', 'Series', 'Value']
df1 = df[keep_cols]
df1

Unnamed: 0,Code,RegionCountryArea,Year,Series,Value
0,1,"Total, all countries or areas",2010,Population annual rate of increase (percent),1.2
1,1,"Total, all countries or areas",2010,Total fertility rate (children per women),2.6
2,1,"Total, all countries or areas",2010,"Infant mortality for both sexes (per 1,000 liv...",41.0
3,1,"Total, all countries or areas",2010,"Maternal mortality ratio (deaths per 100,000 p...",248.0
4,1,"Total, all countries or areas",2010,Life expectancy at birth for both sexes (years),68.9
...,...,...,...,...,...
4894,716,Zimbabwe,2020,Total fertility rate (children per women),3.6
4895,716,Zimbabwe,2020,"Infant mortality for both sexes (per 1,000 liv...",38.7
4896,716,Zimbabwe,2020,Life expectancy at birth for both sexes (years),60.8
4897,716,Zimbabwe,2020,Life expectancy at birth for males (years),59.2


In [9]:
# (rows, columns) after the column selection.
df1.shape

(4899, 5)

In [10]:
# Dtypes and non-null counts of the selected columns.
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4899 entries, 0 to 4898
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Code               4899 non-null   int64  
 1   RegionCountryArea  4899 non-null   object 
 2   Year               4899 non-null   int64  
 3   Series             4899 non-null   object 
 4   Value              4899 non-null   float64
dtypes: float64(1), int64(2), object(2)
memory usage: 191.5+ KB


In [11]:
# Summary (count, unique, top, freq) of the string-typed columns only.
df1.describe(include='object')

Unnamed: 0,RegionCountryArea,Series
count,4899,4899
unique,265,7
top,"Total, all countries or areas",Population annual rate of increase (percent)
freq,21,795


In [12]:
# Number of rows per region/country/area label.
df1['RegionCountryArea'].value_counts()

Total, all countries or areas    21
Mozambique                       21
Malawi                           21
Malaysia                         21
Maldives                         21
                                 ..
Cayman Islands                    3
Bermuda                           3
Isle of Man                       3
Monaco                            3
Sint Maarten (Dutch part)         3
Name: RegionCountryArea, Length: 265, dtype: int64

In [13]:
# Number of rows per indicator series.
df1['Series'].value_counts()

Population annual rate of increase (percent)                795
Total fertility rate (children per women)                   693
Infant mortality for both sexes (per 1,000 live births)     693
Life expectancy at birth for both sexes (years)             693
Life expectancy at birth for males (years)                  693
Life expectancy at birth for females (years)                693
Maternal mortality ratio (deaths per 100,000 population)    639
Name: Series, dtype: int64

In [14]:
# Number of rows per reference year.
# NOTE(review): 2017 has far fewer rows than the other years; in the regional tables
# further down it shows up on maternal-mortality rows — confirm it is limited to that series.
df1['Year'].value_counts()

2010    1633
2015    1633
2020    1420
2017     213
Name: Year, dtype: int64

In [15]:
# Summary statistics of Value across all rows.
# NOTE(review): Value pools series with different units (percent, years, per 1,000,
# per 100,000 — see the Series labels), so these pooled figures mix scales.
df1['Value'].describe()

count    4899.000000
mean       56.634946
std       102.904145
min        -3.400000
25%         2.900000
50%        53.300000
75%        74.200000
max      1360.000000
Name: Value, dtype: float64

# Missing Values Exploration

In [16]:
# Count missing values in each column.
df1.isna().sum()

Code                 0
RegionCountryArea    0
Year                 0
Series               0
Value                0
dtype: int64

# Grouping and Aggregation

Filter By Region

In [17]:
# Region and sub-region labels to keep, grouped by continent (original order preserved).
# NOTE(review): Melanesia is the only Oceania sub-region listed besides
# 'Australia and New Zealand' — confirm whether the data has others that should be included.
regions = [
    # Africa
    'Africa', 'Northern Africa', 'Sub-Saharan Africa', 'Eastern Africa',
    'Middle Africa', 'Southern Africa', 'Western Africa',
    # Americas
    'Americas', 'Northern America', 'Latin America & the Caribbean',
    'Caribbean', 'Central America', 'South America',
    # Asia
    'Asia', 'Central Asia', 'Eastern Asia', 'South-central Asia',
    'South-eastern Asia', 'Southern Asia', 'Western Asia',
    # Europe
    'Europe', 'Eastern Europe', 'Northern Europe', 'Southern Europe', 'Western Europe',
    # Oceania
    'Oceania', 'Australia and New Zealand', 'Melanesia',
]

# Boolean mask of the rows whose label is one of the regions above.
is_region = df1['RegionCountryArea'].isin(regions)
df_reg_filt = df1[is_region]
df_reg_filt

Unnamed: 0,Code,RegionCountryArea,Year,Series,Value
21,2,Africa,2010,Population annual rate of increase (percent),2.5
22,2,Africa,2010,Total fertility rate (children per women),4.9
23,2,Africa,2010,"Infant mortality for both sexes (per 1,000 liv...",67.7
24,2,Africa,2010,Life expectancy at birth for both sexes (years),56.8
25,2,Africa,2010,Life expectancy at birth for males (years),55.2
...,...,...,...,...,...
577,54,Melanesia,2020,Total fertility rate (children per women),3.5
578,54,Melanesia,2020,"Infant mortality for both sexes (per 1,000 liv...",37.5
579,54,Melanesia,2020,Life expectancy at birth for both sexes (years),65.4
580,54,Melanesia,2020,Life expectancy at birth for males (years),64.1


In [18]:
# df_reg_filt is a boolean-mask selection taken from df1. Renaming such a selection
# with inplace=True can raise pandas' SettingWithCopyWarning (silenced in this notebook
# by the global warnings filter), so build the renamed frame and rebind the name instead.
df_reg_filt = df_reg_filt.rename(columns={'RegionCountryArea': 'Region'})

df_reg_filt

Unnamed: 0,Code,Region,Year,Series,Value
21,2,Africa,2010,Population annual rate of increase (percent),2.5
22,2,Africa,2010,Total fertility rate (children per women),4.9
23,2,Africa,2010,"Infant mortality for both sexes (per 1,000 liv...",67.7
24,2,Africa,2010,Life expectancy at birth for both sexes (years),56.8
25,2,Africa,2010,Life expectancy at birth for males (years),55.2
...,...,...,...,...,...
577,54,Melanesia,2020,Total fertility rate (children per women),3.5
578,54,Melanesia,2020,"Infant mortality for both sexes (per 1,000 liv...",37.5
579,54,Melanesia,2020,Life expectancy at birth for both sexes (years),65.4
580,54,Melanesia,2020,Life expectancy at birth for males (years),64.1


Filter Population annual rate of increase in Percentage

In [19]:
# Series label for the annual population growth rate.
percentage = ['Population annual rate of increase (percent)']

# Keep only the regional rows that belong to that series.
pct_mask = df_reg_filt['Series'].isin(percentage)
df_reg_filt_pct = df_reg_filt[pct_mask]
df_reg_filt_pct

Unnamed: 0,Code,Region,Year,Series,Value
21,2,Africa,2010,Population annual rate of increase (percent),2.5
27,2,Africa,2015,Population annual rate of increase (percent),2.6
33,2,Africa,2020,Population annual rate of increase (percent),2.5
39,15,Northern Africa,2010,Population annual rate of increase (percent),1.7
46,15,Northern Africa,2015,Population annual rate of increase (percent),2.0
...,...,...,...,...,...
547,53,Australia and New Zealand,2015,Population annual rate of increase (percent),1.5
555,53,Australia and New Zealand,2020,Population annual rate of increase (percent),1.2
561,54,Melanesia,2010,Population annual rate of increase (percent),2.2
568,54,Melanesia,2015,Population annual rate of increase (percent),1.9


Filter Total fertility rate (children per women) 

In [20]:
# Series label for the total fertility rate.
tot_fert = ['Total fertility rate (children per women)']

# Keep only the regional rows that belong to that series.
totfert_mask = df_reg_filt['Series'].isin(tot_fert)
df_reg_filt_totfert = df_reg_filt[totfert_mask]
df_reg_filt_totfert

Unnamed: 0,Code,Region,Year,Series,Value
22,2,Africa,2010,Total fertility rate (children per women),4.9
28,2,Africa,2015,Total fertility rate (children per women),4.7
34,2,Africa,2020,Total fertility rate (children per women),4.4
40,15,Northern Africa,2010,Total fertility rate (children per women),3.1
47,15,Northern Africa,2015,Total fertility rate (children per women),3.3
...,...,...,...,...,...
548,53,Australia and New Zealand,2015,Total fertility rate (children per women),1.9
556,53,Australia and New Zealand,2020,Total fertility rate (children per women),1.8
562,54,Melanesia,2010,Total fertility rate (children per women),3.9
569,54,Melanesia,2015,Total fertility rate (children per women),3.7


Filter Infant mortality for both sexes (per 1,000 live births)

In [21]:
# Series label for infant mortality (both sexes).
inf_mort = ['Infant mortality for both sexes (per 1,000 live births)']

# Keep only the regional rows that belong to that series.
infmort_mask = df_reg_filt['Series'].isin(inf_mort)
df_reg_filt_infmort = df_reg_filt[infmort_mask]
df_reg_filt_infmort

Unnamed: 0,Code,Region,Year,Series,Value
23,2,Africa,2010,"Infant mortality for both sexes (per 1,000 liv...",67.7
29,2,Africa,2015,"Infant mortality for both sexes (per 1,000 liv...",55.9
35,2,Africa,2020,"Infant mortality for both sexes (per 1,000 liv...",47.5
41,15,Northern Africa,2010,"Infant mortality for both sexes (per 1,000 liv...",31.7
48,15,Northern Africa,2015,"Infant mortality for both sexes (per 1,000 liv...",26.6
...,...,...,...,...,...
549,53,Australia and New Zealand,2015,"Infant mortality for both sexes (per 1,000 liv...",3.7
557,53,Australia and New Zealand,2020,"Infant mortality for both sexes (per 1,000 liv...",3.2
563,54,Melanesia,2010,"Infant mortality for both sexes (per 1,000 liv...",48.4
570,54,Melanesia,2015,"Infant mortality for both sexes (per 1,000 liv...",43.1


Filter Life expectancy at birth in Years

In [22]:
# The three life-expectancy series: both sexes, males, females.
life_expect = [
    'Life expectancy at birth for both sexes (years)',
    'Life expectancy at birth for males (years)',
    'Life expectancy at birth for females (years)',
]

# Keep the regional rows that belong to any of those series.
lifeexpect_mask = df_reg_filt['Series'].isin(life_expect)
df_reg_filt_lifeexpect = df_reg_filt[lifeexpect_mask]
df_reg_filt_lifeexpect

Unnamed: 0,Code,Region,Year,Series,Value
24,2,Africa,2010,Life expectancy at birth for both sexes (years),56.8
25,2,Africa,2010,Life expectancy at birth for males (years),55.2
26,2,Africa,2010,Life expectancy at birth for females (years),58.3
30,2,Africa,2015,Life expectancy at birth for both sexes (years),60.2
31,2,Africa,2015,Life expectancy at birth for males (years),58.6
...,...,...,...,...,...
573,54,Melanesia,2015,Life expectancy at birth for males (years),62.7
574,54,Melanesia,2015,Life expectancy at birth for females (years),65.5
579,54,Melanesia,2020,Life expectancy at birth for both sexes (years),65.4
580,54,Melanesia,2020,Life expectancy at birth for males (years),64.1


Filter Maternal mortality ratio

In [23]:
# Series label for the maternal mortality ratio.
mat_mort = ['Maternal mortality ratio (deaths per 100,000 population)']

# Keep only the regional rows that belong to that series.
matmort_mask = df_reg_filt['Series'].isin(mat_mort)
df_reg_filt_matmort = df_reg_filt[matmort_mask]
df_reg_filt_matmort

Unnamed: 0,Code,Region,Year,Series,Value
42,15,Northern Africa,2010,"Maternal mortality ratio (deaths per 100,000 p...",145.0
49,15,Northern Africa,2015,"Maternal mortality ratio (deaths per 100,000 p...",118.0
53,15,Northern Africa,2017,"Maternal mortality ratio (deaths per 100,000 p...",112.0
63,202,Sub-Saharan Africa,2010,"Maternal mortality ratio (deaths per 100,000 p...",635.0
70,202,Sub-Saharan Africa,2015,"Maternal mortality ratio (deaths per 100,000 p...",566.0
...,...,...,...,...,...
550,53,Australia and New Zealand,2015,"Maternal mortality ratio (deaths per 100,000 p...",7.0
554,53,Australia and New Zealand,2017,"Maternal mortality ratio (deaths per 100,000 p...",7.0
564,54,Melanesia,2010,"Maternal mortality ratio (deaths per 100,000 p...",155.0
571,54,Melanesia,2015,"Maternal mortality ratio (deaths per 100,000 p...",138.0
