In [1]:
import pandas as pd

In [2]:
# Columns to pull from the CSV; passed to read_csv via usecols.
required_cols = [
    'Country',
    'Population',
    'Coastline (coast/area ratio)',
    'GDP ($ per capita)',
    'Literacy (%)',
    'Birthrate',
]

In [3]:
# Load only the columns listed in required_cols.
# decimal=',' tells pandas to treat a comma as the decimal mark when parsing numbers.
countries = pd.read_csv(
    'datasets/countries_of_the_world.csv',
    usecols=required_cols,
    decimal=',',
)

In [4]:
# Preview the first rows of the freshly loaded frame.
countries.head()

Unnamed: 0,Country,Population,Coastline (coast/area ratio),GDP ($ per capita),Literacy (%),Birthrate
0,Afghanistan,31056997,0.0,700.0,36.0,46.6
1,Albania,3581655,1.26,4500.0,86.5,15.11
2,Algeria,32930091,0.04,6000.0,70.0,17.14
3,American Samoa,57794,58.29,8000.0,97.0,22.46
4,Andorra,71201,0.0,19000.0,100.0,8.71


In [5]:
# Give the GDP column a short name that is easier to type and read.
# The renamed frame is assigned back instead of using inplace=True, so the
# step stays chainable and does not mutate an object other cells may hold.
countries = countries.rename(columns={'GDP ($ per capita)': 'PerCapitaGDP'})

countries.head()

Unnamed: 0,Country,Population,Coastline (coast/area ratio),PerCapitaGDP,Literacy (%),Birthrate
0,Afghanistan,31056997,0.0,700.0,36.0,46.6
1,Albania,3581655,1.26,4500.0,86.5,15.11
2,Algeria,32930091,0.04,6000.0,70.0,17.14
3,American Samoa,57794,58.29,8000.0,97.0,22.46
4,Andorra,71201,0.0,19000.0,100.0,8.71


In [6]:
# Shorten the remaining awkward headers (they contain spaces and symbols).
# Assigned back rather than renamed in place, for the same reason as any
# other transform: no hidden mutation between cells.
countries = countries.rename(
    columns={
        'Coastline (coast/area ratio)': 'CoastlineAreaRatio',
        'Literacy (%)': 'LiteracyRate',
    }
)

In [7]:
# Check that the new column names took effect.
countries.head()

Unnamed: 0,Country,Population,CoastlineAreaRatio,PerCapitaGDP,LiteracyRate,Birthrate
0,Afghanistan,31056997,0.0,700.0,36.0,46.6
1,Albania,3581655,1.26,4500.0,86.5,15.11
2,Algeria,32930091,0.04,6000.0,70.0,17.14
3,American Samoa,57794,58.29,8000.0,97.0,22.46
4,Andorra,71201,0.0,19000.0,100.0,8.71


In [8]:
# Number of missing Population values (a sum over NaN is silently lowered, so check first).
countries['Population'].isna().sum()

0

In [9]:
# Total population over every loaded row. This runs before any rows are
# dropped for missing values, so all countries in the file are counted.
world_population = countries['Population'].sum()

world_population

6524044551

In [10]:
# Count of missing values in each column.
countries.isna().sum()

Country                0
Population             0
CoastlineAreaRatio     0
PerCapitaGDP           1
LiteracyRate          18
Birthrate              3
dtype: int64

In [11]:
# Keep only fully populated rows: a row is removed if any of its columns is missing.
# The original index labels are kept (no reset), so later label lookups refer to them.
countries = countries.dropna(axis=0, how='any')

In [12]:
# (rows, columns) left after removing incomplete rows.
countries.shape

(207, 6)

In [13]:
# Lowest literacy rate in the cleaned data.
countries['LiteracyRate'].min()

17.6

In [14]:
# Highest literacy rate in the cleaned data.
countries['LiteracyRate'].max()

100.0

In [15]:
# Minimum literacy rate, shown again ahead of the idxmin lookup that follows.
countries['LiteracyRate'].min()

17.6

In [16]:
# Index label (not position) of the row holding the minimum literacy rate.
countries['LiteracyRate'].idxmin()

151

In [17]:
# Name of the country with the lowest literacy rate.
# idxmin() returns an index label, so a single label-based .loc lookup is used
# instead of chained indexing (df[col][label]).
# The displayed value keeps the trailing whitespace present in the raw names.
countries.loc[countries['LiteracyRate'].idxmin(), 'Country']

'Niger '

In [18]:
# Index label of the first row that reaches the maximum literacy rate
# (several rows share the maximum; idxmax reports only the first).
countries['LiteracyRate'].idxmax()

4

In [19]:
# Literacy rate stored at the idxmax label, read with one label-based .loc
# lookup rather than chained indexing (df[col][label]).
countries.loc[countries['LiteracyRate'].idxmax(), 'LiteracyRate']

100.0

In [20]:
# Every row tied at the maximum literacy rate (idxmax alone shows only the first).
countries.loc[countries['LiteracyRate'].eq(countries['LiteracyRate'].max())]

Unnamed: 0,Country,Population,CoastlineAreaRatio,PerCapitaGDP,LiteracyRate,Birthrate
4,Andorra,71201,0.0,19000.0,100.0,8.71
11,Australia,20264082,0.34,29000.0,100.0,12.14
54,Denmark,5450661,16.97,31100.0,100.0,11.13
68,Finland,5231372,0.37,27400.0,100.0,10.45
119,Liechtenstein,33987,0.0,25000.0,100.0,10.21
121,Luxembourg,474413,0.0,55100.0,100.0,11.94
154,Norway,4610820,7.77,37800.0,100.0,11.46


In [21]:
# Arithmetic mean of the literacy rate.
countries['LiteracyRate'].mean()

82.94782608695652

In [22]:
# Median literacy rate (the middle value of the distribution).
countries['LiteracyRate'].median()

92.5

In [23]:
# Standard deviation of the literacy rate (pandas default settings).
countries['LiteracyRate'].std()

19.672122668886164

In [24]:
# The 0.5 quantile, i.e. the median expressed as a quantile.
countries['LiteracyRate'].quantile(q=0.5)

92.5

In [25]:
# Quartiles of the literacy rate; passing a list returns one value per requested quantile.
countries['LiteracyRate'].quantile(q=[0.25, 0.5, 0.75])

0.25    72.4
0.50    92.5
0.75    98.0
Name: LiteracyRate, dtype: float64

In [26]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; the text column Country does not appear in the result.
countries.describe()

Unnamed: 0,Population,CoastlineAreaRatio,PerCapitaGDP,LiteracyRate,Birthrate
count,207.0,207.0,207.0,207.0,207.0
mean,31387080.0,18.773043,9675.362319,82.947826,22.312802
std,123155500.0,73.44982,10170.021706,19.672123,11.188104
min,7026.0,0.0,500.0,17.6,7.29
25%,732915.0,0.1,1900.0,72.4,12.835
50%,5900754.0,0.7,5500.0,92.5,18.9
75%,20243160.0,7.415,14700.0,98.0,29.77
max,1313974000.0,870.66,55100.0,100.0,50.73


In [27]:
# Most frequent literacy value(s); mode() returns a Series because ties are possible.
countries['LiteracyRate'].mode()

0    99.0
dtype: float64

In [28]:
# Skewness of the literacy distribution; a negative value indicates a longer left tail.
countries['LiteracyRate'].skew()

-1.233903412331758

In [29]:
# Kurtosis of the literacy distribution (kurt is the pandas alias of kurtosis).
countries['LiteracyRate'].kurt()

0.4147343707691684

#### Examine relationships between two variables

In [30]:
# Pearson correlation between coastline/area ratio and per-capita GDP.
countries[['CoastlineAreaRatio', 'PerCapitaGDP']].corr(method='pearson')

Unnamed: 0,CoastlineAreaRatio,PerCapitaGDP
CoastlineAreaRatio,1.0,0.048052
PerCapitaGDP,0.048052,1.0


In [31]:
# Pearson correlation between literacy rate and per-capita GDP.
countries[['LiteracyRate', 'PerCapitaGDP']].corr(method='pearson')

Unnamed: 0,LiteracyRate,PerCapitaGDP
LiteracyRate,1.0,0.515122
PerCapitaGDP,0.515122,1.0


In [32]:
# Pearson correlation between birth rate and per-capita GDP.
countries[['Birthrate', 'PerCapitaGDP']].corr(method='pearson')

Unnamed: 0,Birthrate,PerCapitaGDP
Birthrate,1.0,-0.642601
PerCapitaGDP,-0.642601,1.0


In [33]:
# Pairwise correlation matrix of all numeric columns.
# The frame still contains the text column Country; recent pandas versions no
# longer drop non-numeric columns silently in corr() and raise instead, so the
# numeric columns are selected explicitly (this works on old and new pandas).
countries.select_dtypes(include='number').corr()

Unnamed: 0,Population,CoastlineAreaRatio,PerCapitaGDP,LiteracyRate,Birthrate
Population,1.0,-0.061882,-0.039922,-0.045429,-0.05119
CoastlineAreaRatio,-0.061882,1.0,0.048052,0.121786,-0.075255
PerCapitaGDP,-0.039922,0.048052,1.0,0.515122,-0.642601
LiteracyRate,-0.045429,0.121786,0.515122,1.0,-0.790846
Birthrate,-0.05119,-0.075255,-0.642601,-0.790846,1.0


In [35]:
# Pairwise covariance matrix of all numeric columns.
# Numeric columns are selected explicitly because the text column Country
# makes a bare cov() fail on recent pandas versions.
countries.select_dtypes(include='number').cov()

Unnamed: 0,Population,CoastlineAreaRatio,PerCapitaGDP,LiteracyRate,Birthrate
Population,1.516727e+16,-559771800.0,-50002040000.0,-110061900.0,-70533090.0
CoastlineAreaRatio,-559771800.0,5394.876,35893.89,175.9706,-61.84219
PerCapitaGDP,-50002040000.0,35893.89,103429300.0,103058.3,-73117.28
LiteracyRate,-110061900.0,175.9706,103058.3,386.9924,-174.0603
Birthrate,-70533090.0,-61.84219,-73117.28,-174.0603,125.1737
