In [24]:
# data analysis and wrangling
import pandas as pd
import numpy as np

# visualization
import matplotlib.pyplot as plt
import seaborn as sns


In [25]:
# Read the indicators table (one row per country / indicator / year) and
# preview its first five rows.
df = pd.read_csv(filepath_or_buffer="indicators.csv")
df.iloc[:5]

Unnamed: 0,CountryName,CountryCode,IndicatorName,IndicatorCode,Year,Value
0,Arab World,ARB,"Adolescent fertility rate (births per 1,000 wo...",SP.ADO.TFRT,1960,133.5609
1,Arab World,ARB,Age dependency ratio (% of working-age populat...,SP.POP.DPND,1960,87.7976
2,Arab World,ARB,"Age dependency ratio, old (% of working-age po...",SP.POP.DPND.OL,1960,6.634579
3,Arab World,ARB,"Age dependency ratio, young (% of working-age ...",SP.POP.DPND.YG,1960,81.02333
4,Arab World,ARB,Arms exports (SIPRI trend indicator values),MS.MIL.XPRT.KD,1960,3000000.0


In [26]:
# Total number of rows in the indicators table.
len(df)

5656458

In [27]:
# Shape of the array of distinct CountryName values, i.e. (number of names,).
pd.unique(df['CountryName']).shape

(247,)

In [28]:
# Distinct years present in the table, in order of first appearance.
pd.unique(df['Year'])

array([1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970,
       1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981,
       1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992,
       1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
       2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014,
       2015], dtype=int64)

In [31]:
# List every distinct indicator name once, one per line, in order of first
# appearance in the table.
for indicator_name in df['IndicatorName'].drop_duplicates():
    print(indicator_name)

Adolescent fertility rate (births per 1,000 women ages 15-19)
Age dependency ratio (% of working-age population)
Age dependency ratio, old (% of working-age population)
Age dependency ratio, young (% of working-age population)
Arms exports (SIPRI trend indicator values)
Arms imports (SIPRI trend indicator values)
Birth rate, crude (per 1,000 people)
CO2 emissions (kt)
CO2 emissions (metric tons per capita)
CO2 emissions from gaseous fuel consumption (% of total)
CO2 emissions from liquid fuel consumption (% of total)
CO2 emissions from liquid fuel consumption (kt)
CO2 emissions from solid fuel consumption (% of total)
Death rate, crude (per 1,000 people)
Fertility rate, total (births per woman)
Fixed telephone subscriptions
Fixed telephone subscriptions (per 100 people)
Hospital beds (per 1,000 people)
International migrant stock (% of population)
International migrant stock, total
Life expectancy at birth, female (years)
Life expectancy at birth, male (years)
Life expectancy at birth,

In [30]:
# (full indicator name in the source table, short column name used from here on)
indicators = [
    ('Population, total', 'Population'),
    ('Adult literacy rate, population 15+ years, both sexes (%)', 'Literacy'),
    ('Unemployment, total (% of total labor force)', 'Unemployement'),
    ('Primary completion rate, both sexes (%)', 'Primary education'),
]

# One frame per indicator, holding CountryName / Year plus the indicator's
# values under its short column name.
data = [
    df.loc[df['IndicatorName'] == full_name, ['CountryName', 'Year', 'Value']]
      .rename(columns={'Value': short_name})
    for full_name, short_name in indicators
]

# Number of observations available for each indicator.
list(map(len, data))

[13484, 737, 4944, 5386]

In [31]:
# Fold the per-indicator frames into one wide table keyed on (CountryName, Year).
# Outer joins keep every country/year that appears for at least one indicator;
# indicators with no observation for a given key are left as NaN.
df2 = data[0]
for indicator_frame in data[1:]:
    df2 = df2.merge(indicator_frame, on=['CountryName', 'Year'], how='outer')
df2

Unnamed: 0,CountryName,Year,Population,Literacy,Unemployement,Primary education
0,Arab World,1960,9.249590e+07,,,
1,Caribbean small states,1960,4.190810e+06,,,
2,Central Europe and the Baltics,1960,9.140158e+07,,,
3,East Asia & Pacific (all income levels),1960,1.042475e+09,,,
4,East Asia & Pacific (developing only),1960,8.964930e+08,,,
5,Euro area,1960,2.653965e+08,,,
6,Europe & Central Asia (all income levels),1960,6.674890e+08,,,
7,Europe & Central Asia (developing only),1960,1.553174e+08,,,
8,European Union,1960,4.094985e+08,,,
9,Fragile and conflict affected situations,1960,1.203546e+08,,,


In [32]:
# Population against literacy for the rows of year 2014.
x = df2.loc[df2["Year"] == 2014, 'Population']
y = df2.loc[df2["Year"] == 2014, 'Literacy']

plt.figure(figsize=(8, 5))
plt.scatter(x, y)
plt.show()

<IPython.core.display.Javascript object>

In [33]:
# Country whose time series is inspected in the next cell.
pays = 'United Arab Emirates'

# All yearly observations of the four indicators for that country.
data = df2.loc[
    df2["CountryName"] == pays,
    ['Year', 'Population', 'Literacy', 'Unemployement', 'Primary education'],
]


In [34]:
# One panel per indicator for the country selected in `pays`.
# (column in `data`, panel title, line colour)
panels = [
    ('Population', 'Population', 'tab:blue'),
    ('Literacy', 'Literacy (%)', 'tab:orange'),
    ('Unemployement', 'Unemployment (%)', 'tab:green'),
    ('Primary education', 'Primary completion rate (%)', 'tab:red'),
]

# The outer merges that built the table do not guarantee chronological row
# order, and an unsorted x axis makes the line zig-zag.
series = data.sort_values('Year')

fig, axs = plt.subplots(2, 2, figsize=(12, 8))
for ax, (column, title, color) in zip(axs.flat, panels):
    # Markers keep isolated observations visible: a value surrounded by NaN
    # has no neighbour to draw a line segment to.
    ax.plot(series['Year'], series[column], color=color, marker='.')
    ax.set_title(title)
    ax.set_xlabel('Year')

fig.suptitle(pays)
# Leave room at the top for the figure-level title.
fig.tight_layout(rect=(0, 0, 1, 0.95))




<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x1929c30de48>]

In [35]:
# Snapshot of the merged table restricted to the year 2014.
v = df2.loc[df2["Year"].eq(2014)]

In [37]:
# Bubble chart for 2014: x = population, y = literacy, bubble size and colour
# = unemployment.
#
# The marker sizes must line up one-to-one with the plotted points, so the
# frame is filtered first and x, y and the sizes are all taken from the same
# filtered rows. Passing sizes from a filtered subset together with unfiltered
# x/y gives arrays of different lengths.
scatter_rows = v.dropna(subset=['Population', 'Literacy'])
area = scatter_rows['Unemployement']
scatter_rows.plot.scatter("Population", 'Literacy', s=area, c='Unemployement', colormap='viridis')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1929dc77a20>

In [43]:
import math
import matplotlib.cm as cm
import matplotlib.animation as animation
#%matplotlib notebook

# Only rows with every plotted value present are drawn. This keeps x, y, size
# and colour the same length, and guarantees that the point index reported by
# a pick event is also the positional index of the matching row in `pickable`.
pickable = v.dropna(subset=['Population', 'Literacy', 'Unemployement'])

figure, ax = plt.subplots(figsize=(10, 10))
points = ax.scatter(pickable['Population'],
                    pickable['Literacy'],
                    s=pickable['Unemployement'],
                    c=pickable['Unemployement'],
                    alpha=0.7,
                    picker=5
                   )
figure.colorbar(points, ax=ax)
# Leave room above the axes for the multi-line title written by `onpick`.
figure.subplots_adjust(top=0.7)

def onpick(event):
    """Show the details of the clicked point in the axes title.

    `event.ind` lists the indices of the scatter points under the cursor;
    the first one is looked up positionally in `pickable`, the frame the
    scatter was drawn from.
    """
    row = pickable.iloc[event.ind[0]]
    c = row['CountryName']
    y = row['Year']
    u = row['Unemployement']
    p = int(row['Population'])
    l = row['Literacy']
    pe = row['Primary education']
    s = 'Country: {}\nYear: {}\nUnemployement: {:.2f}%\nPopulation: {:d}\nLiteracy: {:.2f}%\nPrimary Education: {:.2f}%'.format(c,y,u,p,l,pe)
    # Only the title changes, so the scatter and its colorbar are left as drawn.
    ax.set_title(s,loc='left')
    figure.canvas.draw_idle()

figure.canvas.mpl_connect('pick_event', onpick)

<IPython.core.display.Javascript object>

7

In [42]:
import matplotlib.animation as animation
#%matplotlib notebook
# Data ranges of the animated frame; fixed axis limits are not applied in this cell.
un = [v['Population'].min(), v['Population'].max()]
li = [v['Literacy'].min(),v['Literacy'].max()]

# Explicit figure/axes so every frame is drawn on this figure, whichever
# figure pyplot currently considers "current".
fig, ax = plt.subplots()

def update(current):
    """Redraw the bubble chart for the year `current`.

    Rows missing any plotted value are dropped first so that x, y, marker
    size and colour all have the same length.
    """
    ax.cla()
    d = v[v['Year']==current].dropna(subset=['Population', 'Literacy', 'Unemployement'])
    ax.scatter(d['Population'],
               d['Literacy'],
               s=d['Unemployement'],
               c=d['Unemployement'],
               cmap='gnuplot',
               alpha=0.7,
               picker=5
              )
    ax.set_title(current, loc='left')

# np.arange excludes its stop value, hence the +1 so the last year is shown.
# repeat=False ends the animation after that final frame.
a = animation.FuncAnimation(fig,
                            update,
                            interval=800,
                            frames=np.arange(v['Year'].min(), v['Year'].max() + 1),
                            repeat=False
                           )

<IPython.core.display.Javascript object>

In [None]:
# `d` is a second name for the same DataFrame object as `v` (no copy is made).
d = v

In [46]:
# Axis limits are taken over every year of the merged table so that both axes
# stay fixed from one frame to the next.
un = [df2['Population'].min(), df2['Population'].max()]
li = [df2['Literacy'].min(), df2['Literacy'].max()]

# Explicit figure/axes so every frame is drawn on this figure, whichever
# figure pyplot currently considers "current".
fig, ax = plt.subplots()

def update(current):
    """Redraw the bubble chart for the year `current`.

    Rows missing any plotted value are dropped first so that x, y, marker
    size and colour all have the same length.
    """
    ax.cla()
    d = df2[df2['Year']==current].dropna(subset=['Population', 'Literacy', 'Unemployement'])
    ax.scatter(d['Population'],
               d['Literacy'],
               s=d['Unemployement'],
               c=d['Unemployement'],
               cmap='gnuplot',
               alpha=0.7,
               picker=5
              )
    # cla() resets the limits, so they are re-applied on every frame.
    ax.set_xlim(un)
    ax.set_ylim(li)
    ax.set_title(current, loc='left')

# np.arange excludes its stop value, hence the +1 so the last year is shown.
# repeat=False ends the animation after that final frame.
a = animation.FuncAnimation(fig,
                            update,
                            interval=800,
                            frames=np.arange(df2['Year'].min(), df2['Year'].max() + 1),
                            repeat=False
                           )

<IPython.core.display.Javascript object>