In [1]:
!pip install wbdata


import wbdata
import pandas as pd

Collecting wbdata
  Using cached wbdata-0.3.0-py3-none-any.whl (14 kB)
Installing collected packages: wbdata
Successfully installed wbdata-0.3.0


In [2]:
# WDI publishes age-by-sex population counts as indicators named
# "SP.POP.LLHH.MA" (males) and "SP.POP.LLHH.FE" (females), where LL is the
# *low* end of the age range (e.g. "05" for 5-year-olds) and HH is the
# *high* end.

# Build the list of age-range codes: five-year brackets "0004" ... "7579",
# followed by the open-ended "80UP" bracket.
age_ranges = [f"{low:02d}{low + 4:02d}" for low in range(0, 80, 5)] + ["80UP"]

print(age_ranges)

['0004', '0509', '1014', '1519', '2024', '2529', '3034', '3539', '4044', '4549', '5054', '5559', '6064', '6569', '7074', '7579', '80UP']


In [3]:
# Map each WDI indicator code to a readable column label,
# e.g. "SP.POP.0509.MA" -> "males 0509".
male_variables = {f"SP.POP.{age_range}.MA": f"males {age_range}" for age_range in age_ranges}
female_variables = {f"SP.POP.{age_range}.FE": f"females {age_range}" for age_range in age_ranges}

# Merge into a *new* dict (males first, then females). Assigning
# `variables = male_variables` and calling `.update()` would alias the two
# names, silently adding the female entries to `male_variables` as well.
variables = {**male_variables, **female_variables}

print(variables)

{'SP.POP.0004.MA': 'males 0004', 'SP.POP.0509.MA': 'males 0509', 'SP.POP.1014.MA': 'males 1014', 'SP.POP.1519.MA': 'males 1519', 'SP.POP.2024.MA': 'males 2024', 'SP.POP.2529.MA': 'males 2529', 'SP.POP.3034.MA': 'males 3034', 'SP.POP.3539.MA': 'males 3539', 'SP.POP.4044.MA': 'males 4044', 'SP.POP.4549.MA': 'males 4549', 'SP.POP.5054.MA': 'males 5054', 'SP.POP.5559.MA': 'males 5559', 'SP.POP.6064.MA': 'males 6064', 'SP.POP.6569.MA': 'males 6569', 'SP.POP.7074.MA': 'males 7074', 'SP.POP.7579.MA': 'males 7579', 'SP.POP.80UP.MA': 'males 80UP', 'SP.POP.0004.FE': 'females 0004', 'SP.POP.0509.FE': 'females 0509', 'SP.POP.1014.FE': 'females 1014', 'SP.POP.1519.FE': 'females 1519', 'SP.POP.2024.FE': 'females 2024', 'SP.POP.2529.FE': 'females 2529', 'SP.POP.3034.FE': 'females 3034', 'SP.POP.3539.FE': 'females 3539', 'SP.POP.4044.FE': 'females 4044', 'SP.POP.4549.FE': 'females 4549', 'SP.POP.5054.FE': 'females 5054', 'SP.POP.5559.FE': 'females 5559', 'SP.POP.6064.FE': 'females 6064', 'SP.POP.6569.

In [5]:
def population(year, sex, age_group, country):
    """Return the population of `country` in `year` for a sex and age span.

    Parameters
    ----------
    year : int or str
        Year to look up; matched against the data index as ``str(year)``.
    sex : {'male', 'female', 'people'}
        'people' adds the male and female counts together.
    age_group : tuple of (low, high)
        Inclusive age bounds in years. The data comes in five-year brackets,
        so every bracket touched by the bounds is counted in full (e.g.
        ``(0, 72)`` covers ages 0-74). Ages of 80 and above all fall in the
        open-ended last bracket.
    country : str
        Search string passed to ``wbdata.search_countries``; the first match
        is used.

    Returns
    -------
    float
        Sum of the selected age-bracket populations.

    Raises
    ------
    ValueError
        If `sex` is not one of the accepted values.
    """
    label_prefixes = {
        'male': ('males ',),
        'female': ('females ',),
        'people': ('males ', 'females '),
    }
    # Validate before doing any network work; an unknown value previously
    # surfaced as an UnboundLocalError at the end of the function.
    if sex not in label_prefixes:
        raise ValueError(
            f"sex must be one of {sorted(label_prefixes)}, got {sex!r}"
        )

    country_label = wbdata.search_countries(country)[0]['id']

    df = wbdata.get_dataframe(variables, country=country_label).loc[[str(year)]]

    selected_columns = []
    for prefix in label_prefixes[sex]:
        # Column labels for this sex, youngest bracket first (the insertion
        # order of `variables`).
        labels = [label for label in variables.values() if label.startswith(prefix)]
        last_bracket = len(labels) - 1
        # Clamp both ends to the available brackets. Without this, an upper
        # age of 85+ pushed a positional slice past the last male column and
        # into the female columns.
        first_idx = min(max(age_group[0] // 5, 0), last_bracket)
        last_idx = min(age_group[1] // 5, last_bracket)
        selected_columns.extend(labels[first_idx:last_idx + 1])

    # Select by label rather than by position so the result does not depend
    # on the column order of the returned frame; .iloc[0] is the single
    # requested year.
    return df[selected_columns].sum(axis='columns').iloc[0]
    

In [9]:
# Smoke test: males plus females in India in 2020 for ages 0 through 72.
# Brackets are five years wide, so the 70-74 bracket is counted in full.
population(year=2020, sex='people', age_group=(0, 72), country='India')

1366273444.0

In [9]:
def population_df(year, country):
    """Return a per-age-bracket population table for `country` in `year`.

    Parameters
    ----------
    year : int or str
        Year to look up; matched against the data index as ``str(year)``.
    country : str
        Search string passed to ``wbdata.search_countries``; the first match
        is used.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``age_ranges`` with columns 'Country', 'Year',
        'Age Range', 'Male', 'Female' and 'Total' (Male + Female).
    """
    country_label = wbdata.search_countries(country)[0]['id']

    # Single row for the requested year, as a Series indexed by column label.
    year_row = wbdata.get_dataframe(variables, country=country_label).loc[[str(year)]].iloc[0]

    # Look the values up by their "males LLHH" / "females LLHH" labels instead
    # of splitting the rows positionally in half, so each value is guaranteed
    # to line up with its age bracket regardless of column order.
    male_labels = ["males " + age_range for age_range in age_ranges]
    female_labels = ["females " + age_range for age_range in age_ranges]

    pop_df = pd.DataFrame({
        'Country': country,
        'Year': year,
        'Age Range': age_ranges,
        'Male': year_row[male_labels].tolist(),
        'Female': year_row[female_labels].tolist(),
    })
    pop_df['Total'] = pop_df['Male'] + pop_df['Female']
    return pop_df

In [12]:
# Age-by-sex population table for India in 2010; the bare last expression
# renders the frame as the cell output.
pop_df = population_df(year=2010, country='India')
pop_df

Unnamed: 0,Country,Year,Age Range,Male,Female,Total
0,India,2010,0004,66762871.0,60708486.0,127471357.0
1,India,2010,0509,68255358.0,61864151.0,130119509.0
2,India,2010,1014,66546666.0,60188348.0,126735014.0
3,India,2010,1519,63573345.0,58150510.0,121723855.0
4,India,2010,2024,59980845.0,55287870.0,115268715.0
5,India,2010,2529,54762384.0,50361505.0,105123889.0
6,India,2010,3034,48366584.0,44646901.0,93013485.0
7,India,2010,3539,43303427.0,40312726.0,83616153.0
8,India,2010,4044,38034285.0,35674000.0,73708285.0
9,India,2010,4549,33541016.0,31620289.0,65161305.0


In [70]:
# visualize population pyramid
import plotly.offline as py
import plotly.graph_objs as go
import numpy as np

# Enable inline Plotly rendering in the notebook.
py.init_notebook_mode(connected=True)

# Population table to plot.
pop_df = population_df(year=2010, country='India')

# One horizontal bar trace per sex. Women's counts are negated so their bars
# extend to the left of zero; each bar sits at its bracket's lower bound + 1.
bins = [
    go.Bar(
        x=counts,
        y=[int(bracket[:2]) + 1 for bracket in age_ranges],
        orientation='h',
        name=label,
        marker={'color': colour},
        hoverinfo='skip',
    )
    for counts, label, colour in (
        (pop_df['Male'], 'Men', 'purple'),
        (-pop_df['Female'], 'Women', 'pink'),
    )
]

# 'overlay' keeps both bars of an age bracket on the same row.
layout = go.Layout(
    barmode='overlay',
    yaxis=go.layout.YAxis(title='Age', range=[0, 90]),
    xaxis=go.layout.XAxis(title='Number'),
)

fig = go.Figure(data=bins, layout=layout)
fig.show()

In [71]:
# Animated population pyramid for India, one frame per decade.
year_init = 1960                # pyramid shown before "Play" is pressed
years = range(1960, 2030, 10)   # 1960, 1970, ..., 2020


def _pyramid_bars(pop_df):
    """Return the [men, women] horizontal bar traces for one population table.

    Men are plotted with positive counts and women with negated counts, so
    the two sets of bars extend in opposite directions from zero.
    """
    # Each bar sits at the lower bound of its age bracket plus one.
    bar_positions = [int(s[:2]) + 1 for s in age_ranges]
    return [
        go.Bar(x=pop_df['Male'],
               y=bar_positions,
               orientation='h',
               name='Men',
               marker=dict(color='purple'),
               hoverinfo='skip'),
        go.Bar(x=-pop_df['Female'],
               y=bar_positions,
               orientation='h',
               name='Women',
               marker=dict(color='pink'),
               hoverinfo='skip'),
    ]


# Fetch every year's table exactly once (each call hits the World Bank API).
pop_by_year = {y: population_df(year=y, country='India') for y in years}
if year_init in pop_by_year:
    pop_df = pop_by_year[year_init]
else:
    pop_df = population_df(year=year_init, country='India')

# Plotly does not re-scale axes between animation frames, so fix a symmetric
# x-range wide enough for the largest bar in any frame.
largest_count = max(
    max(table['Male'].max(), table['Female'].max())
    for table in [pop_df, *pop_by_year.values()]
)
x_limit = 1.05 * largest_count

# "Play" button; built once because it does not depend on the frame.
button = {
    "type": "buttons",
    "buttons": [
        {
            "label": "Play",
            "method": "animate",
            "args": [None, {"frame": {"duration": 1000}}],
        }
    ],
}

layout = go.Layout(barmode='overlay',
                   yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
                   xaxis=go.layout.XAxis(range=[-x_limit, x_limit], title='Number'),
                   updatemenus=[button],
                   title_text="Population Pyramid Animation")

# One frame per year; each frame also updates the title with its year.
frames = [
    go.Frame(data=_pyramid_bars(pop_by_year[y]),
             layout=go.Layout(title_text=f"Population Pyramid in Year {y}"))
    for y in years
]

# Start from the year_init pyramid. Reusing the loop's `bins` variable here
# would show the *last* year processed as the initial state.
fig = go.Figure(data=_pyramid_bars(pop_df),
                frames=frames,
                layout=layout)

# show animation
fig.show()