In [1]:
!pip install wbdata
import wbdata

Collecting wbdata
  Using cached wbdata-0.3.0-py3-none-any.whl (14 kB)
Installing collected packages: wbdata
Successfully installed wbdata-0.3.0


In [3]:
# WDI publishes population by age and sex as indicators named
# "SP.POP.LLHH.MA" (males) and "SP.POP.LLHH.FE" (females), where LL is
# the *low* end of the age range (e.g. "05" for 5-year-olds) and HH is
# the *high* end.

# Build the list of age-range codes. The numbered ranges start at
# 0, 5, ..., 75 and each spans five years, written as two zero-padded
# two-digit numbers ("0004", "0509", ...).
age_ranges = [f"{low:02d}{low + 4:02d}" for low in range(0, 80, 5)]

# The ranges top out at 80, which has its own code.
age_ranges.append("80UP")

print(age_ranges)

['0004', '0509', '1014', '1519', '2024', '2529', '3034', '3539', '4044', '4549', '5054', '5559', '6064', '6569', '7074', '7579', '80UP']


In [4]:
# Map each WDI indicator code to the column label it should carry,
# e.g. "SP.POP.0004.MA" -> "males 0004".
male_variables = {"SP.POP."+age_range+".MA":"males "+age_range for age_range in age_ranges}
female_variables = {"SP.POP."+age_range+".FE":"females "+age_range for age_range in age_ranges}

# Merge into a *new* dict, males first and then females. Assigning
# `variables = male_variables` and calling .update() would alias the
# two names, so male_variables would silently gain every female entry.
variables = {**male_variables, **female_variables}

print(variables)

{'SP.POP.0004.MA': 'males 0004', 'SP.POP.0509.MA': 'males 0509', 'SP.POP.1014.MA': 'males 1014', 'SP.POP.1519.MA': 'males 1519', 'SP.POP.2024.MA': 'males 2024', 'SP.POP.2529.MA': 'males 2529', 'SP.POP.3034.MA': 'males 3034', 'SP.POP.3539.MA': 'males 3539', 'SP.POP.4044.MA': 'males 4044', 'SP.POP.4549.MA': 'males 4549', 'SP.POP.5054.MA': 'males 5054', 'SP.POP.5559.MA': 'males 5559', 'SP.POP.6064.MA': 'males 6064', 'SP.POP.6569.MA': 'males 6569', 'SP.POP.7074.MA': 'males 7074', 'SP.POP.7579.MA': 'males 7579', 'SP.POP.80UP.MA': 'males 80UP', 'SP.POP.0004.FE': 'females 0004', 'SP.POP.0509.FE': 'females 0509', 'SP.POP.1014.FE': 'females 1014', 'SP.POP.1519.FE': 'females 1519', 'SP.POP.2024.FE': 'females 2024', 'SP.POP.2529.FE': 'females 2529', 'SP.POP.3034.FE': 'females 3034', 'SP.POP.3539.FE': 'females 3539', 'SP.POP.4044.FE': 'females 4044', 'SP.POP.4549.FE': 'females 4549', 'SP.POP.5054.FE': 'females 5054', 'SP.POP.5559.FE': 'females 5559', 'SP.POP.6064.FE': 'females 6064', 'SP.POP.6569.

In [12]:
# "WLD" is the code for the World; substitute your own code or a list
# of codes. Codes can be looked up with wbdata.search_countries("").
world_code = "WLD"

# Download every indicator in `variables` for that code and show the table.
df = wbdata.get_dataframe(variables, country=world_code)
print(df)

# Keep the row(s) whose date is '2020' and add them up column by column.
totals_2020 = df.query("date=='2020'").sum(axis=0)
print(totals_2020)

       Males 0004   Males 0509   Males 1014   Males 1519   Males 2024  \
date                                                                    
2021  344650023.0  351823148.0  340134768.0  321211465.0  309096185.0   
2020  348843527.0  350861843.0  336694403.0  319186586.0  307711749.0   
2019  351934193.0  349385852.0  332876020.0  317132294.0  306772989.0   
2018  354088174.0  347389724.0  329029796.0  315050425.0  306524173.0   
2017  355156429.0  344546700.0  325497715.0  313205095.0  306617594.0   
...           ...          ...          ...          ...          ...   
1964  239069096.0  208246346.0  184392252.0  148807806.0  127455138.0   
1963  230599710.0  206420446.0  178823880.0  142714269.0  127354732.0   
1962  224041736.0  202783378.0  172741508.0  137464629.0  127460456.0   
1961  222196886.0  198854626.0  166558402.0  133606813.0  127266768.0   
1960  223557787.0  194920004.0  160122897.0  132065258.0  126570596.0   

       Males 2529   Males 3034   Males 3539   Male

In [65]:
def population(year, sex, age_group, country):
    """Return the population of one country, year, sex and age span.

    Parameters
    ----------
    year : int or str
        Year to report; it is converted with ``str(year)`` and looked up
        in the row labels of the downloaded DataFrame.
    sex : str
        ``'male'``, ``'female'`` or ``'people'`` (both sexes).
    age_group : sequence of two ints
        ``(low, high)`` ages. Data comes in five-year columns, so each
        bound is floor-divided by 5 and every column from the one
        containing ``low`` to the one containing ``high`` is counted in
        full; e.g. ``(0, 9)`` counts the first two columns. Bounds beyond
        the last column of a sex are clamped to that last column.
    country : str
        Search text handed to ``wbdata.search_countries``; the ``'id'``
        of the first match is used.

    Returns
    -------
    The sum of the selected columns for the requested year.

    Raises
    ------
    ValueError
        If ``sex`` is not one of the three accepted values.
    """
    # Reject bad input before making any network request.
    valid_sexes = ('male', 'female', 'people')
    if sex not in valid_sexes:
        raise ValueError(f"sex must be one of {valid_sexes}, got {sex!r}")

    country_label = wbdata.search_countries(country)[0]['id']

    # The list inside .loc keeps the result a one-row DataFrame.
    df = wbdata.get_dataframe(variables,country=country_label).loc[[str(year)]]

    # Columns are addressed by position. This assumes the first half of
    # the columns are the male age buckets in ascending order and the
    # second half are the matching female buckets.
    n_buckets = len(df.columns) // 2
    last_bucket = n_buckets - 1

    # Clamp so an age past the last bucket cannot spill from the male
    # columns into the female ones (or past the end of the frame).
    first = min(max(age_group[0] // 5, 0), last_bucket)
    last = min(max(age_group[1] // 5, 0), last_bucket)

    male_cols = list(range(first, last + 1))
    female_cols = [n_buckets + position for position in male_cols]

    if sex == 'male':
        selected_cols = male_cols
    elif sex == 'female':
        selected_cols = female_cols
    else:
        selected_cols = male_cols + female_cols

    # .iloc[0] takes the single row by position; plain [0] on a
    # string-labelled Series relies on a deprecated fallback.
    total = df.iloc[:, selected_cols].sum(axis='columns').iloc[0]

    return total
    

In [8]:
# Look up the World Bank country id for a name. population() takes the
# 'id' of the first search hit and passes it to get_dataframe as `country`.
wbdata.search_countries("France")

id    name
----  ------
FRA   France

In [60]:
# Step-by-step version of the column selection, with fixed inputs.
date = '2020'
sex = 'people'
age_group = (0,9)
country = 'India'

# Use the id of the first search hit as the country code.
country_label = wbdata.search_countries(country)[0]['id']

# The list inside .loc keeps the result a one-row DataFrame.
df = wbdata.get_dataframe(variables,country=country_label).loc[[date]]

# Columns are addressed by position: the male buckets are assumed to
# fill the first half of the columns and the female buckets the second.
n_buckets = len(df.columns) // 2
start_idx = age_group[0] // 5
end_idx = age_group[1] // 5 + 1

if sex == 'male':
    selected = df.iloc[:, start_idx:end_idx]
elif sex == 'female':
    selected = df.iloc[:, n_buckets + start_idx:n_buckets + end_idx]
elif sex == 'people':
    males = df.iloc[:, start_idx:end_idx]
    females = df.iloc[:, n_buckets + start_idx:n_buckets + end_idx]
    selected = males.join(females)
else:
    raise ValueError(f"sex must be 'male', 'female' or 'people', got {sex!r}")

print(df)

# Named `total`, not `sum`: a top-level `sum` would replace the builtin
# sum() for every later cell. .iloc[0] reads the single row by position.
total = selected.sum(axis='columns').iloc[0]
total

      males 0004  males 0509  males 1014  males 1519  males 2024  males 2529  \
date                                                                           
2020  60771735.0  63921193.0  65906417.0  67689391.0  65745248.0  62448056.0   

      males 3034  males 3539  males 4044  males 4549  ...  females 3539  \
date                                                  ...                 
2020  58647559.0  53259451.0  46635684.0  41235304.0  ...    49403314.0   

      females 4044  females 4549  females 5054  females 5559  females 6064  \
date                                                                         
2020    43621841.0    39150349.0    34130103.0    29401712.0    24542975.0   

      females 6569  females 7074  females 7579  females 80UP  
date                                                          
2020    19232661.0    12886752.0     8263871.0     8441710.0  

[1 rows x 34 columns]


238907978.0

In [72]:
# Smoke test: both sexes in India, 2020, ages 0-72. The upper bound is
# floor-divided by 5, so the whole five-year column containing age 72
# is included in the total.
population(year = 2020, sex = 'people', age_group = (0,72) , country = 'India')

1366273444.0

In [67]:
def population_df(year, country):
    """Return the one-row DataFrame of all age/sex columns for a year.

    `country` is passed to ``wbdata.search_countries`` and the ``'id'``
    of the first match is used as the country code. `year` is converted
    with ``str(year)`` and used as the row label to keep.
    """
    matches = wbdata.search_countries(country)
    country_code = matches[0]['id']

    all_years = wbdata.get_dataframe(variables, country=country_code)

    # A list inside .loc returns a DataFrame rather than a Series.
    return all_years.loc[[str(year)]]

In [69]:
# Example: the one-row frame of every age/sex column for India in 2010.
population_df(year=2010, country='India')

Unnamed: 0_level_0,males 0004,males 0509,males 1014,males 1519,males 2024,males 2529,males 3034,males 3539,males 4044,males 4549,...,females 3539,females 4044,females 4549,females 5054,females 5559,females 6064,females 6569,females 7074,females 7579,females 80UP
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010,66762871.0,68255358.0,66546666.0,63573345.0,59980845.0,54762384.0,48366584.0,43303427.0,38034285.0,33541016.0,...,40312726.0,35674000.0,31620289.0,27284255.0,22666716.0,16756598.0,12493226.0,9550058.0,6198965.0,5282991.0
