In [1]:
import pandas as pd
import numpy as np
import os
import json
import requests
from pprint import pprint

## Regions

In [45]:
# %run regions.ipynb
regions_df = pd.read_csv('regions.csv')
regions_df.head()

Unnamed: 0,Country_Code,Country_Region,Region
0,ABW,Aruba,Latin America & Caribbean
1,AFG,Afghanistan,South Asia
2,AGO,Angola,Sub-Saharan Africa
3,ALB,Albania,Europe & Central Asia
4,AND,Andorra,Europe & Central Asia


In [3]:
def country_to_code(country):
    country_code = regions_df.loc[regions_df['Country_Region'] == country, 'Country_Code'].values[0]
    return country_code

In [4]:
# lookup = regions_df.loc[regions_df['Country_Region']=='China']
# lookup = regions_df[regions_df['Country_Region'].str.contains("Korea")]
# lookup

## Population

In [5]:
def get_population(country_code, indicator_code):

    # API Call
    url = f"https://api.worldbank.org/v2/country/{country_code}/indicator/{indicator_code}?format=json"
    response = requests.get(url).json()
    pages = response[0]['pages']
    results = response[1]

    # Loop through results
    for page in range(pages):
        for i in range(len(results)):

            population = results[i]['value']
            if population is None:
                pass
            else:
                return population
                # Only the most recent value, then break
                break

In [6]:
def get_pop_by_age(country_code, i):
    if i < 8:
        pop_segment = get_population(country_code, f'SP.POP.{i}0{i}4.FE') + get_population(country_code, f'SP.POP.{i}5{i}9.FE') + get_population(country_code, f'SP.POP.{i}0{i}4.MA') + get_population(country_code, f'SP.POP.{i}5{i}9.MA')
        return pop_segment
    else:
        pop_segment = get_population(country_code, f'SP.POP.{i}0UP.FE') + get_population(country_code, f'SP.POP.{i}0UP.MA')
        return pop_segment

In [7]:
def get_pop_by_country(country_region):
    # Translate Country Name to Country Code
#     country_code = regions_df.loc[regions_df['Country_Region'] == country_region, 'Country_Code'].values[0]
    country_code = country_to_code(country_region)

    # Append row to dataframe
    row = {
        'Country_Code': country_code, 
    #             'Population': results[i][], 
    #             'Pop Density': results[i][], 
        'Ages 0-9': get_pop_by_age(country_code, 0), 
        'Ages 10-19': get_pop_by_age(country_code, 1), 
        'Ages 20-29': get_pop_by_age(country_code, 2), 
        'Ages 30-39': get_pop_by_age(country_code, 3), 
        'Ages 40-49': get_pop_by_age(country_code, 4), 
        'Ages 50-59': get_pop_by_age(country_code, 5), 
        'Ages 60-69': get_pop_by_age(country_code, 6), 
        'Ages 70-79': get_pop_by_age(country_code, 7), 
        'Ages 80+': get_pop_by_age(country_code, 8),
    }
    
    return row

In [8]:
countries = ['China',
          'Italy',
          'United States',
          'Spain',
          'Germany',
          'Iran, Islamic Rep.',
          'France',
          'Switzerland',
          'United Kingdom',
          'Korea, Rep.',
          'Netherlands',
          'Belgium']

In [9]:
# New empty dataframe
population_df = pd.DataFrame(columns=['Country_Code', 
#                                       'Population', 'Pop Density', 
                                      'Ages 0-9', 'Ages 10-19', 
                                      'Ages 20-29', 'Ages 30-39', 
                                      'Ages 40-49', 'Ages 50-59', 'Ages 60-69', 
                                      'Ages 70-79', 'Ages 80+'])

for i in range(len(countries)):
    try:
        # Get population details for country
        row = get_pop_by_country(countries[i])
        # Append row to dataframe
        population_df = population_df.append(row, ignore_index=True)        
        # Status check
        print(f'{countries[i]} DONE')
        
    # Pass if no population data for country    
    except:
        pass

China DONE
Italy DONE
United States DONE
Spain DONE
Germany DONE
Iran, Islamic Rep. DONE
France DONE
Switzerland DONE
United Kingdom DONE
Korea, Rep. DONE
Netherlands DONE
Belgium DONE


In [10]:
population_df

Unnamed: 0,Country_Code,Ages 0-9,Ages 10-19,Ages 20-29,Ages 30-39,Ages 40-49,Ages 50-59,Ages 60-69,Ages 70-79,Ages 80+
0,CHN,167867076,162776298,197394843,210600646,225033373,200823077,141470402,62009483,24754803
1,ITA,5194456,5714920,6151174,7197156,9376514,9280387,7319860,5833668,4363150
2,USA,40103668,42374625,46205897,43324949,40335465,42989751,37156388,21972242,12704444
3,ESP,4439747,4616640,4693905,6445348,7888167,6847126,5083479,3815303,2894032
4,DEU,7511936,7954609,9438141,10591589,10627081,13493453,10031015,7767350,5512750
5,IRN,14108719,11349425,13509606,16342501,10801649,7763121,4844673,2160269,920303
6,FRA,7927395,8072880,7694701,8220137,8764800,8826632,8015842,5343874,4120982
7,CHE,864365,835163,1052867,1198649,1194576,1288656,935376,707492,439400
8,GBR,8000007,7427169,8604680,9004636,8593596,8967963,7108426,5431790,3350725
9,KOR,4373524,5102131,6880039,7397676,8516856,8477470,5855213,3374777,1657570


In [11]:
# Save CSV
file = os.path.join('population.csv')
population_df.to_csv(file, index=False, header=True)

## Population Density

In [41]:
population_df2 = population_df
population_df2 = population_df2.set_index('Country_Code')
population_df2['Population Total'] = population_df2.sum(axis=1, skipna=True)
population_df2

Unnamed: 0_level_0,Ages 0-9,Ages 10-19,Ages 20-29,Ages 30-39,Ages 40-49,Ages 50-59,Ages 60-69,Ages 70-79,Ages 80+,Population Total
Country_Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
CHN,167867076,162776298,197394843,210600646,225033373,200823077,141470402,62009483,24754803,1392730000.0
ITA,5194456,5714920,6151174,7197156,9376514,9280387,7319860,5833668,4363150,60431280.0
USA,40103668,42374625,46205897,43324949,40335465,42989751,37156388,21972242,12704444,327167400.0
ESP,4439747,4616640,4693905,6445348,7888167,6847126,5083479,3815303,2894032,46723750.0
DEU,7511936,7954609,9438141,10591589,10627081,13493453,10031015,7767350,5512750,82927920.0
IRN,14108719,11349425,13509606,16342501,10801649,7763121,4844673,2160269,920303,81800270.0
FRA,7927395,8072880,7694701,8220137,8764800,8826632,8015842,5343874,4120982,66987240.0
CHE,864365,835163,1052867,1198649,1194576,1288656,935376,707492,439400,8516544.0
GBR,8000007,7427169,8604680,9004636,8593596,8967963,7108426,5431790,3350725,66488990.0
KOR,4373524,5102131,6880039,7397676,8516856,8477470,5855213,3374777,1657570,51635260.0


In [38]:
pop_density = pd.DataFrame(columns=['Country_Code', 'Pop Density (per sqkm)'])

for i in range(len(countries)):
    try:
        # Translate Country Name to Country Code
#         country_code = regions_df.loc[regions_df['Country_Region'] == countries[i], 'Country_Code'].values[0]
        country_code = country_to_code(countries[i])

        # Get population details for country
        row = {
            'Country_Code': country_code, 
            'Pop Density (per sqkm)': get_population(country_code, 'EN.POP.DNST')
        }
        # Append row to dataframe
        pop_density = pop_density.append(row, ignore_index=True)        
        # Status check
        print(f'{countries[i]} DONE')
        
    # Pass if no population data for country    
    except:
        pass
    
pop_density = pop_density.set_index('Country_Code')
pop_density

China DONE
Italy DONE
United States DONE
Spain DONE
Germany DONE
Iran, Islamic Rep. DONE
France DONE
Switzerland DONE
United Kingdom DONE
Korea, Rep. DONE
Netherlands DONE
Belgium DONE


Unnamed: 0_level_0,Pop Density (per sqkm)
Country_Code,Unnamed: 1_level_1
CHN,148.348833
ITA,205.450748
USA,35.766089
ESP,93.529058
DEU,237.37097
IRN,50.22242
FRA,122.338396
CHE,215.521378
GBR,274.827392
KOR,529.652104


In [42]:
population_df2 = pd.merge(pop_density, population_df2, on='Country_Code', how='inner')
population_df2

Unnamed: 0_level_0,Pop Density (per sqkm),Ages 0-9,Ages 10-19,Ages 20-29,Ages 30-39,Ages 40-49,Ages 50-59,Ages 60-69,Ages 70-79,Ages 80+,Population Total
Country_Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
CHN,148.348833,167867076,162776298,197394843,210600646,225033373,200823077,141470402,62009483,24754803,1392730000.0
ITA,205.450748,5194456,5714920,6151174,7197156,9376514,9280387,7319860,5833668,4363150,60431280.0
USA,35.766089,40103668,42374625,46205897,43324949,40335465,42989751,37156388,21972242,12704444,327167400.0
ESP,93.529058,4439747,4616640,4693905,6445348,7888167,6847126,5083479,3815303,2894032,46723750.0
DEU,237.37097,7511936,7954609,9438141,10591589,10627081,13493453,10031015,7767350,5512750,82927920.0
IRN,50.22242,14108719,11349425,13509606,16342501,10801649,7763121,4844673,2160269,920303,81800270.0
FRA,122.338396,7927395,8072880,7694701,8220137,8764800,8826632,8015842,5343874,4120982,66987240.0
CHE,215.521378,864365,835163,1052867,1198649,1194576,1288656,935376,707492,439400,8516544.0
GBR,274.827392,8000007,7427169,8604680,9004636,8593596,8967963,7108426,5431790,3350725,66488990.0
KOR,529.652104,4373524,5102131,6880039,7397676,8516856,8477470,5855213,3374777,1657570,51635260.0


In [44]:
# Save CSV
file = os.path.join('population.csv')
population_df2.to_csv(file, index=True, header=True)