## Population

In [1]:
import pandas as pd
import os
import json
import requests
from pprint import pprint

regions_df = pd.read_csv('regions.csv')

In [7]:
def get_population(country_code, indicator_code):

    # API Call
    url = f"https://api.worldbank.org/v2/country/{country_code}/indicator/{indicator_code}?format=json"
    response = requests.get(url).json()
    pages = response[0]['pages']
    results = response[1]

    # Loop through results
#     for page in range(pages):
    for i in range(len(results)):

        population = results[i]['value']
        if population is None:
            pass
        else:
            return population
            # Only the most recent value, then break
            break

In [8]:
def get_pop_by_age(country_code, i):
    # Ages < 80
    if i < 8:
        pop_segment = get_population(country_code, f'SP.POP.{i}0{i}4.FE') + get_population(country_code, f'SP.POP.{i}5{i}9.FE') + get_population(country_code, f'SP.POP.{i}0{i}4.MA') + get_population(country_code, f'SP.POP.{i}5{i}9.MA')
        return pop_segment
    # Ages >= 80
    else:
        pop_segment = get_population(country_code, f'SP.POP.{i}0UP.FE') + get_population(country_code, f'SP.POP.{i}0UP.MA')
        return pop_segment

In [9]:
def get_pop_by_country(country_code):
    row = {
        'Country_Code': country_code, 
        'Ages 0-9': get_pop_by_age(country_code, 0), 
        'Ages 10-19': get_pop_by_age(country_code, 1), 
        'Ages 20-29': get_pop_by_age(country_code, 2), 
        'Ages 30-39': get_pop_by_age(country_code, 3), 
        'Ages 40-49': get_pop_by_age(country_code, 4), 
        'Ages 50-59': get_pop_by_age(country_code, 5), 
        'Ages 60-69': get_pop_by_age(country_code, 6), 
        'Ages 70-79': get_pop_by_age(country_code, 7), 
        'Ages 80+': get_pop_by_age(country_code, 8),
    }
    
    return row

In [14]:
# New empty dataframe
population_df = pd.DataFrame(columns=['Country_Code', 
                                      'Ages 0-9', 'Ages 10-19', 
                                      'Ages 20-29', 'Ages 30-39', 
                                      'Ages 40-49', 'Ages 50-59', 'Ages 60-69', 
                                      'Ages 70-79', 'Ages 80+'])

countries = regions_df['Country_Code']

for i in range(len(countries)):
    try:
        # Get population details for country
        row = get_pop_by_country(countries[i])
        # Append row to dataframe
        population_df = population_df.append(row, ignore_index=True)        
        # Status check
        print(f'{countries[i]} DONE')
        
    # Pass if no population data for country    
    except:
        pass

In [19]:
# Total Population
population_df.set_index('Country_Code', inplace=True)
population_df['Population Total'] = population_df.sum(axis=1, skipna=True)
population_df.head()

Unnamed: 0_level_0,Ages 0-9,Ages 10-19,Ages 20-29,Ages 30-39,Ages 40-49,Ages 50-59,Ages 60-69,Ages 70-79,Ages 80+,Population Total
Country_Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
ABW,11833,14370,14263,11428,14851,17549,12771,6324,2457,105846.0
AFG,10989810,9442594,6523989,4238075,2736927,1711924,989228,437260,102581,37172388.0
AGO,10392615,7290070,4980375,3435680,2220426,1382060,709937,314466,84135,30809764.0
ALB,332306,388705,486304,361449,336137,394706,303223,185136,78410,2866376.0
ARE,999201,817800,2134342,3008905,1684722,746683,180300,47648,11361,9630962.0
ARG,7433647,7105821,6996241,6379363,5463151,4326207,3441452,2177582,1171039,44494503.0
ARM,422128,355545,452958,482515,337371,389390,295140,123354,93371,2951772.0
ATG,14416,14153,14775,14063,13933,12259,7327,3715,1641,96282.0
AUS,3252375,3039992,3440570,3606209,3280091,3094585,2572718,1694596,1011234,24992370.0
AUT,844898,871675,1124019,1196931,1215151,1381306,965860,781949,465250,8847039.0


In [20]:
# Population Density (column)
pop_density = pd.DataFrame(columns=['Country_Code', 'Pop Density (per sqkm)'])

for i in range(len(countries)):
    try:
        country_code = country_to_code(countries[i])

        # Get population details for country
        row = {
            'Country_Code': country_code, 
            'Pop Density (per sqkm)': get_population(country_code, 'EN.POP.DNST')
        }
        # Append row to dataframe
        pop_density = pop_density.append(row, ignore_index=True)        
        # Status check
        print(f'{countries[i]} DONE')
        
    # Pass if no population data for country    
    except:
        pass
    
pop_density.set_index('Country_Code', inplace=True)
pop_density

Aruba DONE
Afghanistan DONE
Angola DONE
Albania DONE
Andorra DONE
United Arab Emirates DONE
Argentina DONE
Armenia DONE
American Samoa DONE
Antigua and Barbuda DONE
Australia DONE
Austria DONE
Azerbaijan DONE
Burundi DONE
Belgium DONE
Benin DONE
Burkina Faso DONE
Bangladesh DONE
Bulgaria DONE
Bahrain DONE
Bahamas, The DONE
Bosnia and Herzegovina DONE
Belarus DONE
Belize DONE
Bermuda DONE
Bolivia DONE
Brazil DONE
Barbados DONE
Brunei Darussalam DONE
Bhutan DONE
Botswana DONE
Central African Republic DONE
Canada DONE
Switzerland DONE
Channel Islands DONE
Chile DONE
China DONE
Cote d'Ivoire DONE
Cameroon DONE
Congo, Dem. Rep. DONE
Congo, Rep. DONE
Colombia DONE
Comoros DONE
Cabo Verde DONE
Costa Rica DONE
Cuba DONE
Curacao DONE
Cayman Islands DONE
Cyprus DONE
Czech Republic DONE
Germany DONE
Djibouti DONE
Dominica DONE
Denmark DONE
Dominican Republic DONE
Algeria DONE
Ecuador DONE
Egypt, Arab Rep. DONE
Eritrea DONE
Spain DONE
Estonia DONE
Ethiopia DONE
Finland DONE
Fiji DONE
France DONE
F

Unnamed: 0_level_0,Pop Density (per sqkm)
Country_Code,Unnamed: 1_level_1
ABW,588.027778
AFG,56.937760
AGO,24.713052
ALB,104.612263
AND,163.842553
ARE,135.609110
ARG,16.258510
ARM,103.680225
ASM,277.325000
ATG,218.831818


In [21]:
population_df = pd.merge(pop_density, population_df, on='Country_Code', how='inner')
population_df.head()

Unnamed: 0_level_0,Pop Density (per sqkm),Ages 0-9,Ages 10-19,Ages 20-29,Ages 30-39,Ages 40-49,Ages 50-59,Ages 60-69,Ages 70-79,Ages 80+,Population Total
Country_Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
ABW,588.027778,11833,14370,14263,11428,14851,17549,12771,6324,2457,105846.0
AFG,56.937760,10989810,9442594,6523989,4238075,2736927,1711924,989228,437260,102581,37172388.0
AGO,24.713052,10392615,7290070,4980375,3435680,2220426,1382060,709937,314466,84135,30809764.0
ALB,104.612263,332306,388705,486304,361449,336137,394706,303223,185136,78410,2866376.0
ARE,135.609110,999201,817800,2134342,3008905,1684722,746683,180300,47648,11361,9630962.0
ARG,16.258510,7433647,7105821,6996241,6379363,5463151,4326207,3441452,2177582,1171039,44494503.0
ARM,103.680225,422128,355545,452958,482515,337371,389390,295140,123354,93371,2951772.0
ATG,218.831818,14416,14153,14775,14063,13933,12259,7327,3715,1641,96282.0
AUS,3.249129,3252375,3039992,3440570,3606209,3280091,3094585,2572718,1694596,1011234,24992370.0
AUT,107.206927,844898,871675,1124019,1196931,1215151,1381306,965860,781949,465250,8847039.0


In [None]:
# Save CSV
# pop_prcnt.to_csv('population.csv', index=False, header=True)

## Population Percentages

In [4]:
pop_prcnt = pd.DataFrame(data=[], columns=population_df.columns)

for row, index in population_df.iterrows():
    pop_total = population_df.iloc[row,11]
    
    pop_prcnt = pop_prcnt.append({
        'Country_Code': population_df.iloc[row,0],
        'Pop Density (per sqkm)':  population_df.iloc[row,1],
        'Ages 0-9': population_df.iloc[row,2]/pop_total,
        'Ages 10-19': population_df.iloc[row,3]/pop_total,
        'Ages 20-29': population_df.iloc[row,4]/pop_total,
        'Ages 30-39': population_df.iloc[row,5]/pop_total,
        'Ages 40-49': population_df.iloc[row,6]/pop_total,
        'Ages 50-59': population_df.iloc[row,7]/pop_total,
        'Ages 60-69': population_df.iloc[row,8]/pop_total,
        'Ages 70-79': population_df.iloc[row,9]/pop_total,
        'Ages 80+': population_df.iloc[row,10]/pop_total,
        'Population Total': pop_total,
    }, ignore_index=True)

pop_prcnt.head()

Unnamed: 0,Country_Code,Pop Density (per sqkm),Ages 0-9,Ages 10-19,Ages 20-29,Ages 30-39,Ages 40-49,Ages 50-59,Ages 60-69,Ages 70-79,Ages 80+,Population Total
0,ABW,588.027778,0.111794,0.135763,0.134752,0.107968,0.140308,0.165797,0.120656,0.059747,0.023213,105846.0
1,AFG,56.93776,0.295644,0.254022,0.175506,0.114011,0.073628,0.046054,0.026612,0.011763,0.00276,37172388.0
2,AGO,24.713052,0.337316,0.236616,0.161649,0.111513,0.072069,0.044858,0.023043,0.010207,0.002731,30809764.0
3,ALB,104.612263,0.115932,0.135609,0.169658,0.1261,0.117269,0.137702,0.105786,0.064589,0.027355,2866376.0
4,ARE,135.60911,0.103749,0.084914,0.221613,0.31242,0.174928,0.077529,0.018721,0.004947,0.00118,9630962.0


In [5]:
# Save CSV
# pop_prcnt.to_csv('population.csv', index=False, header=True)

In [2]:
population_df = pd.read_csv('population.csv')
pop_density = pd.merge(regions_df, population_df, on='Country_Code', how='inner')
pop_density.sort_values('Population Total', ascending=False, inplace=True)
pop_density.head(20)

Unnamed: 0,Country_Code,Pop Density (per sqkm),Ages 0-9,Ages 10-19,Ages 20-29,Ages 30-39,Ages 40-49,Ages 50-59,Ages 60-69,Ages 70-79,Ages 80+,Population Total
115,MNG,2.040609,0.224997,0.146992,0.169913,0.168886,0.128241,0.093408,0.043256,0.017998,0.006309,3170214
8,AUS,3.249129,0.130135,0.121637,0.137665,0.144292,0.131244,0.123821,0.102940,0.067805,0.040462,24992370
84,ISL,3.526923,0.131483,0.129945,0.143965,0.137030,0.125748,0.126045,0.105987,0.062468,0.037330,353573
151,SUR,3.692250,0.182452,0.176573,0.164516,0.139350,0.126482,0.108247,0.058359,0.030780,0.013240,575987
98,LBY,3.795632,0.193795,0.171548,0.166500,0.174387,0.147795,0.079835,0.038475,0.020074,0.007591,6678565
...,...,...,...,...,...,...,...,...,...,...,...,...
17,BHR,2017.273700,0.134587,0.109266,0.205232,0.268813,0.148461,0.084932,0.035022,0.010276,0.003411,1569440
74,HKG,7096.190476,0.082736,0.076166,0.124081,0.152858,0.152591,0.168482,0.131618,0.062343,0.049123,7450999
145,SGP,7952.998418,0.080917,0.095231,0.146453,0.156891,0.167597,0.164811,0.125190,0.041423,0.021485,5638677
143,SDN,,0.282054,0.232615,0.170760,0.117938,0.084389,0.056906,0.034176,0.016174,0.004987,41801532
