# Our Country: Colombia

In [1]:
### Uncomment this to download
# !pip install wbdata
import numpy as np
import pandas as pd
import wbdata
import re

# Population Statistics

In [2]:
def get_indicators_dict(src):
    indicators = wbdata.get_indicator(source = src)
    
    indicator_dict = {}
    for indicator in indicators:
        ID = indicator['id']
        name = indicator['name']
        indicator_dict[name] = ID
        
    return indicator_dict
    

def population(year, sex, age_range, place, whole_df = False, mute=True):
    src = 40
    
    indicator_dict = get_indicators_dict(src)
    
    try:
        try: # For queries like 'Population ages 10-14, female'
            label = f'Population ages {age_range[0]}-{age_range[1]}, {sex.lower()}'
            variable_labels = {(indicator_dict[label]): label}
            
        except: # For queries like 'Population ages 65 and above, female'
            label = f'Population ages {age_range[0]} and {age_range[1]}, {sex.lower()}'
            variable_labels = {(indicator_dict[label]): label}            

        try:
            df = wbdata.get_dataframe(variable_labels, country=place)
            df.index = df.index.astype(int)
            if not whole_df:
                return df.loc[year, label]
            else: ### Returns the whole df if no year specified
                return df
        except:
            if not mute:
                print('Invalid Country: Use find_country() to find valid countries')
            return 'Invalid Country'
        
    except:
        valid_queries = get_indicators_dict(40).keys()
        valid_queries = [q for q in valid_queries if q.find('Population ages') != -1 & q.find('(% of') == -1]
        error_message = f'''
Invalid Query:
Valid Queries:
{valid_queries}
        '''
        
        if not mute:
            print(error_message)
        return 'Invalid Query'

def find_country(country):
    print(wbdata.search_countries())

In [3]:
eg = population(2020, 'male', ('00','04'), 'COL')
eg

1873332.0

# Unit Tests

In [4]:
def unit_test(case):
    if case:
        print('Test Case Passed')
        return case
    else:
        print('Test Case Failed')
        return case

### Test the Value Returns Correct Answers

In [5]:
n = 1

### Test the Unit Test Works
case = population(2020, 'total', ('15','64'), 'COL') < 0
print(f'Test {n}:')
unit_test(~(case))
n += 1

### Basic Functionality
case = population(2020, 'total', ('15','64'), 'COL') > 1e6
print(f'\nTest {n}:')
unit_test(case)
n += 1

### Test the year hyperparameter works
case = population(2020, 'total', ('15','64'), 'COL') != population(2021, 'total', ('15','64'), 'COL')
print(f'\nTest {n}:')
unit_test(case)
n += 1

### Test the sex hyperparameter works
case = population(2020, 'total', ('15','64'), 'COL') > population(2020, 'male', ('15','64'), 'COL')
print(f'\nTest {n}:')
unit_test(case)
n += 1

### Test the place hyperparameter works
case = population(2020, 'total', ('15','64'), 'COL') != population(2020, 'total', ('15','64'), 'USA')
print(f'\nTest {n}:')
unit_test(case)
n += 1

### Test the age_range hyperparameter works w/ '-'
case = population(2020, 'male', ('15','19'), 'COL') != population(2020, 'male', ('0','14'), 'COL')
print(f'\nTest {n}:')
unit_test(case)
n += 1

### Test the age_range hyperparameter works w/ '-'
case = type(population(2020, 'male', ('80','above'), 'COL')) == int
print(f'\nTest {n}:')
unit_test(case)
n += 1

Test 1:
Test Case Passed

Test 2:
Test Case Passed

Test 3:
Test Case Passed

Test 4:
Test Case Passed

Test 5:
Test Case Passed

Test 6:
Test Case Passed

Test 7:
Test Case Failed


### Test the Error Messages Work

In [6]:
n = 1

### Test the Invalid Country Message Works
case = population(2020, 'total', ('15','64'), 'SPAM', mute=True) == 'Invalid Country'
print(f'Test {n}:')
unit_test(~(case))
n += 1

### Test the Invalid Query Message Works
case = population(2020, 'total', ('15','SPAM'), 'COL', mute=True) == 'Invalid Query'
print(f'\nTest {n}:')
unit_test(case)
n += 1

Test 1:
Test Case Passed

Test 2:
Test Case Passed


# Population Dataframes

In [9]:
all_countries = wbdata.get_country()

country_list = []
for country in all_countries:
    c = country['id']
    country_list += [c]

In [21]:
def get_valid_ranges(total = False):
    if total:
        valid_queries = get_indicators_dict(40).keys()
        valid_queries = [q for q in valid_queries if (q.find('Population ages') != -1) 
                         & (q.find(', total') != -1) & (q.find('(% of') == -1)]
    else:
        valid_queries = get_indicators_dict(40).keys()
        valid_queries = [q for q in valid_queries if (q.find('Population ages') != -1) 
                         & (q.find(', female') != -1) & (q.find('(% of') == -1)]
    
    valid_ranges = []
    for q in valid_queries:
        m = re.search(r'(\d+-\d+|\d+ and above)', q)
        age_range = m.group(1)
        valid_ranges += [age_range]

    valid_ranges = [(re.split(r"[- | '']", r)[0], 
                     re.split(r"[- | '']", r)[-1]) for r in valid_ranges]

    return valid_ranges
        

def population_dataframe(place):   
    data_dict = {}
    sexes = ['male', 'female']

    valid_ranges = get_valid_ranges()

    ## Need to iterate for male & female
    for s in sexes:
    ### Need to iterate for each age_range
        for r in valid_ranges:
            if r[1] == 'above':
                col_name = f'Population ages {r[0]} and {r[1]}, {s}'
            else:
                col_name = f'Population ages {r[0]}-{r[1]}, {s}'

            pop_df = population(2020, s, r, place, whole_df = True)

            try:
                data_dict['Year'] = pop_df.index
                data_dict[col_name] = pop_df.iloc[:, 0]
                data_dict['Country'] = [place] * len(data_dict['Year'])
            except: # Skips if query fails
                pass

    ### For the Totals Columns
    valid_ranges = get_valid_ranges(total = True)
    
    for r in valid_ranges:
        if r[1] == 'above':
            col_name = f'Population ages {r[0]} and {r[1]}, total'
        else:
            col_name = f'Population ages {r[0]}-{r[1]}, total'

        pop_df = population(2020, s, r, place, whole_df = True)

        try:
            data_dict[col_name] = pop_df.iloc[:, 0]
        except: # Skips if query fails
            pass   
    
    try:
        cdf = pd.DataFrame(data_dict)
        cdf.set_index(['Year', 'Country'], inplace = True)
        return cdf
    except: # Skips if df does not exist (i.e. Only Invalid Queries/Countries)
        return 'Could Not Return Dataframe'

In [25]:
eg = population_dataframe('USA')
eg

Unnamed: 0_level_0,Unnamed: 1_level_0,"Population ages 00-04, male","Population ages 0-14, male","Population ages 05-09, male","Population ages 10-14, male","Population ages 15-19, male","Population ages 15-64, male","Population ages 20-24, male","Population ages 25-29, male","Population ages 30-34, male","Population ages 35-39, male",...,"Population ages 55-59, female","Population ages 60-64, female","Population ages 65-69, female","Population ages 65 and above, female","Population ages 70-74, female","Population ages 75-79, female","Population ages 80 and above, female","Population ages 0-14, total","Population ages 15-64, total","Population ages 65 and above, total"
Year,Country,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2022,USA,9525154.0,30623240.0,10297548.0,10800538.0,11192818.0,108573270.0,11124852.0,11333685.0,11779249.0,11273423.0,...,10715107.0,10828271.0,9747250.0,31261065.0,7933581.0,5857505.0,7722729.0,29233333.0,107771821.0,31261065.0
2021,USA,9653506.0,30993539.0,10354347.0,10985686.0,11097835.0,108436361.0,11056317.0,11462637.0,11660109.0,11210088.0,...,10935847.0,10788833.0,9674037.0,30327543.0,7728504.0,5440730.0,7484272.0,29579093.0,107643366.0,30327543.0
2020,USA,9852347.0,31402764.0,10415543.0,11134875.0,11075090.0,108576684.0,11019470.0,11624513.0,11559052.0,11181543.0,...,11142190.0,10724223.0,9509650.0,29453385.0,7454655.0,5166253.0,7322827.0,29964491.0,107785134.0,29453385.0
2019,USA,9966405.0,31478486.0,10406055.0,11106026.0,10987004.0,107831375.0,10951050.0,11663144.0,11361102.0,11050733.0,...,11163030.0,10564689.0,9015729.0,28428926.0,7227138.0,4994841.0,7191218.0,30037086.0,107133793.0,28428926.0
2018,USA,10075446.0,31624948.0,10491691.0,11057812.0,10944241.0,107615727.0,11028203.0,11698726.0,11234670.0,10938202.0,...,11184265.0,10435807.0,8691035.0,27654382.0,6996163.0,4809893.0,7157291.0,30183494.0,107088472.0,27654382.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1964,USA,10238054.0,29829126.0,10229317.0,9361755.0,8052724.0,56898756.0,6268166.0,5553623.0,6286435.0,6174451.0,...,4836839.0,3889258.0,3645486.0,10309976.0,2859831.0,1961767.0,1842892.0,28775954.0,58120062.0,10309976.0
1963,USA,10326196.0,29456293.0,10102636.0,9027461.0,7805866.0,56166983.0,6007957.0,5533402.0,6463115.0,6117296.0,...,4672679.0,3895651.0,3556866.0,10031758.0,2803929.0,1901340.0,1769622.0,28403741.0,57328979.0,10031758.0
1962,USA,10374649.0,29176142.0,9913431.0,8888062.0,7456087.0,55323255.0,5699131.0,5614626.0,6494632.0,6170912.0,...,4544874.0,3881892.0,3433035.0,9744704.0,2762829.0,1843506.0,1705335.0,28177770.0,56371329.0,9744704.0
1961,USA,10370108.0,28878168.0,9880207.0,8627853.0,6940722.0,54428400.0,5575583.0,5677222.0,6527553.0,6049136.0,...,4461554.0,3834536.0,3349577.0,9465807.0,2691523.0,1783765.0,1640941.0,27904227.0,55377628.0,9465807.0


# Population Pyramids