# Our Country: Colombia

In [1]:
### Uncomment this to download
# !pip install wbdata
import numpy as np
import pandas as pd
import wbdata
import re

# Population Statistics

In [2]:
def get_indicators_dict(src):
    indicators = wbdata.get_indicator(source = src)
    
    indicator_dict = {}
    for indicator in indicators:
        ID = indicator['id']
        name = indicator['name']
        indicator_dict[name] = ID
        
    return indicator_dict
    

def population(year, sex, age_range, place, whole_df = False, mute=True):
    src = 40
    
    indicator_dict = get_indicators_dict(src)
    
    try:
        try: # For queries like 'Population ages 10-14, female'
            label = f'Population ages {age_range[0]}-{age_range[1]}, {sex.lower()}'
            variable_labels = {(indicator_dict[label]): label}
            
        except: # For queries like 'Population ages 65 and above, female'
            label = f'Population ages {age_range[0]} and {age_range[1]}, {sex.lower()}'
            variable_labels = {(indicator_dict[label]): label}            

        try:
            df = wbdata.get_dataframe(variable_labels, country=place)
            df.index = df.index.astype(int)
            if not whole_df:
                return df.loc[year, label]
            else: ### Returns the whole df if no year specified
                return df
        except:
            if not mute:
                print('Invalid Country: Use find_country() to find valid countries')
            return 'Invalid Country'
        
    except:
        valid_queries = get_indicators_dict(40).keys()
        valid_queries = [q for q in valid_queries if q.find('Population ages') != -1 & q.find('(% of') == -1]
        error_message = f'''
Invalid Query:
Valid Queries:
{valid_queries}
        '''
        
        if not mute:
            print(error_message)
        return 'Invalid Query'

def find_country(country):
    print(wbdata.search_countries())

In [3]:
eg = population(2020, 'male', ('00','04'), 'COL')
eg

1873332.0

# Unit Tests

In [4]:
def unit_test(case):
    if case:
        print('Test Case Passed')
        return case
    else:
        print('Test Case Failed')
        return case

### Test the Value Returns Correct Answers

In [5]:
n = 1

### Test the Unit Test Works
case = population(2020, 'total', ('15','64'), 'COL') < 0
print(f'Test {n}:')
unit_test(~(case))
n += 1

### Basic Functionality
case = population(2020, 'total', ('15','64'), 'COL') > 1e6
print(f'\nTest {n}:')
unit_test(case)
n += 1

### Test the year hyperparameter works
case = population(2020, 'total', ('15','64'), 'COL') != population(2021, 'total', ('15','64'), 'COL')
print(f'\nTest {n}:')
unit_test(case)
n += 1

### Test the sex hyperparameter works
case = population(2020, 'total', ('15','64'), 'COL') > population(2020, 'male', ('15','64'), 'COL')
print(f'\nTest {n}:')
unit_test(case)
n += 1

### Test the place hyperparameter works
case = population(2020, 'total', ('15','64'), 'COL') != population(2020, 'total', ('15','64'), 'USA')
print(f'\nTest {n}:')
unit_test(case)
n += 1

### Test the age_range hyperparameter works w/ '-'
case = population(2020, 'male', ('15','19'), 'COL') != population(2020, 'male', ('0','14'), 'COL')
print(f'\nTest {n}:')
unit_test(case)
n += 1

### Test the age_range hyperparameter works w/ '-'
case = type(population(2020, 'male', ('80','above'), 'COL')) == int
print(f'\nTest {n}:')
unit_test(case)
n += 1

Test 1:
Test Case Passed

Test 2:
Test Case Passed

Test 3:
Test Case Passed

Test 4:
Test Case Passed

Test 5:
Test Case Passed

Test 6:
Test Case Passed

Test 7:
Test Case Failed


### Test the Error Messages Work

In [6]:
n = 1

### Test the Invalid Country Message Works
case = population(2020, 'total', ('15','64'), 'SPAM', mute=True) == 'Invalid Country'
print(f'Test {n}:')
unit_test(~(case))
n += 1

### Test the Invalid Query Message Works
case = population(2020, 'total', ('15','SPAM'), 'COL', mute=True) == 'Invalid Query'
print(f'\nTest {n}:')
unit_test(case)
n += 1

Test 1:
Test Case Passed

Test 2:
Test Case Passed


# Population Dataframes

In [9]:
all_countries = wbdata.get_country()

country_list = []
for country in all_countries:
    c = country['id']
    country_list += [c]

In [12]:
def population_dataframe(place):   
    data_dict = {}
    sexes = ['male', 'female']
    
    valid_queries = get_indicators_dict(40).keys()
    valid_queries = [q for q in valid_queries if (q.find('Population ages') != -1) 
                     & (q.find(', female') != -1) & (q.find('(% of') == -1)]
    valid_ranges = []
    for q in valid_queries:
        m = re.search(r'(\d+-\d+|\d+ and above)', q)
        age_range = m.group(1)
        valid_ranges += [age_range]

    valid_ranges = [(re.split(r"[- | '']", r)[0], 
                     re.split(r"[- | '']", r)[-1]) for r in valid_ranges]

    valid_ranges

    ## Need to iterate for male & female
    for s in sexes:
    ### Need to iterate for each age_range
        for r in valid_ranges:
            if r[1] == 'above':
                col_name = f'Population ages {r[0]} and {r[1]}, {s}'
            else:
                col_name = f'Population ages {r[0]}-{r[1]}, {s}'

            pop_df = population(2020, s, r, place, whole_df = True)

            try:
                data_dict['Year'] = pop_df.index
                data_dict[col_name] = pop_df.iloc[:, 0]
                data_dict['Country'] = [place] * len(data_dict['Year'])
            except: # Skips if query fails
                pass

    try:
        cdf = pd.DataFrame(data_dict)
        cdf.set_index(['Year', 'Country'], inplace = True)
        return cdf
    except: # Skips if df does not exist (i.e. Only Invalid Queries/Countries)
        return 'Could Not Return Dataframe'

In [13]:
dummy = population_dataframe('COL')

In [14]:
dummy

Unnamed: 0_level_0,Unnamed: 1_level_0,"Population ages 00-04, male","Population ages 0-14, male","Population ages 05-09, male","Population ages 10-14, male","Population ages 15-19, male","Population ages 15-64, male","Population ages 20-24, male","Population ages 25-29, male","Population ages 30-34, male","Population ages 35-39, male",...,"Population ages 40-44, female","Population ages 45-49, female","Population ages 50-54, female","Population ages 55-59, female","Population ages 60-64, female","Population ages 65-69, female","Population ages 65 and above, female","Population ages 70-74, female","Population ages 75-79, female","Population ages 80 and above, female"
Year,Country,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2022,COL,1863450.0,5647955.0,1879870.0,1904636.0,2044762.0,17865331.0,2211820.0,2271699.0,2176677.0,1980767.0,...,1824633.0,1618113.0,1531546.0,1464314.0,1243123.0,960937.0,2605831.0,696194.0,472134.0,476565.0
2021,COL,1872904.0,5675317.0,1880579.0,1921834.0,2085844.0,17749414.0,2237669.0,2272080.0,2145863.0,1947363.0,...,1785391.0,1593458.0,1532362.0,1443643.0,1201652.0,922318.0,2501582.0,665106.0,452538.0,461621.0
2020,COL,1873332.0,5695619.0,1880087.0,1942201.0,2113654.0,17526007.0,2239826.0,2247557.0,2098935.0,1903638.0,...,1739827.0,1572269.0,1529761.0,1414124.0,1157392.0,883473.0,2397042.0,634029.0,432973.0,446568.0
2019,COL,1868705.0,5716525.0,1881583.0,1966236.0,2131193.0,17230258.0,2228019.0,2207686.0,2045197.0,1854395.0,...,1691680.0,1555811.0,1522164.0,1375497.0,1109581.0,842826.0,2284866.0,600957.0,411437.0,429646.0
2018,COL,1860651.0,5736828.0,1884949.0,1991227.0,2134154.0,16856472.0,2198986.0,2150158.0,1984049.0,1798082.0,...,1643350.0,1542927.0,1507398.0,1330663.0,1060247.0,799790.0,2170200.0,569642.0,389033.0,411735.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1964,COL,1688662.0,4255437.0,1406775.0,1159999.0,919903.0,4363247.0,732719.0,587877.0,498942.0,419034.0,...,353056.0,295477.0,257018.0,200371.0,156923.0,123563.0,310012.0,88759.0,57667.0,40023.0
1963,COL,1649980.0,4122271.0,1358786.0,1113505.0,884553.0,4233603.0,706668.0,574609.0,488153.0,408355.0,...,342569.0,291728.0,248694.0,193886.0,153687.0,120213.0,300078.0,86701.0,55023.0,38140.0
1962,COL,1607957.0,3984776.0,1311023.0,1065797.0,853571.0,4112817.0,682476.0,563376.0,477068.0,397490.0,...,332585.0,288949.0,239125.0,188221.0,150961.0,116467.0,290189.0,84973.0,52200.0,36549.0
1961,COL,1563136.0,3848515.0,1263980.0,1021399.0,822609.0,3996507.0,660764.0,552624.0,466079.0,386731.0,...,323832.0,285283.0,229780.0,183074.0,148274.0,112844.0,280704.0,83135.0,49499.0,35224.0


# Population Pyramids