# Our Country: Colombia

In [29]:
### Uncomment this to download
# !pip install wbdata
import pandas
import wbdata
import re

# Population Statistics

In [376]:
def get_indicators_dict(src):
    indicators = wbdata.get_indicator(source = src)
    
    indicator_dict = {}
    for indicator in indicators:
        ID = indicator['id']
        name = indicator['name']
        indicator_dict[name] = ID
        
    return indicator_dict
    

def population(year, sex, age_range, place, whole_df = False, mute=True):
    src = 40
    
    indicator_dict = get_indicators_dict(src)
    
    try:
        try: # For queries like 'Population ages 10-14, female'
            label = f'Population ages {age_range[0]}-{age_range[1]}, {sex.lower()}'
            variable_labels = {(indicator_dict[label]): label}
            
        except: # For queries like 'Population ages 65 and above, female'
            label = f'Population ages {age_range[0]} and {age_range[1]}, {sex.lower()}'
            variable_labels = {(indicator_dict[label]): label}            

        try:
            df = wbdata.get_dataframe(variable_labels, country=place)
            df.index = df.index.astype(int)
            if not whole_df:
                return df.loc[year, label]
            else: ### Returns the whole df if no year specified
                return df
        except:
            if not mute:
                print('Invalid Country: Use find_country() to find valid countries')
            return 'Invalid Country'
        
    except:
        valid_queries = get_indicators_dict(40).keys()
        valid_queries = [q for q in valid_queries if q.find('Population ages') != -1 & q.find('(% of') == -1]
        error_message = f'''
Invalid Query:
Valid Queries:
{valid_queries}
        '''
        
        if not mute:
            print(error_message)
        return 'Invalid Query'

def find_country(country):
    print(wbdata.search_countries())

In [377]:
eg = population(2020, 'male', ('00','04'), 'COL', whole_df = True)
eg

Unnamed: 0_level_0,"Population ages 00-04, male"
date,Unnamed: 1_level_1
2022,1863450.0
2021,1872904.0
2020,1873332.0
2019,1868705.0
2018,1860651.0
...,...
1964,1688662.0
1963,1649980.0
1962,1607957.0
1961,1563136.0


# Unit Tests

In [318]:
def unit_test(case):
    if case:
        print('Test Case Passed')
        return case
    else:
        print('Test Case Failed')
        return case

### Test the Value Returns Correct Answers

In [320]:
n = 1

### Test the Unit Test Works
case = population(2020, 'total', ('15','64'), 'COL') < 0
print(f'Test {n}:')
unit_test(~(case))
n += 1

### Basic Functionality
case = population(2020, 'total', ('15','64'), 'COL') > 1e6
print(f'\nTest {n}:')
unit_test(case)
n += 1

### Test the year hyperparameter works
case = population(2020, 'total', ('15','64'), 'COL') != population(2021, 'total', ('15','64'), 'COL')
print(f'\nTest {n}:')
unit_test(case)
n += 1

### Test the sex hyperparameter works
case = population(2020, 'total', ('15','64'), 'COL') > population(2020, 'male', ('15','64'), 'COL')
print(f'\nTest {n}:')
unit_test(case)
n += 1

### Test the place hyperparameter works
case = population(2020, 'total', ('15','64'), 'COL') != population(2020, 'total', ('15','64'), 'USA')
print(f'\nTest {n}:')
unit_test(case)
n += 1

### Test the age_range hyperparameter works w/ '-'
case = population(2020, 'male', ('15','19'), 'COL') != population(2020, 'male', ('0','14'), 'COL')
print(f'\nTest {n}:')
unit_test(case)
n += 1

### Test the age_range hyperparameter works w/ '-'
case = type(population(2020, 'male', ('80','above'), 'COL')) == int
print(f'\nTest {n}:')
unit_test(case)
n += 1

Test 1:
Test Case Passed

Test 2:
Test Case Passed

Test 3:
Test Case Passed

Test 4:
Test Case Passed

Test 5:
Test Case Passed

Test 6:
Test Case Passed

Test 7:
Test Case Failed


### Test the Error Messages Work

In [321]:
n = 1

### Test the Invalid Country Message Works
case = population(2020, 'total', ('15','64'), 'SPAM', mute=True) == 'Invalid Country'
print(f'Test {n}:')
unit_test(~(case))
n += 1

### Test the Invalid Query Message Works
case = population(2020, 'total', ('15','SPAM'), 'COL', mute=True) == 'Invalid Query'
print(f'\nTest {n}:')
unit_test(case)
n += 1

Test 1:
Test Case Passed

Test 2:
Test Case Passed


# Population Dataframes

In [322]:
all_countries = wbdata.get_country()

country_list = []
for country in all_countries:
    c = country['id']
    country_list += [c]

In [358]:
valid_queries = get_indicators_dict(40).keys()
valid_queries = [q for q in valid_queries if (q.find('Population ages') != -1) 
                 & (q.find(', female') != -1) & (q.find('(% of') == -1)]
valid_ranges = []
for q in valid_queries:
    m = re.search(r'(\d+-\d+|\d+ and above)', q)
    age_range = m.group(1)
    valid_ranges += [age_range]

valid_ranges = [(re.split(r"[- | '']", r)[0], 
                 re.split(r"[- | '']", r)[-1]) for r in valid_ranges]

valid_ranges

[('00', '04'),
 ('0', '14'),
 ('05', '09'),
 ('10', '14'),
 ('15', '19'),
 ('15', '64'),
 ('20', '24'),
 ('25', '29'),
 ('30', '34'),
 ('35', '39'),
 ('40', '44'),
 ('45', '49'),
 ('50', '54'),
 ('55', '59'),
 ('60', '64'),
 ('65', '69'),
 ('65', 'above'),
 ('70', '74'),
 ('75', '79'),
 ('80', 'above')]

In [359]:
# Need to iterate for each country
## Need to iterate for each year
### Need to iterate for male & female
#### Need to iterate for each age_range


('80', 'above')

# Population Pyramids