# Our Country: Colombia

In [None]:
### Uncomment this to download
# !pip install wbdata
# !pip install cufflinks
import numpy as np
import pandas as pd
import wbdata
import re
import plotly.offline as plto
import plotly.graph_objs as go
import matplotlib.pyplot as plt
import cufflinks as cf

# Population Statistics

In [None]:
def is_estimation(lower_bound, upper_bound, lower_bounds):
    """Report whether a requested age span misses the age-bucket edges.

    The span lines up exactly only when ``lower_bound`` equals one of
    ``lower_bounds`` and ``upper_bound`` is one year below one of
    ``lower_bounds`` (i.e. it is the last year of the preceding bucket).

    Returns True when either edge misses (the answer is an estimate),
    False when both edges line up.
    """
    starts_on_edge = any(edge == lower_bound for edge in lower_bounds)
    ends_on_edge = any(edge - 1 == upper_bound for edge in lower_bounds)
    return not (starts_on_edge and ends_on_edge)

def get_age_ranges():
    """Return the list of age-bucket codes, youngest first.

    Each five-year bucket is a four-character code: the two-digit first age
    followed by the two-digit last age ("0004", "0509", ..., "7579").
    The final entry, "80UP", is the open-ended bucket for ages 80 and over.
    """
    # Buckets start every five years from 0 up to (not including) 80
    buckets = [f"{start:02d}{start + 4:02d}" for start in range(0, 80, 5)]
    buckets.append("80UP")
    return buckets

def find_country(country):
    """Print the wbdata country-search result for ``country``.

    Nothing is returned; the search result is only printed.
    """
    matches = wbdata.search_countries(country)
    print(matches)

def population(year, sex, age_range, place):
    """Return the population of one sex within an age span, for a place and year.

    Parameters
    ----------
    year : int
        Year to look up; the fetched index is converted to ``int`` before the lookup.
    sex : str
        'female' (any letter case) selects the female series; every other
        value selects the male series.
    age_range : tuple
        ``(lower_bound, upper_bound)`` ages. Data only exists per age bucket
        (see ``get_age_ranges``), so a span that does not line up with the
        bucket edges is answered with an estimate and a notice is printed.
    place : str
        Passed to ``wbdata.get_dataframe`` as ``country``.

    Returns
    -------
    The summed population of the selected buckets for ``year``, or the string
    'Invalid Country' when the data cannot be fetched, or 'Invalid Year' when
    ``year`` is not in the fetched data.
    """
    sex = sex.lower()

    lower_bound = age_range[0]
    upper_bound = age_range[1]
    ar = get_age_ranges()
    ar_lower_bounds = [int(r[0:2]) for r in ar]

    # The bucket list is sorted, so counting the buckets that start below each
    # bound gives the slice of buckets covering the requested span.
    first_bucket = sum(lb < lower_bound for lb in ar_lower_bounds)
    last_bucket = sum(lb < upper_bound for lb in ar_lower_bounds)
    desired_indexes = ar[first_bucket:last_bucket]

    if sex == 'female':
        desired_categories = {"SP.POP."+r+".FE":"Females "+r for r in desired_indexes} ### Female Categories
    else:
        desired_categories = {"SP.POP."+r+".MA":"Males "+r for r in desired_indexes} ### Male Categories

    # Only the fetch is guarded, so a failure in the later steps is no longer
    # misreported as an invalid country. `Exception` (rather than a bare
    # except) lets KeyboardInterrupt/SystemExit through.
    try:
        df = wbdata.get_dataframe(desired_categories, country=place)
        df.index = df.index.astype(int)
    except Exception:  # Catches error resulting from invalid place entry
        print(f'{place} is an Invalid Country. Use find_country() to find valid countries')
        return 'Invalid Country'

    if is_estimation(lower_bound, upper_bound, ar_lower_bounds):
        print(f'Exact Range Not Given. Answer is an Estimation. Included ranges for estimation are {(df.columns).to_numpy()}')

    # Add the selected buckets together, giving one total per year
    totals = df.sum(axis=1)

    try:
        return totals.loc[year]
    except (KeyError, TypeError):  # Catches error resulting from invalid year entry
        print(f'{year} is an Invalid Year. Ensure that year is between {min(totals.index)} and {max(totals.index)}')
        return 'Invalid Year'

In [None]:
### Total population of Colombia in 2020: male count plus female count over all ages
year, place, age_range = 2020, 'COL', (0, 100)

sex = 'male'
m_pop = population(year=year, sex=sex, age_range=age_range, place=place)

sex = 'female'
f_pop = population(year=year, sex=sex, age_range=age_range, place=place)

tot_pop = m_pop + f_pop
tot_pop

# Population Dataframes

In [None]:
def get_categories():
    """Map every male and female age-bucket indicator id to a column label.

    Keys look like "SP.POP.0004.MA" / "SP.POP.0004.FE" and values like
    "Males 0004" / "Females 0004". All male entries come first, then all
    female entries, each in the bucket order given by ``get_age_ranges``.
    """
    # Reuse the shared bucket list instead of rebuilding it here, so the two
    # cannot drift apart.
    ar = get_age_ranges()

    m_cat = {"SP.POP."+r+".MA":"Males "+r for r in ar} ### Male Categories
    f_cat = {"SP.POP."+r+".FE":"Females "+r for r in ar} ### Female Categories

    return {**m_cat, **f_cat}

def population_dataframe(place):
    """Fetch every male/female age-bucket population series for ``place``.

    ``place`` is passed to ``wbdata.get_dataframe`` as ``country``. The
    columns requested are those listed by ``get_categories``.

    Returns the fetched DataFrame, or the string 'Invalid Country' (after
    printing a message) when the fetch raises.
    """
    cats = get_categories()
    try:
        return wbdata.get_dataframe(cats, country=place)
    except Exception:
        # `Exception` rather than a bare except, so interrupting a slow
        # download (KeyboardInterrupt) is not reported as a bad country.
        print(f'Could Not Return a Dataframe for {place}')
        return 'Invalid Country'

In [None]:
### Fetch the male/female age-bucket table for Colombia and display it
eg = population_dataframe('COL')
eg

# Population Pyramids

In [None]:
def plot_population_pyramid(pop_df, year):
    """Show a population pyramid for one year of ``pop_df``.

    Parameters
    ----------
    pop_df : DataFrame
        Table whose rows are looked up by ``str(year)`` and whose column
        labels contain "Male" or "Female" (as built by ``population_dataframe``).
    year : int
        Year to plot.

    Returns
    -------
    None after the figure is shown, or the string 'Invalid Year' (after
    printing a message) when ``year`` is not a row of ``pop_df``.
    """
    plto.init_notebook_mode(connected=True)

    # Only the row lookup can fail because of a bad year, so it is the only
    # guarded step; a plotting failure is no longer reported as a bad year.
    try:
        row = pop_df.loc[str(year), :]
    except KeyError:
        print(f'{year} is an Invalid Year. Ensure that year is between {min(pop_df.index)} and {max(pop_df.index)}')
        return 'Invalid Year'

    # Each bar sits one unit above the first age of its bucket ("80UP" -> 81)
    bar_positions = [int(s[:2]) + 1 for s in get_age_ranges()]

    bins = [go.Bar(x=row.filter(regex="Male").values,
                   y=bar_positions,
                   orientation='h',
                   name='Men',
                   marker=dict(color='blue'),
                   hoverinfo='skip'
                   ),

            # Women are negated so their bars extend to the left of zero
            go.Bar(x=-row.filter(regex="Female").values,
                   y=bar_positions,
                   orientation='h',
                   name='Women',
                   marker=dict(color='red'),
                   hoverinfo='skip',
                   )
            ]

    # The final titles are set directly instead of placeholders that were
    # immediately overwritten.
    layout = go.Layout(barmode='overlay',
                       yaxis=go.layout.YAxis(range=[0, 90], title='Age Range'),
                       xaxis=go.layout.XAxis(title='Population'),
                       title=dict(text=f'Population Pyramid in {year}',
                                  x=0.5,
                                  font=dict(color='black', family='Arial', size=24))
                      )

    fig = go.Figure(data=bins, layout=layout)
    fig.show()

In [None]:
### One pyramid per decade: np.arange excludes the stop value, so the years are 1970, 1980, ..., 2020
pop_df = population_dataframe('COL')
for year in np.arange(1970, 2030, 10):
    plot_population_pyramid(pop_df, year)

# Unit Tests

In [None]:
def unit_test(case):
    """Print whether ``case`` is truthy and hand ``case`` back unchanged.

    Prints 'Test Case Passed' for a truthy ``case`` and 'Test Case Failed'
    otherwise.
    """
    message = 'Test Case Passed' if case else 'Test Case Failed'
    print(message)
    return case

### population( ) Test the Value Returns Correct Answers

In [None]:
n = 1

### Test the Unit Test Works: a population count is never negative
year = 2020
place = 'COL'
age_range = (0, 59)

sex = 'male'
pop = population(year, sex, age_range, place)
### Compare directly instead of negating with `~`: on a plain Python bool,
### `~` gives -1 or -2, both truthy, so the test could never fail.
case = pop >= 0
print(f'Test {n}:')
unit_test(case)
n += 1

### Basic Functionality
year = 2020
place = 'COL'
age_range = (0, 59)

sex = 'male'
m_pop = population(year, sex, age_range, place)

sex = 'female'
f_pop = population(year, sex, age_range, place)

tot_pop = m_pop + f_pop
case = tot_pop > 4e7
print(f'\nTest {n}:')
unit_test(case)
n += 1

### Test the year argument works
sex = 'male'
place = 'COL'
age_range = (0, 59)

year = 2020
m_pop1 = population(year, sex, age_range, place)

year = 2021
m_pop2 = population(year, sex, age_range, place)

case = m_pop1 != m_pop2
print(f'\nTest {n}:')
unit_test(case)
n += 1

### Test the sex argument works
year = 2020
place = 'COL'
age_range = (0, 59)

sex = 'male'
m_pop = population(year, sex, age_range, place)

sex = 'female'
f_pop = population(year, sex, age_range, place)

case = m_pop != f_pop
print(f'\nTest {n}:')
unit_test(case)
n += 1

### Test the place argument works
sex = 'male'
year = 2020
age_range = (0, 59)

place = 'COL'
m_pop1 = population(year, sex, age_range, place)

place = 'USA'
m_pop2 = population(year, sex, age_range, place)

case = m_pop1 != m_pop2
print(f'\nTest {n}:')
unit_test(case)
n += 1

### Test the age_range argument works
sex = 'male'
place = 'COL'
year = 2020

age_range = (0, 59)
m_pop1 = population(year, sex, age_range, place)

age_range = (0, 49)
m_pop2 = population(year, sex, age_range, place)

case = m_pop1 != m_pop2
print(f'\nTest {n}:')
unit_test(case)
n += 1

### population( ) Test the Error Messages Work

In [None]:
n = 1

### Test the Invalid Country Message Works
sex = 'male'
place = 'SPAM'
year = 2020
age_range = (0, 59)

pop = population(year, sex, age_range, place)
case = pop == 'Invalid Country'
print(f'Test {n}:')
### Pass `case` itself: the test succeeds when the error string comes back.
### The previous `~(case)` inverted the check and, being -1 or -2 on a
### Python bool, was always truthy, so the test could never fail.
unit_test(case)
n += 1

### Test the Invalid Year Message Works
sex = 'male'
place = 'COL'
year = 0
age_range = (0, 59)

pop = population(year, sex, age_range, place)
case = pop == 'Invalid Year'
print(f'\nTest {n}:')
unit_test(case)
n += 1

## population_dataframe( ) Unit Tests

In [None]:
### Basic Functionality
n = 1
eg = population_dataframe('COL')
### The original check pinned exactly 63 rows, which breaks as soon as another
### year of data is published. Presumably rows are only ever added (TODO
### confirm), so accept 63 or more. The error string 'Invalid Country' has
### fewer than 63 characters, so a failed fetch still fails this test.
case = len(eg) >= 63

print(f'\nTest {n}:')
unit_test(case)
n += 1

### Test the Invalid Country error message works
eg = population_dataframe('SPAM')
### The isinstance guard keeps `case` a plain bool: comparing a DataFrame with a
### string gives an element-wise result that unit_test cannot evaluate as a truth value.
case = isinstance(eg, str) and eg == 'Invalid Country'

print(f'\nTest {n}:')
unit_test(case)
n += 1

## plot_population_pyramid( ) Unit Tests

In [None]:
### Basic Functionality
n = 1
pop_df = population_dataframe('COL')

eg = plot_population_pyramid(pop_df, 2020)
case = eg is None ### A successful plot returns nothing, so the result is None

print(f'\nTest {n}:')
unit_test(case)
n += 1

### Test the Invalid Year error message works
eg = plot_population_pyramid(pop_df, 0)
case = eg == 'Invalid Year'

print(f'\nTest {n}:')
unit_test(case)
n += 1

# Extras: Animated Population Pyramids

In [None]:
def plot_population_pyramid_animated(pop_df, years):
    """Show an animated population pyramid with one frame per entry of ``years``.

    ``pop_df`` rows are looked up by ``str(year)``; columns whose label
    contains "Male" form the men's bars and columns whose label contains
    "Female" form the (negated, left-pointing) women's bars. The figure gets
    a Play button and a year slider. Nothing is returned.
    """
    plto.init_notebook_mode(connected=True)

    title_font = dict(color='black', family='Arial', size=24)
    layout = go.Layout(
        barmode='overlay',
        yaxis=go.layout.YAxis(range=[0, 90], title='Age'),
        xaxis=go.layout.XAxis(title='Number'),
        title=dict(text='Initial Plot Title', x=0.5, font=title_font),
    )

    ar = get_age_ranges()

    def pyramid_traces(year):
        # Men to the right of zero, women (negated) to the left
        row = pop_df.loc[str(year), :]
        men = go.Bar(
            x=row.filter(regex="Male").values,
            y=[int(s[:2]) + 1 for s in ar],
            orientation='h',
            name='Men',
            marker=dict(color='blue'),
            hoverinfo='skip',
        )
        women = go.Bar(
            x=-row.filter(regex="Female").values,
            y=[int(s[:2]) + 1 for s in ar],
            orientation='h',
            name='Women',
            marker=dict(color='red'),
            hoverinfo='skip',
        )
        return [men, women]

    frames = []
    for year in years:
        frames.append(go.Frame(data=pyramid_traces(year), name=str(year)))

    # The first frame doubles as the figure's initial data
    fig = go.Figure(data=frames[0]['data'], layout=layout, frames=frames)

    fig.update_xaxes(title_text='Population')
    fig.update_yaxes(title_text='Age Range')

    fig.update_layout(title_text=f'Population Pyramid Over Time from {years[0]} to {years[-1]}')

    # Play button: run through the frames starting from the current one
    play_button = dict(
        label='Play',
        method='animate',
        args=[None, dict(frame=dict(duration=500, redraw=True), fromcurrent=True)],
    )
    fig.update_layout(updatemenus=[dict(type='buttons', showactive=False, buttons=[play_button])])

    # Slider: one step per frame, each jumping straight to that frame
    slider_steps = []
    for frame in frames:
        step_options = dict(
            frame=dict(duration=300, redraw=True),
            mode='immediate',
            transition=dict(duration=300),
        )
        slider_steps.append(dict(args=[[frame.name], step_options],
                                 label=str(frame.name),
                                 method='animate'))

    year_slider = dict(
        yanchor='top',
        xanchor='left',
        currentvalue=dict(font=dict(size=16), prefix='Year:', visible=True, xanchor='right'),
        transition=dict(duration=300, easing='cubic-in-out'),
        steps=slider_steps,
    )
    fig.update_layout(sliders=[year_slider])

    fig.show()

In [None]:
### One animation frame per year: np.arange excludes the stop value, so the frames run 1970 through 2021
pop_df = population_dataframe('COL')
year_range = np.arange(1970, 2022)
plot_population_pyramid_animated(pop_df, year_range)

# Extras: Additional Population Visualizations

In [None]:
### Put cufflinks in offline mode (presumably needed by the .iplot() calls in the cells below; confirm)
cf.go_offline()

In [None]:
### Print the wbdata country-search results for 'asia'
find_country('asia')

In [None]:
### Population growth rates of five world regions, plotted together
labels = {"SP.POP.TOTL": "Population"}

countries = dict(
    NAC="North America",
    LCN="Latin America & Caribbean",
    EUU="European Union",
    TEA="East Asia & Pacific (IDA & IBRD countries)",
    SAS="South Asia",
)

### Collapse the single-indicator frame to a Series, then pivot 'country' into columns
plot_df = (
    wbdata.get_dataframe(labels, country=countries)
    .squeeze()
    .unstack('country')
)

plot_df.index = plot_df.index.astype(int)

### Growth rate: first difference of the log population
np.log(plot_df).diff().iplot(
    title="Population Growth Rates In LATAM vs Other Regions of the World",
    yTitle="Growth Rate",
    xTitle='Year',
)

In [None]:
### The data fetched here is Colombia's population only, so the column (and
### therefore the plot legend) must not be labelled "World Population"
label = {"SP.POP.TOTL":"Population"}

plot_df = wbdata.get_dataframe(label, country="COL")

plot_df.index = plot_df.index.astype(int)

plot_df = plot_df.sort_index() ### Need it to be ascending

### To get growth rate: np.log(plot_df).diff()
np.log(plot_df).diff().iplot(title="Population Growth Rates in Colombia: Social Unrest Causes Major Drops In Rates", 
                             xTitle='Year', yTitle='Growth Rate')
### Mid 60s: Armed conflict with guerrilla groups breaks out
###          (NOTE(review): the original comment said "Far-Right"; confirm the groups' alignment)
### Late 80s - Early 90s: Pablo Escobar Reign of Terror & Cartel Wars
### 2015 onwards: Collapse of the Venezuelan economy (more immigration to countries like Colombia)
### 2020: COVID

In [None]:
### Population growth rates of Colombia and four neighbouring countries
labels = {"SP.POP.TOTL": "Population"}

countries = dict(
    COL="Colombia",
    BRA="Brazil",
    VEN="Venezuela",
    ECU="Ecuador",
    PER="Peru",
)

### Single indicator: reduce to a Series, then spread the countries across columns
population_by_country = wbdata.get_dataframe(labels, country=countries).squeeze()
plot_df = population_by_country.unstack('country')
del population_by_country  ### helper name only; the original cell never defined it

plot_df.index = plot_df.index.astype(int)

### Growth rate: first difference of the log population
np.log(plot_df).diff().iplot(
    title="Population Growth Rates In Colombia vs Neighbors",
    yTitle="Growth Rate",
    xTitle='Year',
)

In [None]:
### Population growth rates: Colombia against Venezuela
labels = {"SP.POP.TOTL": "Population"}
countries = {"COL": "Colombia", "VEN": "Venezuela"}

### Fetch, collapse the single indicator to a Series, and give each country its own column
plot_df = wbdata.get_dataframe(labels, country=countries).squeeze().unstack('country')
plot_df.index = plot_df.index.astype(int)

### Growth rate: first difference of the log population
np.log(plot_df).diff().iplot(
    title="Population Growth Rates In Colombia vs Venezuela",
    yTitle="Growth Rate",
    xTitle='Year',
)

# Extras: Agricultural Supply