In [1]:
import pandas as pd
import numpy as np

In [2]:
%autosave 0

Autosave disabled


In [10]:
# Read the 2016 Uist data workbook into a DataFrame. The path is relative, so
# it resolves against the notebook's current working directory.
df = pd.read_excel('Data/2016_Uist_data.xlsx')

In [12]:
def population_projection(dataframe, years, reps, old_age_mortality, seed=None):
    """
    Probabilistically project a baseline population structure forward in time.

    For every simulated year each age/gender row is thinned by a random
    survival draw, the survivors are moved down one row (aged by one step),
    and the survivors of row 90 are added back into row 90 so that it acts as
    an open-ended oldest group. Nothing is added to the first row, so no
    births or migration are modelled.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Baseline table. It is copied, never modified. The columns 'Age',
        'Gender' (values 'Male' / 'Female'), 'Pop_Count' and
        'Mortality_per_100000' are read by name; in addition the cells at
        positions (90, 4) and (181, 4) are overwritten with the old-age
        mortality.
        NOTE(review): this presumes column 4 is 'Mortality_per_100000' and
        that rows 90 and 181 are the oldest group of each gender -- confirm
        against the workbook layout.
    years : int
        Number of years to simulate per repetition.
    reps : int
        Number of independent repetitions.
    old_age_mortality : float
        Written into the two cells above as ``100000 * old_age_mortality``.
    seed : int, optional
        When given, draws come from a private ``numpy.random.RandomState``
        seeded with this value, making the whole run reproducible. When
        omitted the global ``numpy.random`` state is used, as before.

    Returns
    -------
    tuple(dict, pandas.DataFrame)
        ``d`` maps the repetition number to ``{'x': [...], 'y': [...]}`` where
        ``x`` holds the year offsets (starting at 0) and ``y`` the total
        population for each offset. The DataFrame has the columns
        ['Age', 'Year', 'Gender', 'Pop_Count'] and holds the aged population
        of every simulated year of the FIRST repetition only; it is empty
        (but has those columns) when ``reps`` or ``years`` is 0.
    """
    df = dataframe.copy()
    df.iloc[90, 4] = 100000 * old_age_mortality
    df.iloc[181, 4] = 100000 * old_age_mortality

    # A private generator keeps seeded runs from disturbing global state.
    rng = np.random if seed is None else np.random.RandomState(seed)

    def _survival_vector(row):
        """
        Draw one uniform integer in [0, 100000) per person in the row and
        return how many of them survive the year.
        """
        dice = rng.randint(0, 100000, int(row['Pop_Count']))
        # `>=` (not `>`): exactly `mortality` of the 100000 possible outcomes
        # must kill, so a mortality of 0 can never kill anyone.
        return int(np.count_nonzero(dice >= row['Mortality_per_100000']))

    pyramid_columns = ['Age', 'Year', 'Gender', 'Pop_Count']
    pyramid_frames = []  # per-year frames of the first rep, concatenated once
    d = {}  # dictionary for Bokeh multiline

    for i in range(reps):
        # every rep restarts from the untouched baseline
        df_male = df[df['Gender'] == 'Male'].reset_index(drop=True)
        df_female = df[df['Gender'] == 'Female'].reset_index(drop=True)

        x = [0]  # year offsets
        # year-0 total comes from the data itself rather than a magic number
        y = [int(df_male['Pop_Count'].sum() + df_female['Pop_Count'].sum())]

        for j in range(1, years + 1):
            survived_male = df_male.apply(_survival_vector, axis=1)
            survived_female = df_female.apply(_survival_vector, axis=1)

            # survivors of the oldest row stay in that row after ageing
            male_90 = survived_male[90]
            female_90 = survived_female[90]

            # append the year's population totals to the multiline lists
            x.append(j)
            y.append(int(survived_male.sum() + survived_female.sum()))

            # age everyone by one row; the first row receives nobody
            df_male.loc[:, 'Pop_Count'] = survived_male.shift(1, fill_value=0)
            df_female.loc[:, 'Pop_Count'] = survived_female.shift(1, fill_value=0)
            df_male.loc[90, 'Pop_Count'] += male_90
            df_female.loc[90, 'Pop_Count'] += female_90

            # collect population-pyramid rows for the first rep only
            if i == 0:
                for group in (df_male, df_female):
                    pyramid_frames.append(
                        group[['Age', 'Gender', 'Pop_Count']].assign(Year=j)
                    )

        d[i] = {'x': x, 'y': y}

    if pyramid_frames:
        ppyr_df = pd.concat(pyramid_frames, ignore_index=True)[pyramid_columns]
    else:
        ppyr_df = pd.DataFrame(columns=pyramid_columns)

    return (d, ppyr_df)

In [21]:
# Run the projection; keyword arguments make the three tuning numbers
# self-describing at the call site.
data, pop_df = population_projection(
    df,
    years=15,
    reps=50,
    old_age_mortality=0.15,
)

In [6]:
# Write the population-pyramid frame to pop.csv in the working directory
# (the DataFrame index is written as the first column).
pop_df.to_csv('pop.csv')

In [None]:
# Output the full dataframe for each year, one worksheet per year.
# NOTE(review): `df_dict` is not defined in any visible cell -- confirm where
# the {year: DataFrame} mapping is supposed to come from before running this.

# The context manager saves and closes the workbook even if a sheet fails.
with pd.ExcelWriter('test_ages.xlsx', engine='xlsxwriter') as writer:
    # `year_df`, not `df`: the loop must not overwrite the baseline `df`.
    for year, year_df in df_dict.items():
        year_df.to_excel(writer, sheet_name="Year{}".format(year))

<center>
Plot the projections using Bokeh.
<br>
TODO: replace with an animated D3 chart in the final version.</center>

In [7]:
from bokeh.plotting import figure, output_notebook, output_file, show
from bokeh.models import ColumnDataSource
from bokeh.models import Title
from bokeh.plotting import  curdoc
from bokeh.io import reset_output

In [22]:
# Turn the {key: {...}} mapping in `data` into a frame with one row per outer
# key and one column per inner key (orient='index').
data_df = pd.DataFrame.from_dict(data, orient='index')

In [None]:
#output reset; uncomment and run
#reset_output()
#curdoc().clear()

In [23]:
# Build the figure: one line per row of `data_df`, drawn from its 'x'/'y' columns.
p = figure(plot_width=800, plot_height=400, y_range=(0,4700))
source = ColumnDataSource(data_df)

p.grid.visible=False
# Layouts added 'above' stack outwards, so the subtitle is added first and the
# main title second.
p.add_layout(Title(text="50 simulations using low (0.15) old age mortality",
                   text_font_style="italic", align='center'), 'above')
p.add_layout(Title(text="Probabilistic population projection",
                   align='center', text_font_size="16pt"), 'above')


# Relabel the year offsets on the x axis as calendar years. Both lists are
# derived from the offsets actually present in the data, so every tick gets a
# label (the previous hard-coded ranges had 26 ticks but only 16 labels).
base_year = 2016
years = sorted({offset for offsets in data_df['x'] for offset in offsets})
year_aliases = ['{}'.format(base_year + offset) for offset in years]
p.xaxis.ticker = years
p.xaxis.major_label_overrides = dict(zip(years, year_aliases))

p.multi_line('x', 'y', line_color='black', line_alpha=1, line_width=0.05, source=source)
output_file("projection-low.html", title='Uist population projection')
show(p)

  elif np.issubdtype(type(obj), np.float):
