In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from scipy.interpolate import splev, splrep
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
%matplotlib inline


# Demography table, indexed by the (Birth_year, Season) pair.
demography = pd.read_csv(
    '../data/demography_by_birth_year.csv',
    index_col=['Birth_year', 'Season'],
)

# Vaccination coverage by age and season, as read from the raw CSV.
vac_cov_by_age = pd.read_csv('../raw_data/vac_coverage_by_age_seasonal_2010.csv')

# One row per (Age, Season): every other column is summed over the rows that
# share that pair.
final_df = (
    vac_cov_by_age
    .groupby(['Age', 'Season'])
    .sum()
    .copy()
)


In [2]:
# Ages 91 through 100 are assumed to share the age-90 coverage of the same
# season, so each of those (age, season) entries is filled from the age-90 one.
seasons_in_data = set(vac_cov_by_age.Season)
for age in range(91, 101):
    for season in seasons_in_data:
        age_90_coverage = final_df.loc[(90, season), 'coverage']
        final_df.loc[(age, season), 'coverage'] = age_90_coverage

In [3]:
# Seasons present in the coverage table. Sorted so that the output index (and
# therefore the CSV row order) is deterministic instead of following set
# iteration order.
all_seasons = sorted(set(vac_cov_by_age.Season))
birth_years = range(1918, 2018)
index = pd.MultiIndex.from_product([birth_years, all_seasons], names=['Birth_year', 'Season'])
birth_year_df = pd.DataFrame(index=index, columns=['coverage'])

# Convert age-indexed coverage to birth-year-indexed coverage.
#
# For every (birth_year, season) pair, `demography` supplies up to two ages
# (columns a1, a2) with matching weights (columns f1, f2). The cohort's
# coverage is the weighted sum of the coverage at those ages in that season.
# NOTE(review): seasons are iterated over 2008-2018 regardless of which seasons
# the coverage table actually contains -- confirm the two always agree.
for birth_year in birth_years:
    for season in range(2008, 2019):
        # Only cohorts born before the season's year are filled in; all other
        # entries keep the NaN they were created with.
        if birth_year > season - 1:
            continue

        cov = 0.0
        for age_col, frac_col in (('a1', 'f1'), ('a2', 'f2')):
            age = demography.loc[(birth_year, season), age_col]
            if np.isnan(age):
                # A missing age contributes nothing. Skipping the lookup avoids
                # a KeyError when age 0 is absent from the coverage table and
                # keeps a NaN coverage value there from turning the sum into NaN.
                continue
            frac = demography.loc[(birth_year, season), frac_col]
            cov += final_df.loc[(age, season), 'coverage'] * frac

        birth_year_df.loc[(birth_year, season), 'coverage'] = cov

birth_year_df.to_csv('../data/vac_coverage_by_birth_year_seasonal_2010.csv')