In [1]:
import sqlite3
import pandas as pd
from datetime import datetime, timedelta, date
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import pymmwr
import calendar
from scipy.interpolate import splev, splrep
from collections import defaultdict
from scipy import stats
from collections import Counter

def get_max_week(year):
    '''
    Return the largest MMWR week number found in December of `year`
    (either 52 or 53).

    Each day of December is converted to its epiweek with pymmwr and the
    highest week number seen is returned.
    '''
    december_week_numbers = (
        pymmwr.date_to_epiweek(date(year, 12, day_of_month)).week
        for day_of_month in range(1, 32)
    )
    # default=0 mirrors the starting value of a running-maximum scan.
    return max(december_week_numbers, default=0)


def date_to_mmwr_week(date, season_start_week=40):
    '''
    Convert a date into its MMWR (epidemiological) week and year.

    date: a date object, passed straight to pymmwr.date_to_epiweek.
    season_start_week: accepted but not used by this function.

    Returns a (week, year) tuple -- note that the week comes first.
    '''
    epiweek = pymmwr.date_to_epiweek(date)
    return (epiweek.week, epiweek.year)


def weekly_demo_function(year,
                         week,
                         birth_year,
                         waning_time_days=180):
    '''
    Given a birth year, a year, and an MMWR week in that year, return the
    fraction of the population born in that birth year that experiences that
    particular week.

    year, week: the MMWR year and week being asked about.
    birth_year: calendar year of birth of the cohort.
    waning_time_days: number of days after birth before an individual counts
        as exposed (the maternal-waning period).

    Returns 0 before the first susceptible week of the cohort, 1 from the
    last waning week onwards, and otherwise the number of MMWR weeks elapsed
    since the first susceptible week (inclusive) divided by the number of
    MMWR weeks in the birth year.
    '''
    # Converts the waning period into a datetime timedelta object
    waning_period = timedelta(waning_time_days)

    # Effective first day that anyone born in birth_year is susceptible, and
    # the day on which the last person born in that year wanes.
    first_susceptible = pymmwr.date_to_epiweek(date(birth_year, 1, 1)
                                               + waning_period)
    last_waning = pymmwr.date_to_epiweek(date(birth_year, 12, 31)
                                         + waning_period)

    # (year, week) tuples compare chronologically.
    initial_week = (first_susceptible.year, first_susceptible.week)
    final_week = (last_waning.year, last_waning.week)

    # Before maternal waning has started, nobody in the cohort is exposed.
    if (year, week) < initial_week:
        return 0

    # Past the last day of waning, everyone is exposed.
    if (year, week) >= final_week:
        return 1

    # Partial exposure: count the MMWR weeks from the first susceptible week
    # through (year, week), inclusive. Summing the length of every MMWR year
    # crossed keeps this valid for any waning period, not only those where the
    # first susceptible week falls in the birth year itself.
    weeks_elapsed = int(week) - int(initial_week[1]) + 1
    for crossed_year in range(int(initial_week[0]), int(year)):
        weeks_elapsed += get_max_week(crossed_year)

    birth_year_max_week = get_max_week(birth_year)
    return weeks_elapsed / birth_year_max_week

def week_to_ordinal_week(season, week):
    '''
    Convert an MMWR week number into a 0-based week index within a season.

    season: season label; '2009Pan' is treated as starting in 2009, any other
        value as starting in int(season) - 1.
    week: MMWR week number.

    The season's start week (looked up in start_dates) maps to 0 and the
    indices increase by one per week, continuing without a gap from the last
    MMWR week of the first year into week 1 of the following year.
    '''
    if season == '2009Pan':
        first_year = 2009
    else:
        first_year = int(season) - 1

    start_week, start_year = date_to_mmwr_week(datetime.strptime(start_dates[str(season)], '%Y-%m-%d').date())
    # The MMWR week containing December 28 is used as the last week of the first year.
    end_week_first_year, end_week_year = date_to_mmwr_week(datetime.strptime(str(first_year) + '-12-28', '%Y-%m-%d').date())

    if week >= start_week and week <= end_week_first_year:
        ordinal_week = week - start_week
    else:
        # The last first-year week has index (end_week_first_year - start_week),
        # so week 1 of the next year must be exactly one more than that.
        ordinal_week = end_week_first_year - start_week + week

    return ordinal_week

def ordinal_week_to_week(season, ordinal_week):
    '''
    Convert a 0-based week index within a season back into an MMWR week number.

    season: season label; '2009Pan' is treated as starting in 2009, any other
        value as starting in int(season) - 1.
    ordinal_week: index where 0 is the season's start week (from start_dates).

    Indices up to the last MMWR week of the first year are offset from the
    start week; larger indices restart the count at week 1.
    '''
    first_year = 2009 if season == '2009Pan' else int(season) - 1

    season_start = datetime.strptime(start_dates[str(season)], '%Y-%m-%d').date()
    start_week, _ = date_to_mmwr_week(season_start)

    # The MMWR week containing December 28 is used as the last week of the first year.
    first_year_end = datetime.strptime(str(first_year) + '-12-28', '%Y-%m-%d').date()
    last_week_first_year, _ = date_to_mmwr_week(first_year_end)

    last_ordinal_first_year = last_week_first_year - start_week

    if ordinal_week <= last_ordinal_first_year:
        return start_week + ordinal_week
    return ordinal_week - last_ordinal_first_year

def week_to_year(season, week):
    '''
    Return the year that an MMWR week of a given season is assigned to.

    season: season label; '2009Pan' is treated as starting in 2009, any other
        value as starting in int(season) - 1.
    week: MMWR week number.

    Weeks from the season's start week (from start_dates) through the last
    MMWR week of the first year map to the first year; every other week maps
    to the following year.
    '''
    first_year = 2009 if season == '2009Pan' else int(season) - 1

    season_start = datetime.strptime(start_dates[str(season)], '%Y-%m-%d').date()
    start_week, _ = date_to_mmwr_week(season_start)

    # The MMWR week containing December 28 is used as the last week of the first year.
    first_year_end = datetime.strptime(str(first_year) + '-12-28', '%Y-%m-%d').date()
    last_week_first_year, _ = date_to_mmwr_week(first_year_end)

    in_first_year = week >= start_week and week <= last_week_first_year
    return first_year if in_first_year else first_year + 1

# Season label -> nominal season start date ('YYYY-07-01' of the labelled year).
# NOTE(review): the week helpers treat season N as beginning in year N - 1 but
# read only the MMWR week number of this date -- confirm the year is intended.
start_dates = {label: label + '-07-01' for label in map(str, range(1918, 2019))}
# The 2009 pandemic is keyed as its own season label.
start_dates.update({'2009Pan': '2009-07-01'})

In [5]:
# Build, for every (season, birth year) pair, the fraction of that season's
# vaccination distribution that the birth cohort was exposed to.
vaccine_timings = pd.read_csv('../raw_data/Marshfield_vaccination_timing.csv')
new_rows = []

# Per-season weekly profiles are collected in a list and concatenated once:
# DataFrame.append was removed in pandas 2.0 and re-copies the frame on every call.
season_profiles = []
for season, df in vaccine_timings.groupby('Vaccine type'):
    if season == '2009Pan':
        season_init = 2009
        # The pandemic season is stored as a half-year so it sorts after 2009.
        season_float = 2009.5
    else:
        season_init = int(season) - 1
        season_float = int(season)

    plotdf = df.copy()
    plotdf['Ordinal_week'] = [week_to_ordinal_week(season, w) for w in plotdf.Week]
    plotdf = plotdf.sort_values('Ordinal_week')[['Week', 'PMF', 'Ordinal_week', 'Year']]
    season_profiles.append(plotdf)

    # Loop-invariant: total probability mass of this season's profile.
    pmf_total = sum(plotdf.PMF)

    for birth_year in range(1918, season_init + 1):
        season_experienced = 0
        for index, row in plotdf.iterrows():
            demo = weekly_demo_function(row.Year,
                                        row.Week,
                                        birth_year)
            season_experienced += row.PMF * demo

        # When every weekly fraction is 1 the running sum adds the same terms
        # in the same order as pmf_total, so exact equality identifies a fully
        # exposed cohort; it is then recorded as exactly 1.
        if season_experienced == pmf_total:
            season_experienced = 1

        new_rows.append([season_float, birth_year, season_experienced])

temp_df = pd.concat(season_profiles)

# Set vaccination profile in seasons prior to 2008 to average profile
averages = temp_df.groupby('Ordinal_week').mean()
averages = averages / averages.sum()
average_pmf_total = sum(averages.PMF)
for season in range(1918, 2008):
    season_init = season - 1
    birth_years = range(1918, season_init + 1)
    if not birth_years:
        # No cohort to evaluate for this season (e.g. 1918).
        continue

    # The week/year of each ordinal week depends only on the season, so it is
    # resolved once here rather than once per birth year.
    weekly_profile = []
    for index, row in averages.iterrows():
        wk = ordinal_week_to_week(int(season), index)
        yr = week_to_year(int(season), wk)
        weekly_profile.append((yr, wk, row.PMF))

    for birth_year in birth_years:
        season_experienced = 0
        for yr, wk, pmf in weekly_profile:
            demo = weekly_demo_function(yr,
                                        wk,
                                        birth_year)
            season_experienced += pmf * demo

        if season_experienced == average_pmf_total:
            season_experienced = 1
        season_float = int(season)
        new_rows.append([season_float, birth_year, season_experienced])

vac_exposed = pd.DataFrame(new_rows, columns=['Season', 'Birth_year', 'Frac_exposed_to_vaccination'])

In [7]:
# Write the per-season, per-birth-year exposure fractions to disk without the index column.
scalings_path = '../data/vaccination_scalings.csv'
vac_exposed.to_csv(scalings_path, index=False)