# Randomly sample ILI and fraction A to create a distribution of intensity scores

In [5]:
import pandas as pd
import datetime
import pymmwr
from matplotlib import pyplot as plt
from matplotlib import rcParams
import numpy as np
import sqlite3
from collections import defaultdict
from scipy import stats
import seaborn as sns
%matplotlib inline

def season_float_to_label(season):
    '''
    Turns a numeric season identifier into its display label.

    The value 2009.5 is labelled '2009Pan'. Every other value is truncated
    to an integer end year and labelled '<end year - 1>-<end year>'.
    '''
    if season == 2009.5:
        return '2009Pan'

    end_year = int(season)
    return '{}-{}'.format(end_year - 1, end_year)

def get_max_week(year):
    '''
    Given a year, gets the maximum MMWR week for that year (either 52 or 53).

    Every day of December is converted to its epiweek and the largest week
    number seen is returned.
    '''
    december_weeks = [
        pymmwr.date_to_epiweek(datetime.date(year, 12, day)).week
        for day in range(1, 32)
    ]
    # 0 is the floor the running maximum starts from.
    return max(0, *december_weeks)


def weekly_demo_function(year,
                         week,
                         birth_year,
                         waning_time_days=180):
    '''
    Given a birth year, a year, and an MMWR week in that year, returns the
    fraction of the population born in that birth year that experiences
    that particular week.

    The cohort's exposure window opens ``waning_time_days`` after 1 January
    of the birth year and is complete ``waning_time_days`` after 31 December
    of the birth year:

    * before the opening week the result is 0;
    * from the closing week onwards the result is 1;
    * in between, the result is the number of weeks elapsed since the
      opening week (inclusive) divided by the number of MMWR weeks in the
      birth year.

    The in-between case only defines a value when ``year`` is ``birth_year``
    or ``birth_year + 1``; any other year leaves ``multiplier`` unassigned
    and the return statement raises UnboundLocalError.
    '''
    waning_offset = datetime.timedelta(waning_time_days)

    # Epiweeks in which the earliest-born and latest-born members of the
    # cohort finish waning.
    opening = pymmwr.date_to_epiweek(datetime.date(birth_year, 1, 1) + waning_offset)
    closing = pymmwr.date_to_epiweek(datetime.date(birth_year, 12, 31) + waning_offset)
    opening_week = (opening.year, opening.week)
    closing_week = (closing.year, closing.week)
    current_week = (year, week)

    # Nobody in the cohort has waned yet.
    if current_week < opening_week:
        return 0

    # The whole cohort has waned.
    if current_week >= closing_week:
        return 1

    # Part of the cohort has waned: count the weeks elapsed since the
    # opening week, inclusive.
    weeks_in_birth_year = get_max_week(birth_year)
    if year == birth_year:
        multiplier = int(week) - int(opening_week[1]) + 1
    elif year == birth_year + 1:
        weeks_before_new_year = int(weeks_in_birth_year) - int(opening_week[1]) + 1
        multiplier = weeks_before_new_year + int(week)
    return multiplier / weeks_in_birth_year


def get_intensity(year,
                  week,
                  weekly_incidence,
                  average_incidence):
    '''
    Given a year and a week, returns the intensity of that particular week
    relative to the entire flu season.

    year: label looked up in the index of ``weekly_incidence``.
    week: value matched against the ``WEEK`` column.
    weekly_incidence: DataFrame with ``WEEK`` and ``PMF_A`` columns.
    average_incidence: mapping from week to a fallback intensity.

    Returns the ``PMF_A`` value of the first row matching (year, week). If
    that lookup fails for any reason (year absent from the index, no row for
    the week, ...) the value ``average_incidence[week]`` is returned instead.

    Raises KeyError when the fallback is needed and ``week`` is not in
    ``average_incidence``.
    '''
    try:
        year_rows = weekly_incidence.loc[year]
        return year_rows[year_rows.WEEK == week].iloc[0]['PMF_A']
    except Exception:
        # Deliberate best-effort: missing observations fall back to the
        # average profile. Catching Exception rather than using a bare
        # except lets KeyboardInterrupt and SystemExit propagate.
        return average_incidence[week]

def _exposure_segments(season,
                       birth_year,
                       season_start,
                       pandemic_start,
                       pandemic_end):
    '''
    Lists the (calendar year, weeks) stretches of ``season`` over which a
    birth cohort is only partially exposed, or returns None when the season
    counts in full for that cohort.
    '''
    # The pandemic "season" is weeks pandemic_start..pandemic_end of 2009 and
    # is only partial for the two cohorts born around it.
    if season == 2009.5:
        if birth_year in (2008, 2009):
            return [(2009, range(pandemic_start, pandemic_end + 1))]
        return None

    # The pandemic truncates its neighbours: the 2009 season stops where the
    # pandemic starts and the 2010 season begins right after it ends.
    tail_first_week = pandemic_end + 1 if season == 2010 else season_start
    head_stop_week = pandemic_start if season == 2009 else season_start

    if season == birth_year:
        return [(season, range(1, head_stop_week))]

    if season == birth_year + 1:
        previous_year = season - 1
        # The tail weeks belong to the previous calendar year, so that
        # year's own final MMWR week (52 or 53) bounds the range.
        return [(previous_year,
                 range(tail_first_week, get_max_week(previous_year) + 1)),
                (season, range(1, head_stop_week))]

    return None


def rescale_intensity(birth_year,
                      weekly_incidence,
                      average_incidence,
                      waning_period=180,
                      season_start=40,
                      pandemic_start=15,
                      pandemic_end=47,
                      final_year=2018):
    '''
    Computes, for one birth cohort, the multiplier applied to each season's
    intensity to account for partial exposure around the time of birth.

    For every season from ``birth_year`` to ``final_year`` (plus the 2009.5
    pandemic season when ``birth_year`` <= 2009) the multiplier is

    * the sum over the season's weeks of ``get_intensity(...)`` times
      ``weekly_demo_function(...)`` for the season containing the birth
      year and the one after it, or
    * 1 for every other season.

    birth_year: integer birth year of the cohort.
    weekly_incidence, average_incidence: forwarded to ``get_intensity``.
    waning_period: waning time in days, forwarded to ``weekly_demo_function``.
    season_start: week at which a regular season starts.
    pandemic_start, pandemic_end: first and last week (in 2009) of the
        pandemic season.
    final_year: last season to include.

    Returns a DataFrame with columns ``birth_year``, ``season_float`` and
    ``frac_exposed``, one row per season; every row carries index label 0.
    '''
    seasons = list(range(birth_year, final_year + 1))

    # We need to append the pandemic season for people born before the pandemic
    if birth_year <= 2009:
        seasons.append(2009.5)

    rows = []
    for season in seasons:
        segments = _exposure_segments(season,
                                      birth_year,
                                      season_start,
                                      pandemic_start,
                                      pandemic_end)
        if segments is None:
            weighted_average = 1
        else:
            weighted_average = 0
            for year, weeks in segments:
                for week in weeks:
                    weekly_intensity = get_intensity(year,
                                                     week,
                                                     weekly_incidence,
                                                     average_incidence)
                    demo_fraction = weekly_demo_function(year,
                                                         week,
                                                         birth_year,
                                                         waning_period)
                    weighted_average += weekly_intensity * demo_fraction

        rows.append([birth_year, season, weighted_average])

    # DataFrame.append no longer exists in pandas 2.x, so the frame is built
    # in one go. The all-zero index matches what the row-by-row append of
    # single-row frames used to produce.
    return pd.DataFrame(rows,
                        columns=['birth_year', 'season_float', 'frac_exposed'],
                        index=[0] * len(rows))


def get_average_incidence(weekly_incidence, season_start_week=40):
    '''
    Calculates the average weekly incidence.

    Rows whose ``SEASON`` is 2009, 2009.5 or 2010 are excluded since the
    pandemic and its bordering seasons are non-standard. ``PMF_A`` is then
    averaged per ``WEEK`` and the averages are rescaled to sum to 1.

    weekly_incidence: DataFrame with ``SEASON``, ``WEEK`` and ``PMF_A``
        columns.
    season_start_week: not used by the calculation; kept so existing calls
        keep working.

    Returns a dict mapping each week to its normalised average ``PMF_A``.
    The dict is empty when no rows remain after filtering.
    '''
    # Exclude pandemic and bordering seasons since they are non-standard seasons
    non_standard_seasons = [2009, 2009.5, 2010]
    standard_rows = weekly_incidence[~weekly_incidence['SEASON'].isin(non_standard_seasons)]

    weeks = []
    mean_incidence = []
    for week, week_df in standard_rows.groupby('WEEK'):
        weeks.append(week)
        mean_incidence.append(np.average(week_df['PMF_A']))

    mean_incidence = np.array(mean_incidence)
    return dict(zip(weeks, mean_incidence / np.sum(mean_incidence)))

# ---- Model parameters ----
waning_time_days = 180
childhood_end_years = 12
season_start = 40
pandemic_start, pandemic_end = 15, 47
min_birth_year, max_birth_year = 1918, 2018

# ---- Input files ----
pandemic_file = '../raw_data/Historic_Flu_Pandemics.csv'
Thompson_data = '../raw_data/Thompson_flu_counts.csv'
cumulative_incidence_file = '../data/weekly_incidence_simplified.csv'
MESA_subtype_file = '../raw_data/subtype_fractions_by_season.csv'

# ---- Load the inputs ----
MESA_subtype_fracs = pd.read_csv(MESA_subtype_file, index_col=0)

# Missing weekly values are treated as zero.
weekly_iav_incidence = pd.read_csv(cumulative_incidence_file, index_col=0).fillna(0)

# Index the historical counts by the number after the last '-' of each
# Season label, as a float.
historical_frac_a = pd.read_csv(Thompson_data)
historical_frac_a.index = [float(label.split('-')[-1])
                           for label in historical_frac_a['Season']]

historical_pandemics = pd.read_csv(pandemic_file, index_col=0)

# ---- Derived objects ----
ave_incidence = get_average_incidence(weekly_iav_incidence)
all_intensity_scalings = pd.DataFrame()

# One row per year from min_birth_year to max_birth_year, plus the 2009.5
# pandemic season.
Intensity_and_Frequency = pd.DataFrame(
    index=pd.Index(list(range(min_birth_year, max_birth_year + 1)) + [2009.5],
                   name='Year'))

# Calculate Intensity and subtype frequency

In [26]:


# Seasons listed in the historical pandemics table take their subtype
# fractions directly from it.
for season, row in historical_pandemics.iterrows():
    for subtype_column in ('H1N1_fraction', 'H3N2_fraction', 'H2N2_fraction'):
        Intensity_and_Frequency.loc[season, subtype_column] = getattr(row, subtype_column)

# Seasons with weekly surveillance data: mean ILI, fraction of specimens that
# are influenza A, and the H1N1 / H3N2 split.
for season, season_df in weekly_iav_incidence.groupby('SEASON'):
    mean_ili = np.average(season_df['% WEIGHTED ILI'])
    fraction_a = np.sum(season_df['TOTAL A']) / np.sum(season_df['TOTAL SPECIMENS'])

    if 2008 <= season < 2019:
        # These seasons take the split from the MESA subtype table.
        h1_fraction = MESA_subtype_fracs.loc[season, 'H1_MESA']
        h3_fraction = MESA_subtype_fracs.loc[season, 'H3_MESA']
    else:
        # Otherwise the split comes from the subtyped specimen counts.
        h1_total = np.sum(season_df['H1 TOTAL'])
        h3_total = np.sum(season_df['H3 TOTAL'])
        h1_fraction = h1_total / (h1_total + h3_total)
        h3_fraction = h3_total / (h1_total + h3_total)

    season_values = [('Mean_ILI', mean_ili),
                     ('Fraction_A', fraction_a),
                     ('H1N1_fraction', h1_fraction),
                     ('H3N2_fraction', h3_fraction),
                     ('H2N2_fraction', 0)]
    for column, value in season_values:
        Intensity_and_Frequency.loc[season, column] = value


# Pool of observed mean-ILI values (every surveillance season except the
# 2009.5 pandemic) from which unobserved seasons are resampled.
# .loc needs a list here: pandas no longer accepts a set as an indexer.
observed_seasons = sorted(set(weekly_iav_incidence['SEASON']) - {2009.5})
ILI_list = Intensity_and_Frequency.loc[observed_seasons, 'Mean_ILI']

# trial number -> mean (un-normalised) intensity of that sampled table
ILI_samples = {}
for trial in range(0, 1000):
    temp = Intensity_and_Frequency.copy()

    # Seasons that have historical counts but no mean ILI: draw the ILI at
    # random and take the type/subtype fractions from the counts.
    for season, row in historical_frac_a.iterrows():
        if np.isnan(temp.loc[season, 'Mean_ILI']):
            # A scalar draw, so a plain number is written into the cell.
            temp.loc[season, 'Mean_ILI'] = np.random.choice(ILI_list)
            temp.loc[season, 'Fraction_A'] = row.A_total / row.Specimens_tested

            temp.loc[season, 'H1N1_fraction'] = row.H1N1 / (row.H1N1 + row.H3N2)
            temp.loc[season, 'H3N2_fraction'] = row.H3N2 / (row.H1N1 + row.H3N2)
            temp.loc[season, 'H2N2_fraction'] = 0

    # Random sampling for previous years
    # Select by membership instead of .loc[range(...)]: label lookup raises
    # KeyError for any year that dropna() removed or that is not in the
    # index. Integer years only, so 2009.5 stays excluded as before.
    known_fraction_a = temp.Fraction_A.dropna()
    in_sampling_window = known_fraction_a.index.isin(list(range(1977, 2020)))
    A_list = list(known_fraction_a[in_sampling_window])

    ILI_frac = np.random.choice(ILI_list, 1977 - 1918, replace=True)
    A_sample = np.random.choice(A_list, 1977 - 1918, replace=True)

    temp.loc[range(1918, 1977), 'Mean_ILI'] = ILI_frac
    temp.loc[range(1918, 1977), 'Fraction_A'] = A_sample

    # Intensity is mean ILI times fraction A, normalised by its mean over
    # all seasons; the un-normalised mean is kept per trial.
    temp['Intensity'] = temp.Mean_ILI * temp.Fraction_A
    ILI_samples[trial] = np.nanmean(temp.Intensity)
    temp['Intensity'] = temp.Intensity / np.nanmean(temp.Intensity)
    temp.index.name = 'season_float'

    temp.to_csv('../data/sample_ILI/ILI_sample_%s.csv'%trial)

