In [1]:
import pandas as pd
import numpy as np


def get_flu_season(year, week,
                   season_start_week=40,
                   pan_start=15,
                   pan_end=47):
    '''
    Return the flu-season label for a given calendar year and week.

    For every year except 2009, weeks before `season_start_week` belong to
    the season labelled `year`; weeks from `season_start_week` onward belong
    to the season labelled `year + 1`.

    2009 is split three ways (presumably around the H1N1 pandemic, judging
    by the `pan_` parameter names):
      * weeks before `pan_start`           -> 2009
      * `pan_start` .. `pan_end` inclusive -> 2009.5
      * weeks after `pan_end`              -> 2010

    Returns -1 when no comparison succeeds (e.g. `week` is NaN).
    '''
    if year == 2009:
        # 2009 ignores season_start_week for any orderable week value.
        if pan_start <= week <= pan_end:
            return 2009.5
        if week < pan_start:
            return 2009
        if week > pan_end:
            return 2010
        # Only unorderable weeks (NaN) get here; fall through to the shared
        # tail so the result matches the non-2009 path.
    elif week < season_start_week:
        return year

    if week >= season_start_week:
        return year + 1
    return -1

# --- Input files (paths relative to the notebook) ---------------------------
ilifile = '../raw_data/ILINet.csv'
prior_to_15_16 = '../raw_data/WHO_NREVSS_Combined_prior_to_2015_16.csv'
public_health_subtype = '../raw_data/WHO_NREVSS_Public_Health_Labs.csv'
clinical_lab_type = '../raw_data/WHO_NREVSS_Clinical_Labs.csv'

# --- Column selections ------------------------------------------------------
# Columns kept from the combined (prior to 2015/16) file.
prior_1516_columns = [
    'TOTAL SPECIMENS',
    'A (2009 H1N1)',
    'A (H1)',
    'A (H3)',
    'A (Subtyping not Performed)',
    'A (Unable to Subtype)',
    'H3N2v',
    'B',
]

# Influenza A columns summed into the subtype-level 'TOTAL A'
# (note: 'A (Subtyping not Performed)' is not part of this list).
A_subtype_cols = [
    'A (2009 H1N1)',
    'A (H1)',
    'A (H3)',
    'A (Unable to Subtype)',
    'H3N2v',
]

# Columns counted as H1 and as H3 respectively.
H1_cols = ['A (2009 H1N1)', 'A (H1)']
H3_cols = ['A (H3)', 'H3N2v']

# Columns kept from the public health labs file.
public_health_cols = [
    'TOTAL SPECIMENS',
    'A (2009 H1N1)',
    'A (H3)',
    'A (Subtyping not Performed)',
    'H3N2v',
    'B',
    'BVic',
    'BYam',
]

In [2]:
# Clinical labs file: keep only the type-level totals.
# The first line of each file is skipped and rows are keyed by (YEAR, WEEK).
typedf = pd.read_csv(clinical_lab_type, skiprows=1, index_col=['YEAR', 'WEEK'])
typedf = typedf[['TOTAL A', 'TOTAL B', 'TOTAL SPECIMENS']]

# Public health labs file: keep the subtype columns listed in public_health_cols.
subtypedf = pd.read_csv(public_health_subtype, skiprows=1, index_col=['YEAR', 'WEEK'])
subtypedf = subtypedf[public_health_cols]


In [3]:
# Combined file covering the period before 2015/16; 'X' cells are read as missing.
combineddf = pd.read_csv(prior_to_15_16,
                         skiprows=1,
                         na_values=['X'],
                         index_col=['YEAR', 'WEEK'])
combineddf = combineddf[prior_1516_columns]

# Type-level totals: every influenza A column (including the ones without a
# subtype) is summed row-wise, missing values being skipped; B is copied as is.
influenza_a_cols = ['A (2009 H1N1)',
                    'A (H1)',
                    'A (H3)',
                    'A (Subtyping not Performed)',
                    'A (Unable to Subtype)',
                    'H3N2v']
combineddf['TOTAL A'] = combineddf[influenza_a_cols].sum(skipna=True, axis=1)
combineddf['TOTAL B'] = combineddf['B']

# Two views of the same table: type-level totals, and the raw subtype columns
# (everything in prior_1516_columns except the trailing 'B').
combineddf_type = combineddf[['TOTAL A', 'TOTAL B', 'TOTAL SPECIMENS']]
combineddf_subtype = combineddf[prior_1516_columns[:-1]]



In [4]:
# Stack the older combined subtype rows on top of the public health lab rows.
subtype_final = pd.concat([combineddf_subtype, subtypedf], axis=0, sort=False)

# Row-wise counts; columns missing from one of the sources are NaN after the
# concat and are skipped by the sums.
subtyped_a_total = subtype_final[A_subtype_cols].sum(axis=1, skipna=True)
h1_total = subtype_final[H1_cols].sum(axis=1, skipna=True)
h3_total = subtype_final[H3_cols].sum(axis=1, skipna=True)

subtype_final['TOTAL A'] = subtyped_a_total
# Shares of H1 / H3 among the A_subtype_cols total; NaN results
# (e.g. 0 / 0) are reported as 0.
subtype_final['FRACTION H1'] = h1_total.divide(subtype_final['TOTAL A']).fillna(0)
subtype_final['FRACTION H3'] = h3_total.divide(subtype_final['TOTAL A']).fillna(0)
subtype_final['H1 TOTAL'] = h1_total
subtype_final['H3 TOTAL'] = h3_total

# Only the derived columns are carried forward.
subtype_final = subtype_final[['FRACTION H1', 'FRACTION H3', 'H1 TOTAL', 'H3 TOTAL']]

In [5]:
# Stack the older combined type-level rows on top of the clinical lab rows.
type_final = pd.concat([combineddf_type, typedf], axis=0, sort=False)

# Share of tested specimens positive for each type; NaN results are set to 0.
for flu_type in ('A', 'B'):
    type_final['FRACTION ' + flu_type] = (
        type_final['TOTAL ' + flu_type]
        .divide(type_final['TOTAL SPECIMENS'])
        .fillna(0)
    )
type_final['FRACTION POSITIVE'] = type_final['FRACTION A'] + type_final['FRACTION B']

# 'TOTAL B' is not carried forward.
type_final = type_final[['TOTAL A',
                         'TOTAL SPECIMENS',
                         'FRACTION A',
                         'FRACTION B',
                         'FRACTION POSITIVE']]

In [6]:
# ILINet file, keyed by (YEAR, WEEK) like the virology tables.
ilidf = pd.read_csv(ilifile,
                    skiprows=1,
                    index_col=['YEAR', 'WEEK'])

# Index-on-index merges (pandas default: inner join), so only weeks present
# in all three tables are kept.
df = (type_final
      .merge(ilidf, left_index=True, right_index=True)
      .merge(subtype_final, left_index=True, right_index=True))
df = df[['% WEIGHTED ILI',
        'TOTAL A',
        'TOTAL SPECIMENS',
        'FRACTION A',
        'FRACTION B',
        'FRACTION H1',
        'FRACTION H3',
        'H1 TOTAL',
        'H3 TOTAL']]
# Rescale the percentage column to a 0-1 fraction.
df['% WEIGHTED ILI'] = df['% WEIGHTED ILI'] / 100

# Label every (YEAR, WEEK) row with its flu season in a single pass over the
# index. This avoids adding a column to `df` while iterating over it with
# iterrows() and also creates the column when `df` is empty. The float dtype
# matches what row-by-row .loc assignment produced (2009.5 is a valid label).
df['SEASON'] = np.array(
    [get_flu_season(year, week) for year, week in df.index],
    dtype=float,
)

In [7]:
# Intensities: row-wise products of the weighted ILI fraction with the
# influenza A share (and, for the subtypes, the H1 / H3 share of A).
# DataFrame.product skips NaN, so a missing factor does not blank the row.
intensity_factors = {
    'INTENSITY_A': ['% WEIGHTED ILI', 'FRACTION A'],
    'INTENSITY_H1': ['% WEIGHTED ILI', 'FRACTION A', 'FRACTION H1'],
    'INTENSITY_H3': ['% WEIGHTED ILI', 'FRACTION A', 'FRACTION H3'],
}
df['INTENSITY_A'] = df[intensity_factors['INTENSITY_A']].product(axis=1)
df['INTENSITY_H1'] = df[intensity_factors['INTENSITY_H1']].product(axis=1)
df['INTENSITY_H3'] = df[intensity_factors['INTENSITY_H3']].product(axis=1)

# PMF_A: each week's share of its season's total A intensity (6 decimals).
for _, season_rows in df.groupby('SEASON'):
    season_intensity = season_rows['INTENSITY_A']
    season_total = season_intensity.sum()
    df.loc[season_rows.index, 'PMF_A'] = (season_intensity / season_total).round(6)

# SF_A: one minus the rounded cumulative PMF of the *preceding* weeks of the
# season, so the first week of every season gets 1.
for _, season_rows in df.groupby('SEASON'):
    cumulative_pmf = season_rows['PMF_A'].cumsum().round(5)
    shifted = np.array([0] + cumulative_pmf.tolist())
    remaining = 1 - shifted
    df.loc[season_rows.index, 'SF_A'] = remaining[:-1]
        

In [8]:
# Write the assembled weekly table to CSV (to_csv defaults: index included).
df.to_csv('../data/weekly_incidence_simplified.csv')