In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from survival import MIRModel

In [None]:
# INPUT DATA
# The CSV is read in 100,000-row chunks and stitched back into one frame.
# The 'mi_ratio' column is exposed as 'mir', and only rows whose 'mir' is
# at most 1 are kept.
input_file = '/home/j/temp/fed1/input_file_with_other_mortality_calculated_from_numbers.csv'
chunks = pd.read_csv(input_file, chunksize=100000)
df = (
    pd.concat(chunks)
    .rename(columns={'mi_ratio': 'mir'})
    .loc[lambda frame: frame['mir'] <= 1]
)

In [None]:
# CHECK input data columns: an 'Observed' column is required in addition to the 'mir' and 'other_mortality' columns
#df.head()
#list(df.columns)

### Functions to Compute MSE and Obtain an Optimized Disease Period

In [None]:
def compute_mse(disease_period, num_years, df):
    """Fit an ``MIRModel`` for one disease period and score it against 'Observed'.

    Parameters
    ----------
    disease_period
        Forwarded unchanged to ``MIRModel(..., disease_period=...)``.
    num_years
        Forwarded unchanged to ``MIRModel.get_survival_rate(num_years=...)``.
    df : pandas.DataFrame
        Must provide the columns 'mir', 'other_mortality' and 'Observed'.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with four extra columns: 'excess_mortality',
        'abs_survival_rate', 'rel_survival_rate' and 'MSE'. 'MSE' holds the
        per-row squared difference between 'abs_survival_rate' and
        'Observed'; it is not averaged here.

    Notes
    -----
    The input frame is never modified. Callers typically pass a filtered
    slice of a larger frame, and writing columns into such a slice raises
    pandas' SettingWithCopyWarning and leaks state between calls.
    """
    model = MIRModel(df['mir'],
                     df['other_mortality'],
                     disease_period=disease_period)
    model.compute_excess_mortality()
    survival_rate = model.get_survival_rate(num_years=num_years)

    # Work on a private copy so the caller's data stays exactly as passed in.
    scored = df.copy()
    scored['excess_mortality'] = model.excess_mortality
    scored['abs_survival_rate'] = survival_rate['abs']
    scored['rel_survival_rate'] = survival_rate['rel']
    scored['MSE'] = (scored['abs_survival_rate'] - scored['Observed']) ** 2
    return scored

In [None]:
def get_disease_period(df, max_disease_period=15, num_years=5):
    """Pick the disease period whose modelled survival best matches 'Observed'.

    Every integer disease period from 1 to ``max_disease_period`` (inclusive)
    is scored with ``compute_mse``; the period with the smallest mean squared
    error wins.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to fit; passed unchanged to ``compute_mse``.
    max_disease_period : int, default 15
        Largest disease period to try.
    num_years : int, default 5
        Forwarded to ``compute_mse`` as ``num_years``.

    Returns
    -------
    int or str
        The unique best disease period as an ``int``, or the string ``'NA'``
        when there is no unique minimum (a tie, or no period produced a
        usable MSE).
    """
    mse_by_period = {}
    for period in range(1, max_disease_period + 1):
        table = compute_mse(disease_period=period, num_years=num_years, df=df)
        # Series.mean() skips missing values in both numerator and
        # denominator. The builtin sum() would turn the whole average into
        # NaN as soon as a single row has no 'Observed' value.
        mse_by_period[period] = table['MSE'].mean()

    mse_series = pd.Series(mse_by_period, dtype=float)
    # Series.min() ignores NaN; builtin min() gives order-dependent answers
    # when NaN is present.
    best_periods = mse_series.index[mse_series == mse_series.min()]
    if len(best_periods) == 1:
        # Take the scalar explicitly: int() on a one-element Series is
        # deprecated in recent pandas.
        return int(best_periods[0])
    return 'NA'

In [None]:
# Find the best-fitting disease period for every (cause, location, age group)
# combination present in the input data.
table_columns = ['acause', 'location_name', 'age_group_id', 'disease_period']
all_locations = df['location_name'].unique()

# Rows are collected in a plain list and turned into a DataFrame once at the
# end; appending to a DataFrame with .loc inside the loop re-allocates the
# frame on every iteration.
rows = []
for cause_name in df['acause'].unique():
    # Filter by cause once per cause instead of once per (cause, location).
    df_cause = df[df['acause'] == cause_name]
    for location in all_locations:
        df_sub = df_cause[df_cause['location_name'] == location]
        # A location with no rows for this cause yields no age groups and
        # therefore contributes no rows to the table.
        for age in df_sub['age_group_id'].unique():
            df_sub_age = df_sub[df_sub['age_group_id'] == age]
            n_excess_mortality = get_disease_period(df_sub_age)
            rows.append([cause_name, location, age, n_excess_mortality])

# Keep the 1-based row labels the table has always been displayed with.
disease_period_table = pd.DataFrame(rows,
                                    columns=table_columns,
                                    index=pd.RangeIndex(1, len(rows) + 1))

In [None]:
# CHECK RESULTS
# Show the first rows of the table as this cell's output for a quick visual check.
disease_period_table.head()

In [None]:
# SAVE RESULTS
# Write the table to CSV without the DataFrame index column.
output_file = '../results/disease_period_with_age.csv'
disease_period_table.to_csv(output_file, index=False)