In [35]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from survival import DecayMIRModel
from survival import MIRModel
from survival.age_group_extension import AgeSurvivalModel
from survival.three_parameter_log_form import LogFormMIRModel

## INPUT DATA

### [GBD 2019 MIRs with GBD 2019 background mortality]

In [None]:
# Alternative input (GBD 2019 MIRs); uncomment to use it instead of the GBD 2020 file.
#df = pd.read_csv('/home/j/temp/fed1/2019_mirs_with_background_mortality_ALL_only.csv')

### [GBD 2020 v 89 MIRs with GBD 2020 background mortality]

In [36]:
# Read the GBD 2020 MIR / background-mortality extract into the working frame.
df = pd.read_csv(
    filepath_or_buffer='/home/j/temp/fed1/2020_mirs_with_background_mortality_ALL_only.csv'
)

In [37]:
# PROCESS INPUTS
# Drop every row that contains a missing value, then keep only rows whose MIR
# is at most 1 and whose age_group_id is not in the exclusion list.
# NOTE(review): the excluded ids are presumably aggregate / infant sub-groups;
# confirm against the GBD age-group table.
_excluded_age_group_ids = [22, 28, 2, 3, 4, 5]

df = df.dropna()
df = df[(df['mi_ratio'] <= 1) & ~df['age_group_id'].isin(_excluded_age_group_ids)]

df.head()

Unnamed: 0.1,Unnamed: 0,age_group_id,sex_id,year_id,location_id,acause,mi_ratio,cause_id,measure_id,metric_id,age_group_name,cause_name,expected,location_name,location_type,measure_name,metric_name,sex,background_mortality
0,1,1,1,1990,6,neo_leukemia_ll_acute,0.736437,294,1,3,Under 5,All causes,False,China,admin0,Deaths,Rate,Male,0.010942
1,2,1,1,1990,7,neo_leukemia_ll_acute,0.747966,294,1,3,Under 5,All causes,False,Democratic People's Republic of Korea,admin0,Deaths,Rate,Male,0.009047
2,3,1,1,1990,8,neo_leukemia_ll_acute,0.254112,294,1,3,Under 5,All causes,False,Taiwan (Province of China),admin0,Deaths,Rate,Male,0.001818
3,4,1,1,1990,10,neo_leukemia_ll_acute,0.951282,294,1,3,Under 5,All causes,False,Cambodia,admin0,Deaths,Rate,Male,0.029895
4,5,1,1,1990,11,neo_leukemia_ll_acute,0.890948,294,1,3,Under 5,All causes,False,Indonesia,admin0,Deaths,Rate,Male,0.018975


In [38]:
# Subset to the pediatric age groups (age_group_id < 9) and test on a single
# year (2020, the year of the GBD 2020 input file and of the saved results).
# .copy() hands the rest of the notebook an independent frame: later cells add
# columns to df, and doing that on a filtered slice makes classic pandas emit
# SettingWithCopyWarning.
df = df[(df['age_group_id'] < 9) & (df['year_id'] == 2020)].copy()

## I. Base MIR -> Survival Model:

#### [M/I = (1-P_s^n)(P_c)/(P_c+P_o)]

In [24]:
# PARAMETERS
num_years = 5
disease_pd = 10

# RUN BASE MODEL
# Build the model from the MIR and background-mortality columns, compute the
# excess mortality, then request survival over num_years.
model = MIRModel(
    df['mi_ratio'],
    df['background_mortality'],
    disease_period=disease_pd,
)
model.compute_excess_mortality()
survival_rate = model.get_survival_rate(num_years=num_years)

# SAVE IN ORIGINAL DATA FRAME
df['excess_mortality_og_mir_model'] = model.excess_mortality
# survival_rate is indexed by 'abs' and 'rel'; store each under its own column.
for _kind in ('abs', 'rel'):
    df[_kind + '_survival_rate_og_mir_model'] = survival_rate[_kind]

## IV. Three parameter decay model:

#### [log(P_c) = b*log(1+exp(-(x-a)))+c]

In [39]:
# Label each row with an age-band string (the key later used to merge in the
# fitted parameters). Ids 6, 7 and 8 get their own band; every other id falls
# back to "00-04 years".
_age_band_by_id = {
    6: "05-09 years",
    7: "10-14 years",
    8: "15-19 years",
}
df['age_name'] = df['age_group_id'].map(_age_band_by_id).fillna("00-04 years")

In [40]:
# List the causes available in the fitted-parameter table.
# The table is read here because this cell runs before the merge cell that
# otherwise first defines `parameters`; without the read, Restart & Run All
# stops at this cell with a NameError.
parameters = pd.read_csv('/homes/fed1/Survival_Franny/three_parameter_log_form_values_SEER_6_18_2021_age_specific.csv')
parameters['cause_name'].unique()

array(['Oral Cavity and Pharynx', 'Nasopharynx',
       'Liver and Intrahepatic Bile Duct', 'Pancreas',
       'Nose, Nasal Cavity and Middle Ear', 'Lung and Bronchus',
       'Bones and Joints', 'Soft Tissue including Heart',
       'Skin excluding Basal and Squamous', 'Melanoma of the Skin',
       'Other Non-Epithelial Skin', 'Ovary', 'Prostate', 'Testis',
       'Urinary Bladder', 'Kidney and Renal Pelvis', 'Eye and Orbit',
       'Brain and Other Nervous System', 'Non-Hodgkin Lymphoma',
       'Leukemia', 'Acute Lymphocytic Leukemia',
       'Chronic Lymphocytic Leukemia', 'Acute Myeloid Leukemia',
       'Chronic Myeloid Leukemia', 'Other Leukemia', 'Colon and Rectum',
       'Hodgkin Lymphoma', 'Stomach', 'Corpus and Uterus, NOS', 'Thyroid',
       'Gallbladder', 'Larynx', 'Breast', 'Cervix Uteri', 'Myeloma',
       'Mesothelioma', 'Esophagus'], dtype=object)

In [41]:
# Merge in the fitted parameters: read the parameter table, keep only the
# 'Acute Lymphocytic Leukemia' rows, and left-join them onto df by age band.
# NOTE(review): df already has a cause_name column, so pandas will suffix the
# two (_x / _y) in the result; confirm nothing downstream reads cause_name.
parameters = pd.read_csv('/homes/fed1/Survival_Franny/three_parameter_log_form_values_SEER_6_18_2021_age_specific.csv')
_all_parameters = parameters.loc[parameters['cause_name'] == 'Acute Lymphocytic Leukemia']
df = df.merge(_all_parameters, on=['age_name'], how="left")

In [42]:
num_years = 5

# Same n on every row.
# NOTE(review): 15 is hard-coded here and repeated in the output file name
# (n_15); confirm its meaning against LogFormMIRModel.
df['n'] = 15

# RUN THREE PARAMETER LOG MODEL
# Positional inputs, in order: MIR, background mortality, a, b, n.
# (a and b are presumably the columns brought in by the parameter merge.)
_log_model_inputs = [
    df[_col] for _col in ('mi_ratio', 'background_mortality', 'a', 'b', 'n')
]
three_param_model = LogFormMIRModel(*_log_model_inputs)
three_param_model.compute_third_parameter()


In [43]:
# Survival over num_years from the three-parameter model.
survival_rate = three_param_model.get_survival_rates(num_years=num_years)

# SAVE IN ORIGINAL DATA FRAME
df['predicted_c'] = three_param_model.c
# survival_rate is indexed by 'abs' and 'rel'; store each under its own column.
for _kind in ('abs', 'rel'):
    df[_kind + '_survival_rate_three_param_model'] = survival_rate[_kind]

## Comparison to Current GBD Methods

In [44]:
# GBD comparison method: relative survival taken as the complement of the MIR.
df['rel_survival_old_gbd_model'] = df['mi_ratio'].rsub(1)

## Save results

In [45]:
# Write the working frame (inputs plus the model output columns) to CSV.
# The row index is written as the first, unnamed column (the pandas default).
df.to_csv(
    path_or_buf='/home/j/temp/fed1/comparing_ALL_survival_methods_2020_inputs_6_18_2021_n_15_age_specific.csv',
    index=True,
)

In [None]:
# Show the first five rows of the final frame as a sanity check.
df.head(n=5)