In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
#from jointmodel import sim
import os
import sys

import pandas as pd
import pystan
import survivalstan
from stancache import stancache
from stancache import config

In [None]:
# Point stancache at a cache directory under the *current* user's home
# directory instead of a path hard-coded to one particular machine/user.
config.set_value(CACHE_DIR=os.path.join(os.path.expanduser('~'), '.cached_models'))

# get simulated data

In [None]:
# Simulate joint-model data for 100 subjects. The result is indexed by name
# further down ('events', 'covars', 'biomarker').
# NOTE(review): no random seed is set anywhere in this notebook, so the
# simulated data presumably differ between runs -- confirm whether the
# simulator accepts a seed.
n_subjects = 100
data = survivalstan.sim.sim_data_jointmodel(N=n_subjects)

# confirm that models compile

In [None]:
# Run the Stan translator on the event-only joint model; this only checks
# that the program parses/translates, no sampling happens here.
jointmodel_file = 'jointmodel/jointmodel.stan'
model = pystan.stanc(file=jointmodel_file)

In [None]:
# Same translation check for the variant of the model that also takes
# biomarker data.
jointmodel_biomarker_file = 'jointmodel/jointmodel_with_biomarker.stan'
model2 = pystan.stanc(file=jointmodel_biomarker_file)

# Fit joint model for competing events

## review event-data

Inspect simulated event data

In [None]:
# Event-level records from the simulation.
df = data['events']

# Show the first ten records, ordered by subject and then by time, limited
# to the identifying and event columns.
event_columns = ['subject_id', 'time', 'event_name', 'event_value']
df.sort_values(['subject_id', 'time'])[event_columns].head(10)

Confirm multi-event data for a particular subject

In [None]:
# All event records belonging to subject 1.
df[df['subject_id'] == 1]

Transform data to "long" format

In [None]:
# Reshape the event records into "long" format. The resulting frame is
# queried below on the columns end_time, end_death and end_new_lesion.
ldf = survivalstan.prep_data_long_surv(
    df,
    sample_col='subject_id',
    time_col='time',
    event_col='event_value',
    event_name='event_name',
)

Confirm transformed data for same subject inspected above

In [None]:
# Rows for subject 1 where either event occurred, or whose end_time is past 4.4.
# NOTE(review): 4.4 is an unexplained cut-off -- presumably chosen to show this
# subject's last few rows in the simulated data; confirm.
subject_1_rows = ldf.query('subject_id == 1 and (end_death == 1 or end_new_lesion == 1 or end_time > 4.4)')
subject_1_rows.sort_values(['subject_id', 'end_time'])

Merge event-level data with covariate values

In [None]:
# Attach subject-level covariates to every long-format row.
covars = data['covars']
# This cell overwrites `ldf` with a merge of itself, so re-running it would
# merge the covariates a second time and produce suffixed duplicates
# (e.g. X1_x / X1_y) that break the model formulas used later. Dropping any
# covariate columns already present in `ldf` makes the cell safe to re-run.
stale_cols = [col for col in covars.columns
              if col != 'subject_id' and col in ldf.columns]
ldf = pd.merge(ldf.drop(stale_cols, axis=1), covars, on='subject_id', how='outer')

## prepare inputs for stan model

This part is done more manually for now, since the `survivalstan` code to automate it hasn't been written yet.

First we prepare the input matrices for the terminal event (death).

In [None]:
# Stan inputs for the terminal event: `end_death` is the event indicator and
# X1 is the only covariate in the formula.
input_t = survivalstan.SurvivalStanData(
    df=ldf,
    sample_col='subject_id',
    timepoint_end_col='end_time',
    event_col='end_death',
    formula='~ X1',
)

In [None]:
# Summary statistics of the timepoint table built for the terminal-event inputs.
input_t.timepoint_df.describe()

Next we prepare the input matrices for the recurrent event (new_lesion).

*In theory, since timepoint ids are assigned in sorted order, the ids should be identical to those assigned for the terminal event. We will confirm this before moving forward.*

In [None]:
# Stan inputs for the recurrent event: `end_new_lesion` is the event indicator
# and X2 is the only covariate in the formula.
input_r = survivalstan.SurvivalStanData(
    df=ldf,
    sample_col='subject_id',
    timepoint_end_col='end_time',
    event_col='end_new_lesion',
    formula='~ X2',
)

In [None]:
# Summary statistics of the timepoint table built for the recurrent-event inputs.
input_r.timepoint_df.describe()

Confirm that timepoint_ids assigned are identical between the two datasets.

In [None]:
## check whether assigned timepoint ids are indeed identical
timepoints_t = input_t.timepoint_df
timepoints_r = input_r.timepoint_df

# Outer merge so that a timepoint_id present in only one of the two inputs
# shows up with a missing end_time, and therefore a NaN difference, below.
merged_timepoints = pd.merge(timepoints_t, timepoints_r, on='timepoint_id',
                             suffixes=('.t', '.r'), how='outer')

# Vectorized column arithmetic instead of a row-by-row apply.
merged_timepoints['end_time.diff'] = (
    merged_timepoints['end_time.t'] - merged_timepoints['end_time.r']
)

# NaN == 0 is False, so ids missing from either side also fail this check.
assert (merged_timepoints['end_time.diff'] == 0).all(), \
    'timepoint_id does not map to the same end_time for both event types'

In [None]:
# No (subject_id, end_time) pair may occur more than once among the
# non-missing rows of the recurrent-event inputs.
repeated_rows = input_r.df_nonmiss.duplicated(subset=['subject_id', 'end_time'])
assert not repeated_rows.any()

## combine data inputs for multiple events

Finally we transform the data into a single dictionary to pass into Stan. 

Review keys prepared for each event type:

In [None]:
# Names of the entries prepared for the terminal-event inputs.
input_t.data.keys()

In [None]:
# Names of the entries prepared for the recurrent-event inputs.
input_r.data.keys()

Confirm that items which should be shared between two event types are indeed shared.

In [None]:
# Entries compared with a plain `==` (presumably scalar counts -- confirm).
# NOTE(review): 'T' is also taken from input_t alone when the Stan data are
# assembled, but it is not compared here.
scalar_keys = ('S', 'M', 'N')
# Entries whose element-wise comparison is reduced with `.all()`.
array_keys = ('s', 't', 't_obs', 't_dur')

for el in scalar_keys:
    value_t, value_r = input_t.data[el], input_r.data[el]
    assert value_t == value_r

for el in array_keys:
    elementwise_equal = input_t.data[el] == input_r.data[el]
    assert elementwise_equal.all()

Prepare dictionary to pass into stan.

In [None]:
# Entries taken once, from the terminal-event inputs.
shared_keys = ('S', 'T', 't_obs', 't_dur', 'N', 's', 't')
stan_data = {key: input_t.data[key] for key in shared_keys}

# Event-specific entries carry a suffix: `_t` for the terminal event (death),
# `_r` for the recurrent event (new_lesion).
stan_data.update({
    'M_t': input_t.data['M'],
    'event_t': input_t.data['event'],
    'x_t': input_t.data['x'],
    'M_r': input_r.data['M'],
    'event_r': input_r.data['event'],
    'x_r': input_r.data['x'],
})

## Fit stan model to event-data only

Let's execute this Stan model on our competing-event data.

We wrap the input data in a `CustomSurvivalData` class so that `fit_stan_survival_model` can be used as written.

In [None]:
class CustomSurvivalData:
    """Bare container bundling hand-assembled Stan inputs.

    Exposes exactly three attributes -- ``data``, ``x_df`` and ``df_nonmiss`` --
    so an instance can be passed as ``input_data`` to the fitting function
    (presumably the attributes that function reads; confirm against
    ``survivalstan``).
    """

    def __init__(self, stan_data, x_df, df_nonmiss):
        """Store the three inputs as given.

        stan_data: stored as ``self.data``.
        x_df: stored as ``self.x_df``.
        df_nonmiss: stored as ``self.df_nonmiss``.
        """
        # Plain attribute storage: nothing is copied or validated.
        self.df_nonmiss = df_nonmiss
        self.x_df = x_df
        self.data = stan_data

Finally, we fit the stan model to the simulated data.

In [None]:
# Bundle the combined Stan data with the design frame and non-missing rows
# of the terminal-event inputs.
event_only_input = CustomSurvivalData(
    stan_data=stan_data,
    x_df=input_t.x_df,
    df_nonmiss=input_t.df_nonmiss,
)

# Fit the event-only joint model with 4 chains of 2000 iterations each,
# going through stancache's cached fit function.
# NOTE(review): no seed is passed, so draws presumably differ between
# uncached runs -- confirm whether a seed can be forwarded.
test = survivalstan.fit_stan_survival_model(
    input_data=event_only_input,
    file='jointmodel/jointmodel.stan',
    FIT_FUN=stancache.cached_stan_fit,
    iter=2000,
    chains=4,
)

# Fit joint model with biomarker

## Add biomarker (longitudinal) data to Stan

Next we want to include our biomarker/longitudinal data in the estimation.

First we need to prepare the data matrix to include in our input_data.

Let's review the simulated biomarker data.

In [None]:
# First rows of the simulated biomarker (longitudinal) measurements.
data['biomarker'].head()

Merge this with simulated covariate data:

In [None]:
# Join each biomarker measurement to its subject's covariates (default inner
# join on subject_id).
biomarker_obs = data['biomarker']
subject_covars = data['covars']
biodf = biomarker_obs.merge(subject_covars, on='subject_id')

Prep input fields to pass to SurvivalStan:

In [None]:
# Reuse SurvivalStanData to build the biomarker inputs: the biomarker value is
# passed in the `event_col` slot and the measurement time as `time_col`.
biomarker_data = survivalstan.SurvivalStanData(
    df=biodf,
    sample_col='subject_id',
    time_col='biomarker_time',
    event_col='biomarker_value',
    formula='~ X1 + X2',
)


(review keys)

In [None]:
# Names of the entries prepared for the biomarker inputs.
biomarker_data.data.keys()

Review the non-missing data frame

In [None]:
# First rows of the non-missing biomarker frame; its `sample_id` and
# `biomarker_time` columns are read when the Stan data are extended.
biomarker_data.df_nonmiss.head()

Update `stan_data` dictionary to include biomarker data

In [None]:
# Longitudinal (biomarker) entries, all carrying the `_l` suffix.
# NOTE(review): `subject_l` uses the sample ids assigned while preparing the
# biomarker frame -- presumably these line up with the subject ids in the
# event data (`s`); confirm.
biomarker_nonmiss = biomarker_data.df_nonmiss
biomarker_inputs = {
    'N_l': biomarker_data.data['N'],
    'M_l': biomarker_data.data['M'],
    'subject_l': biomarker_nonmiss['sample_id'].values,
    'time_l': biomarker_nonmiss['biomarker_time'].values,
    'y_l': biomarker_data.data['y'],
    'x_l': biomarker_data.data['x'],
}

# Extend the existing dictionary in place; keys already present are kept.
stan_data.update(biomarker_inputs)

## Fit stan model on biomarker+event data

Finally we fit a version of our stan model including biomarker data: 

In [None]:
# Bundle the Stan data (now including the biomarker entries) with the design
# frame and non-missing rows of the terminal-event inputs.
biomarker_event_input = CustomSurvivalData(
    stan_data=stan_data,
    x_df=input_t.x_df,
    df_nonmiss=input_t.df_nonmiss,
)

# Fit the biomarker-aware joint model with 4 chains of 2000 iterations each,
# going through stancache's cached fit function.
# NOTE(review): no seed is passed, so draws presumably differ between
# uncached runs -- confirm whether a seed can be forwarded.
test2 = survivalstan.fit_stan_survival_model(
    input_data=biomarker_event_input,
    file='jointmodel/jointmodel_with_biomarker.stan',
    FIT_FUN=stancache.cached_stan_fit,
    iter=2000,
    chains=4,
)