# Survival Analysis & Censored Data

### Loading Libraries

In [None]:
# Numerical Computing
import numpy as np

# Data Manipulation
import pandas as pd

# Data Visualization
import matplotlib.pyplot as plt
from matplotlib.pyplot import subplots

# ISLP
from ISLP import load_data
from ISLP.survival import sim_time
from ISLP.models import ModelSpec as MS

In [None]:
from lifelines import (KaplanMeierFitter, CoxPHFitter)

from lifelines.statistics import (logrank_test, multivariate_logrank_test)

### Brain Cancer Data

In [None]:
# Load the BrainCancer dataset through ISLP's `load_data` helper.
BrainCancer = load_data('BrainCancer')

# Show the column labels as the cell's output.
BrainCancer.columns

In [None]:
# Count how many rows fall into each value of 'sex'.
BrainCancer['sex'].value_counts()

In [None]:
# Count how many rows fall into each value of 'diagnosis'.
BrainCancer['diagnosis'].value_counts()

In [None]:
# Count the values of 'status', the column later passed to the survival fits
# alongside 'time'.
BrainCancer['status'].value_counts()

In [None]:
# Kaplan-Meier estimate of the survival curve for the whole BrainCancer sample.
fig, ax = subplots(figsize=(8, 8))

km = KaplanMeierFitter()

# `fit` hands back the fitter itself, so `km_brain` and `km` are one object.
km_brain = km.fit(durations=BrainCancer['time'],
                  event_observed=BrainCancer['status'])
km_brain.plot(ax=ax, label='Kaplan Meier estimate')

ax.grid(True)
plt.show()

In [None]:
# Kaplan-Meier curves for each sex, drawn on one shared set of axes.
fig, ax = subplots(figsize=(8, 8))

# The per-sex sub-frames are kept so later cells can compare the groups.
by_sex = {}

# observed=True limits a categorical key to the levels actually present (an
# empty group could not be fitted) and pins the behaviour regardless of the
# pandas default for this argument.
for sex, df in BrainCancer.groupby('sex', observed=True):
    by_sex[sex] = df
    # Use a fresh fitter per group: `fit` returns the fitter itself, so
    # re-fitting the shared `km` would silently overwrite `km_brain` with
    # this group's curve.
    km_sex = KaplanMeierFitter().fit(df['time'], df['status'])
    km_sex.plot(label='Sex =%s' % sex, ax=ax)

plt.grid(True)
plt.show()

In [None]:
# Log-rank test comparing the male and female groups collected in `by_sex`.
logrank_test(by_sex['Male']['time'],
             by_sex['Female']['time'],
             event_observed_A=by_sex['Male']['status'],
             event_observed_B=by_sex['Female']['status'])

In [None]:
# Cox proportional-hazards model with sex as the only predictor.
# `coxph` aliases the class itself, so `coxph()` creates a new fitter.
coxph = CoxPHFitter

sex_df = BrainCancer[['time', 'status', 'sex']]

# Build the model frame without an intercept column.
model_df = MS(['time', 'status', 'sex'], intercept=False).fit_transform(sex_df)

cox_fit = coxph().fit(model_df, duration_col='time', event_col='status')

# Show only the coefficient, its standard error and the p-value.
cox_fit.summary[['coef', 'se(coef)', 'p']]

In [None]:
# Likelihood-ratio test for the fitted Cox model `cox_fit`.
cox_fit.log_likelihood_ratio_test()

In [None]:
# Cox model using every column, after dropping rows with missing values.
cleaned = BrainCancer.dropna()

all_MS = MS(cleaned.columns, intercept=False)
all_df = all_MS.fit_transform(cleaned)

fit_all = coxph().fit(all_df, duration_col='time', event_col='status')

# Show only the coefficient, its standard error and the p-value.
fit_all.summary[['coef', 'se(coef)', 'p']]

In [None]:
# Distinct diagnosis values present in the cleaned data.
levels = cleaned['diagnosis'].unique()

def representative(series):
    """Return a typical value for one column.

    A column whose dtype exposes ``categories`` yields its mode(s) as a
    Series (several entries when there is a tie); any other column yields
    its mean.
    """
    is_categorical = hasattr(series.dtype, 'categories')
    return series.mode() if is_categorical else series.mean()

# Apply `representative` to each column (axis=0) of the cleaned data.
modal_data = cleaned.apply(representative, axis =0)

In [None]:
# Repeat the first row of `modal_data` once per diagnosis level ...
modal_df = pd.DataFrame([modal_data.iloc[0]] * len(levels))

# ... then overwrite 'diagnosis' so each row carries a different level.
modal_df['diagnosis'] = levels

modal_df

In [None]:
# Encode the representative rows with the model spec fitted earlier (`all_MS`).
modal_X = all_MS.transform(modal_df)
# Label each row with its diagnosis level.
modal_X.index = levels

modal_X

In [None]:
# Survival function predicted by the full Cox fit for the rows of `modal_X`.
predicted_survival = fit_all.predict_survival_function(modal_X)

predicted_survival

In [None]:
# Draw the predicted survival curves held in `predicted_survival`.
fig, ax = subplots(figsize=(8, 8))

predicted_survival.plot(ax=ax)
ax.grid(True)
plt.show()

### Publication Data

In [None]:
# Kaplan-Meier curves for the Publication data, split by `posres`.
fig, ax = subplots(figsize=(8, 8))

Publication = load_data('Publication')

# The per-group sub-frames are kept for later use.
by_result = {}

for result, df in Publication.groupby('posres'):
    by_result[result] = df
    # Fit a new estimator for every group: `fit` returns the fitter itself,
    # so reusing the shared `km` would leave `km_brain` (and any earlier
    # result of `km.fit`) pointing at whichever group was fitted last.
    km_result = KaplanMeierFitter().fit(df['time'], df['status'])
    km_result.plot(label='Result =%d' % result, ax=ax)

plt.grid(True)
plt.show()

In [None]:
# Cox model on the Publication data with `posres` as the only predictor.
posres_df = MS(['posres', 'time', 'status'],
               intercept=False).fit_transform(Publication)

posres_fit = coxph().fit(posres_df, duration_col='time', event_col='status')

# Show only the coefficient, its standard error and the p-value.
posres_fit.summary[['coef', 'se(coef)', 'p']]

In [None]:
# Cox model on every Publication column except 'mech'.
model = MS(Publication.columns.drop('mech'), intercept=False)

coxph().fit(
    model.fit_transform(Publication),
    duration_col='time',
    event_col='status',
).summary[['coef', 'se(coef)', 'p']]

### Call Center Data