# Cox Proportional Hazards Model

## Load libraries and define settings

In [67]:
# general dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import os
from pathlib import Path
import warnings

from lifelines import CoxPHFitter
from lifelines.datasets import load_rossi

In [68]:
# ignore deprecation warning
warnings.filterwarnings("ignore", category=DeprecationWarning) 

# autoreload changes from local files
%load_ext autoreload
%autoreload 2

# pandas show full output
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 200)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [71]:
from src import cox

## Load example dataset

In [72]:
df = pd.DataFrame({
    'patient': [1, 2, 3],
    'time': [5, 3, 4],
    'event': [1, 1, 0],
    'x1': [2, 1, 3],
    'x2': [1, 2, 1]
})
df


Unnamed: 0,patient,time,event,x1,x2
0,1,5,1,2,1
1,2,3,1,1,2
2,3,4,0,3,1


In [74]:
df_rossi = load_rossi()
df_rossi.head()

Unnamed: 0,week,arrest,fin,age,race,wexp,mar,paro,prio
0,20,1,0,27,1,0,0,1,3
1,17,1,0,18,1,0,0,1,8
2,25,1,0,19,0,1,0,1,13
3,52,0,1,23,1,1,1,1,1
4,52,0,0,19,0,1,0,1,3


## Cox Model
Run the cox model on the simple dataset of 3 records and the lifelines rossi dataset. Compare the latter with the cox model implementation of lifelines.

In [81]:
# model params
init_beta = [0.1, 0.2]
max_iter = 100
solver= "Newton-CG"
verbose = 1

# data params
event_column = 'event'
duration_column = 'time'
covariate_columns = ['x1', 'x2']

cox_ph = cox.CoxPH(init_coef=init_beta, max_iter=max_iter, solver=solver, verbose=verbose)
cox_ph.fit(data=df, event_column=event_column, duration_column=duration_column, covariate_columns=covariate_columns)

Pre-training negative log partial likelihood: 1.0663774224850238
Fit model with coefficients: [-5.14143894  4.73146531]
Post-training negative log partial likelihood: 5.185299018872769e-05


<src.cox.CoxPH at 0x31061f9d0>

In [88]:
# model params
max_iter = 100
solver= "Newton-CG"
verbose = 0

# data params
event_column = 'arrest'
duration_column = 'week'

# custom iompelmentation
cph_custom  = cox.CoxPH(max_iter=max_iter, solver=solver, verbose=verbose)
cph_custom.fit(data=df_rossi, event_column=event_column, duration_column=duration_column)

# lifelines
cph_lifelines = CoxPHFitter()
cph_lifelines.fit(df_rossi, duration_col=duration_column, event_col=event_column)

print(f"lifelines coefficients: {cph_custom.coef_}")
print(f"custom coefficients: {cph_lifelines.params_.values}")

lifelines coefficients: [-0.3759673  -0.0571687   0.30715241 -0.15497119 -0.43674487 -0.07910215
  0.09244658]
custom coefficients: [-0.37942217 -0.05743774  0.31389979 -0.1497957  -0.43370388 -0.08487108
  0.09149708]
