In [1]:
import pandas as pd
import numpy as np

# Data loading

### Expression levels

In [2]:
# Load the expression table stored under key 'brca' in the HDF5 file.
# NOTE(review): the file name suggests ~20k genes, standardised / MAD-selected
# upstream -- confirm against the script that produced the file.
brca_ex = pd.read_hdf('../data/TCGA_gene_exp_20k_std-MAD.h5', key='brca')

In [3]:
# Dimensions (rows, columns) of the expression table right after loading.
brca_ex.shape

(1212, 20000)

In [4]:
import ast

# The text file holds a Python literal (a collection of gene identifiers);
# read it in full, then parse it with literal_eval, which only accepts literals
# and therefore does not execute arbitrary code.
with open('../common_genes_1-5.txt') as f:
    raw_gene_literal = f.read()
common_genes = ast.literal_eval(raw_gene_literal)

In [5]:
# Keep only the expression columns whose labels appear in common_genes.
# list(...) is needed because pandas column selection expects a list-like of
# labels, and common_genes may be a set or tuple.
brca_ex = brca_ex[list(common_genes)]

In [6]:
# Dimensions after restricting the columns to common_genes.
brca_ex.shape

(1212, 1492)

In [7]:
# Read the saved table of non-collinear genes and discard its first column
# (presumably an index column written out when the CSV was saved -- confirm).
non_collinear_genes = pd.read_csv('non_collinear_genes.csv').iloc[:, 1:]

In [8]:
# Turn the first row of the table into a plain Python list of labels; the list
# is used afterwards to select columns of the expression table.
# NOTE: the name is rebound from a DataFrame to a list here, so this cell cannot
# be re-run by itself -- re-run the cell that reads the CSV first.
non_collinear_genes = list(non_collinear_genes.values[0])

In [9]:
# Narrow the expression table further to the columns listed in
# non_collinear_genes.
brca_ex = brca_ex[non_collinear_genes]

In [10]:
# Dimensions after keeping only the non-collinear gene columns.
brca_ex.shape

(1212, 149)

### Overall survival (OS)

In [11]:
# Read the table stored under key 'brca_clinical' and keep only the two
# survival columns: 'OS' (later passed to the Cox fit as the event column) and
# 'OS.time' (later passed as the duration column).
brca_os = pd.read_hdf('../data/TCGA_data.h5', key='brca_clinical')[['OS', 'OS.time']]

In [12]:
## brca
# Clean the survival columns in three passes: replace the literal string 'NaN'
# by a real missing value, drop rows where either column is missing, and only
# then cast both columns to float.
survival_cols = ['OS.time', 'OS']


def _nan_string_to_missing(value):
    """Return np.nan for the literal string 'NaN'; pass anything else through."""
    if value == 'NaN':
        return np.nan
    return value


for survival_col in survival_cols:
    brca_os[survival_col] = brca_os[survival_col].map(_nan_string_to_missing)

brca_os.dropna(subset=['OS.time', 'OS'], inplace=True)

for survival_col in survival_cols:
    brca_os[survival_col] = brca_os[survival_col].astype(float)

In [13]:
# Dimensions of the survival table after dropping rows with missing values.
brca_os.shape

(1196, 2)

In [14]:
# Select the expression rows by the survival table's index, so both tables list
# the same rows in the same order before they are combined.
# NOTE(review): how labels missing from brca_ex are handled by .loc depends on
# the pandas version (error vs. NaN-filled rows) -- confirm none are missing.
brca_ex = brca_ex.loc[brca_os.index]

In [15]:
# Number of positions at which the two row indexes hold the same label; it
# should equal the number of rows if the tables are aligned.
(brca_ex.index == brca_os.index).sum()

1196

In [16]:
# Combine expression values and survival columns side by side, keeping exactly
# the rows (and row order) of brca_ex.
# The `join_axes` argument of pd.concat was deprecated in pandas 0.25 and
# removed in 1.0, where passing it raises TypeError; reindexing the
# concatenated frame on brca_ex.index is the documented replacement and also
# works on older pandas versions.
brca_data = pd.concat([brca_ex, brca_os], axis=1).reindex(brca_ex.index)

In [17]:
# Dimensions of the combined table: expression columns plus the two survival
# columns.
brca_data.shape

(1196, 151)

In [19]:
from lifelines import CoxPHFitter

In [21]:
# Fit a Cox proportional hazards model on the combined table, with 'OS.time' as
# the duration column and 'OS' as the event indicator; show_progress=True
# prints one line per optimiser iteration.
# NOTE(review): presumably every remaining column of brca_data enters the model
# as a covariate -- confirm against the lifelines documentation.
cph = CoxPHFitter()
cph.fit(brca_data, duration_col='OS.time', event_col='OS', show_progress=True)

cph.print_summary()  # access the results using cph.summary

Iteration 1: norm_delta = 2.43565, step_size = 0.9500, ll = -1141.26077, newton_decrement = 138.69289, seconds_since_start = 0.1
Iteration 2: norm_delta = 1.97209, step_size = 0.9500, ll = -1068.12421, newton_decrement = 77.04372, seconds_since_start = 0.3
Iteration 3: norm_delta = 0.55531, step_size = 0.9500, ll = -1002.28911, newton_decrement = 7.93926, seconds_since_start = 0.4
Iteration 4: norm_delta = 0.12208, step_size = 1.0000, ll = -994.33834, newton_decrement = 0.30007, seconds_since_start = 0.5
Iteration 5: norm_delta = 0.00582, step_size = 1.0000, ll = -994.02799, newton_decrement = 0.00146, seconds_since_start = 0.7
Iteration 6: norm_delta = 0.00005, step_size = 1.0000, ll = -994.02652, newton_decrement = 0.00000, seconds_since_start = 0.9
Iteration 7: norm_delta = 0.00000, step_size = 1.0000, ll = -994.02652, newton_decrement = 0.00000, seconds_since_start = 1.2
Convergence completed after 7 iterations.
<lifelines.CoxPHFitter: fitted with 1196 observations, 998 censored>
 