In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch

from load import read_csv

from lifelines import KaplanMeierFitter

from tabulate import tabulate

In [None]:
# Seed the NumPy and PyTorch global random number generators so that
# re-running the notebook from a fresh kernel gives the same random draws.
NUMPY_SEED = 1234
TORCH_SEED = 123

np.random.seed(NUMPY_SEED)
# Bind the return value to `_` so the cell does not echo it as output.
_ = torch.manual_seed(TORCH_SEED)

In [None]:
# Read the dataset through the project's `read_csv` helper and report how
# many entries it returned, as a quick sanity check on the load.
datapath = './Data/data.csv'
data = read_csv(datapath)

n_records = len(data)
print(n_records)

In [None]:
# Median DAYS_SINCE_DIAGNOSIS for every SITE_* column, printed as a table in months.
T = data['DAYS_SINCE_DIAGNOSIS']
is_site_column = data.columns.str.contains('SITE_')
sites = data.columns[is_site_column]

# For each site keep only the rows flagged with 1 in that column, take the
# median duration in days, and convert it to months (days / 365 * 12).
median = [T[data[site] == 1].median() / 365 * 12 for site in sites]

# Pair each site name with its median rounded to one decimal: one table row per site.
to_tab = np.stack((sites, np.round(median, 1)), axis=1)
print(tabulate(to_tab, headers=['Site', 'Months']))

In [None]:
# Kaplan-Meier survival curve for every SITE_* column, one panel per site,
# with the whole figure written to 'KMcurves.svg'.
T = data['DAYS_SINCE_DIAGNOSIS']
E = data['EVENT']
kmf = KaplanMeierFitter()

sites = data.columns[data.columns.str.contains('SITE_')]

# Size the panel grid from the number of sites instead of assuming at most
# six: plt.subplot raises ValueError once the index exceeds nrows * ncols.
# The minimum of two rows keeps the original 2 x 3 layout whenever there are
# six or fewer sites, so existing output is unchanged.
n_cols = 3
n_rows = max(2, -(-len(sites) // n_cols))  # ceiling division without importing math

for i, site in enumerate(sites):
    ax = plt.subplot(n_rows, n_cols, i + 1)

    # Restrict the durations and event column to the rows flagged for this site.
    ix = data[site] == 1
    kmf.fit(T[ix], E[ix], label=site)
    kmf.plot_survival_function(ax=ax, legend=False)

    # Label through the Axes object rather than pyplot's implicit current axes.
    ax.set_title(site)
    ax.set_xlabel('Days')
    ax.set_ylim(0.0, 1.0)
    if i == 0:
        # Only the first panel carries the y-axis label.
        ax.set_ylabel('Fraction alive after x days')
plt.tight_layout()
plt.savefig('KMcurves.svg', dpi=600, format='svg')
# C70 Malignant neoplasm of meninges
# C71 Malignant neoplasm of brain
# C72 Malignant neoplasm of spinal cord, cranial nerves and other parts of central nervous system

# D32 Benign neoplasm of meninges
# D33 Benign neoplasm of brain and other parts of central nervous system
# D35 Benign neoplasm of other and unspecified endocrine glands