In [1]:
# Use DeepSurv from the repo
import sys
sys.path.append('../deepsurv')
import deep_surv

import utils

import numpy as np
import pandas as pd

from glob import glob
import os
import re



# Read in the model from results


In [2]:
%%capture
results_dir = '../experiments/results_2022-01-29B'

experiments = [
	'liver_os_sbrtVSnone',
	'liver_os_rfaVSnone',
	'liver_os_sbrtORrfa',
	'liver_os_sbrtVSrfa',
	'liver_pfs_sbrtVSnone',
	'liver_pfs_rfaVSnone',
	'liver_pfs_sbrtORrfa',
	'liver_pfs_sbrtVSrfa'
]

models = {}

for experiment in experiments:
	pattern = f'{results_dir}/{experiment}/models/*.h5'

	if not experiment in models or not models[experiment]:
		models[experiment] = []

	for weights_fp in glob(pattern):
		print(weights_fp)
		model_fp = f'../experiments/deepsurv/models/{os.path.splitext(os.path.splitext(os.path.basename(weights_fp))[0])[0]}.json'
		# models[experiment].append({'model_fp': model_fp, 'weights_fp': weights_fp})
		models[experiment].append(deep_surv.load_model_from_json(model_fp, weights_fp))
		


In [15]:
data_dir = '../experiments/data/liver'
df_os = pd.read_csv(os.path.join(data_dir, '220123Liver-OS.csv'), dtype=np.float32)
df_pfs = pd.read_csv(os.path.join(data_dir, '220123Liver-PFS.csv'), dtype=np.float32)

recommendations = {}

for key in models:
	m = re.match(r'liver_(?P<outcome>.*)_(?P<comparison>.*)', key)
	if not m:
		continue

	df = df_os.drop(['OSEvent', 'TTDy'], axis='columns') if m.group('outcome') == 'os' else df_pfs.drop(['PFSEvent', 'TTPy'], axis='columns')
	if 'sbrtVSnone' in key:
		drop_cols, drop_rows = ['Treatment', 'RFA', 'SBRT_OR_RFA'], ['RFA']
		drop_idx = df.where((df.loc[:, drop_rows] == 1).any(axis='columns')).dropna().index
	elif 'sbrtVSrfa' in key:
		drop_cols, drop_rows = ['Treatment', 'RFA', 'SBRT_OR_RFA'], ['SBRT_OR_RFA']
		drop_idx = df.where((df.loc[:, drop_rows] == 0).any(axis='columns')).dropna().index
	elif 'rfaVSnone' in key:
		drop_cols, drop_rows = ['Treatment', 'SBRT', 'SBRT_OR_RFA'], ['SBRT']
		drop_idx = df.where((df.loc[:, drop_rows] == 1).any(axis='columns')).dropna().index
	elif 'sbrtORrfa' in key:
		drop_cols, drop_rows = ['Treatment', 'SBRT', 'RFA'], []
		drop_idx = df.where((df.loc[:, drop_rows] == 1).any(axis='columns')).dropna().index
	print(key)
	df = df.drop(drop_idx, axis='rows').drop(drop_cols, axis='columns')


	data_fp = f'220123Liver-{"OS" if "os" in key else "PFS"}_Tx{m.group("comparison").swapcase()}.hd5'
	print(os.path.join(data_dir, f'../{key}/{data_fp}'))

	for model in models[key]:
		if not key in recommendations or not recommendations[key]:
			recommendations[key] = []

		# model = deep_surv.load_model_from_json(fp['model_fp'], fp['weights_fp'])

		# standardize dataset
		# df_norm = df.copy().values
		# df_norm = (df_norm - norm_vals['mean']) / norm_vals['std']

		datasets = utils.load_datasets(os.path.join(data_dir, f'../{key}/{data_fp}'))
		norm_vals = {
				'mean': datasets['train']['x'].mean(axis=0),
				'std': datasets['train']['x'].std(axis=0)
		}
		df_norm = utils.standardize_dataset(datasets['test'], norm_vals['mean'], norm_vals['std'])

		print(model)
		rec = model.recommend_treatment(df_norm['x'], 1, 0, trt_idx=0)
		print(datasets['test']['x'].shape)
		# df['logLambdaDiff'] = pd.Series(np.squeeze(rec), index=df.index)
		# recommendations[key].append(
		# 	{
		# 		'idx': [df.index[np.squeeze(np.argwhere(np.all(df.values == row, axis=1)))] for row in datasets['test']['x']],
        #         'logLambdaDiff': np.squeeze(rec),
        #         'rec': np.squeeze(rec < 0)
        #     }
		# )
		break
		print(f'No. of treatment recommendations: {np.count_nonzero(rec < 0) / len(rec) * 100:03.0f}% ({np.count_nonzero(rec < 0)} / {len(rec)})')
	break
	print('-'*80)


liver_os_sbrtVSnone
../experiments/data/liver/../liver_os_sbrtVSnone/liver_os_sbrtVSnone.hd5
<deep_surv.DeepSurv object at 0x1047335e0>
(94, 14)


In [None]:
# recommendations

with pd.ExcelWriter(f'Recommendations.xlsx', engine='xlsxwriter') as writer:
	for key in recommendations:
		count = 0
		for d in recommendations[key]:
			id = f'{key}_{count}'
			worksheet = writer.book.add_worksheet(id)
			writer.sheets[id] = worksheet
			pd.DataFrame(d).to_excel(writer, id, index=False)
			count += 1


In [19]:
print(datasets['train']['x'].shape)
print(rec.shape)
colnames = ['cTxt'] + [f'c{i}' for i in range(datasets['train']['x'].shape[1]-1)] + ['TxtRec']
df = pd.DataFrame(np.append(datasets['train']['x'], rec, axis=1), columns=colnames)
df


(94, 14)
(94, 1)


Unnamed: 0,ReceivedTxt,c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,TxtRec
0,1.0,62.0,0.0,5.0,2.0,1.0,2.0,2.0,1.0,2.0,1.8,3.0,2.0,0.0,-0.290467
1,1.0,57.0,0.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,4.9,2.0,1.0,0.0,-0.351006
2,1.0,66.0,0.0,3.0,2.0,3.0,2.0,2.0,2.0,1.0,1.3,1.0,1.0,0.0,-0.282377
3,1.0,65.0,1.0,1.0,1.0,1.0,2.0,2.0,1.0,1.0,3.0,3.0,1.0,0.0,-0.256968
4,1.0,81.0,0.0,4.0,2.0,1.0,1.0,1.0,1.0,3.0,2.7,2.0,3.0,0.0,-0.342319
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89,0.0,53.0,0.0,5.0,2.0,3.0,2.0,2.0,2.0,3.0,2.2,3.0,1.0,0.0,-0.296010
90,0.0,60.0,0.0,5.0,1.0,1.0,2.0,2.0,2.0,1.0,2.3,3.0,1.0,1.0,-0.316676
91,0.0,65.0,1.0,2.0,2.0,1.0,2.0,2.0,2.0,9.0,6.7,3.0,2.0,1.0,-0.320025
92,0.0,60.0,1.0,1.0,2.0,1.0,3.0,3.0,3.0,2.0,2.2,3.0,2.0,1.0,-0.439841
