In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pysam
from collections import Counter
from matplotlib import cm
from matplotlib.colors import ListedColormap

import io
from pysam import VariantFile
from matplotlib_venn import venn3
import venn

# Tumor burden

In [None]:
# Tumor burden: load the per-sample estimates of both cohorts into one table.
# Column 0 of each file holds a sample name from which patient and date are
# parsed; column 1 is kept as the tumor burden value.
tf_file_1 = (
    pd.read_csv('../data/tumor_burden/tumor_burden_ichorcna_CCG.txt', header=None)
    .assign(
        # patient: last '_'-separated token of the text before the first '.'
        patient=lambda df: df[0].str.split('.').str[0].str.split('_').str[-1],
        # date: second '.'-separated field of the sample name
        date=lambda df: df[0].str.split('.').str[1],
    )
    .assign(patient_date=lambda df: df['patient'] + '_' + df['date'])
    .set_index('patient_date')
    .drop(columns=0)
    .rename(columns={1: 'tumor_burden'})
)

tf_file_2 = (
    pd.read_csv('../data/tumor_burden/tumor_burden_ichorcna_CRC.txt', header=None)
    .assign(
        # patient: text between the first '-' and the following '_'
        patient=lambda df: df[0].str.split('-').str[1].str.split('_').str[0],
        # date: second '_'-separated field of the sample name
        date=lambda df: df[0].str.split('_').str[1],
    )
    .assign(patient_date=lambda df: df['patient'] + '_' + df['date'])
    .set_index('patient_date')
    .drop(columns=0)
    .rename(columns={1: 'tumor_burden'})
)

print(tf_file_1.shape, tf_file_2.shape)

# Stack both cohorts, give patient/date proper dtypes and discard the
# temporary 'patient_date' index.
tf_file = (
    pd.concat([tf_file_1, tf_file_2])
    .astype({'patient': int})
    .assign(date=lambda df: pd.to_datetime(df['date'], format='%d%m%y'))
    .reset_index()
    .drop(columns='patient_date')
)
tf_file

# Treatment

In [None]:
# Treatment table: tab-separated file reduced to one row per
# (patient, date, treatment), with the 'value' column renamed to 'treatment'.
treatment_file = (
    pd.read_csv('../data/treatment/patient_treatment_total_std_201109.txt', sep='\t')
    .astype({'patient': int})
    .assign(date=lambda df: pd.to_datetime(df['date'], format='%Y-%m-%d'))
    .rename(columns={'value': 'treatment'})
    [['patient', 'date', 'treatment']]
)
treatment_file

## Patient 986

In [None]:
# Deep-WGS sample dates for this patient (not used in this cell).
# NOTE(review): presumably DDMMYY like the tumor burden file dates - confirm.
date_deepwgs_1 = '100215'
date_deepwgs_2 = '261016'

patient = 986

# Per-patient slices, ordered chronologically while 'date' is still a datetime,
# then cast to ISO date strings so both tables share one string date format.
treatment_patient = (
    treatment_file[treatment_file['patient'] == patient]
    .sort_values('date')
    .assign(date=lambda df: df['date'].astype(str))
)
tf_patient = (
    tf_file[tf_file['patient'] == patient]
    .sort_values('date')
    .assign(date=lambda df: df['date'].astype(str))
)

# Merge treatment events and tumor burden measurements into one timeline.
# A stable sort keeps rows that share a date in a deterministic order, so the
# forward fill below gives the same result on every run.
df_patient = pd.concat([treatment_patient, tf_patient]).sort_values('date', kind='stable')

# Treatment rows carry no measurement: propagate the last known tumor burden.
# (.ffill() replaces the deprecated fillna(method='ffill').)
df_patient['tumor_burden'] = df_patient['tumor_burden'].ffill()

In [None]:
# Timeline figure: tumor burden on the left axis, treatments on a twin axis.
sns.set(font_scale=2)
fig, ax = plt.subplots(figsize=(50,10))

# Black line through every timeline row, black squares on the rows of tf_patient.
ax.plot(df_patient['date'], df_patient['tumor_burden'], 'k-', linewidth=4)
# 'ks' = black squares; the marker is given once instead of '.' in the format
# string plus a conflicting marker='s' keyword.
ax.plot(tf_patient['date'], tf_patient['tumor_burden'], 'ks', markersize=20)
ax.set_ylabel('tumor burden', fontsize=25.0)
# Rotate/resize the x tick labels through tick_params so the setting also holds
# for ticks created later, instead of freezing the current label texts.
ax.tick_params(axis='x', labelrotation=45, labelsize=20.0)

# twin object for two different y-axes on the same plot
ax2 = ax.twinx()
# make a plot with different y-axis using second axis object
sns.stripplot(y='treatment', x='date', hue='treatment', data=df_patient, s=20, ax=ax2)

# The hue legend only repeats the y-axis categories: remove it if one was drawn.
legend = ax2.get_legend()
if legend is not None:
    legend.remove()

fig.savefig('../figures/oncosg_timeline_patient'+str(patient)+'.png')

In [None]:
# Show the measurement dates next to their tumor burden values for a visual check.
tf_patient.loc[:, ['date', 'tumor_burden']]

## Patient 809

In [None]:
# Deep-WGS sample dates for this patient (not used in this cell).
# NOTE(review): presumably DDMMYY like the tumor burden file dates - confirm.
date_deepwgs_1 = '110914'
date_deepwgs_2 = '030915'

patient = 809

# Treatment events, ordered chronologically, dates cast to ISO date strings.
treatment_patient = (
    treatment_file[treatment_file['patient'] == patient]
    .sort_values('date')
    .assign(date=lambda df: df['date'].astype(str))
)

# data from oncosg added manually
# The dates are built as Timestamps so the 'date' column stays datetime and
# every row is later converted to the same string format. pd.concat replaces
# DataFrame.append, which was removed in pandas 2.0.
tf_manual = pd.DataFrame({
    'date': pd.to_datetime(['2014-09-11', '2015-09-03']),
    'patient': [patient, patient],
    'tumor_burden': [0.3577, 0.4626],
})
tf_patient = (
    pd.concat([tf_file[tf_file['patient'] == patient], tf_manual], ignore_index=True)
    .sort_values('date')  # re-sort so the manual rows land in chronological order
    .reset_index(drop=True)
    .assign(date=lambda df: df['date'].astype(str))
)

# Merge treatment events and tumor burden measurements into one timeline.
# A stable sort keeps rows that share a date in a deterministic order, so the
# forward fill below gives the same result on every run.
df_patient = pd.concat([treatment_patient, tf_patient]).sort_values('date', kind='stable')

# Treatment rows carry no measurement: propagate the last known tumor burden.
# (.ffill() replaces the deprecated fillna(method='ffill').)
df_patient['tumor_burden'] = df_patient['tumor_burden'].ffill()

In [None]:
# Timeline figure: tumor burden on the left axis, treatments on a twin axis.
sns.set(font_scale=2)
fig, ax = plt.subplots(figsize=(50,10))

# Black line through every timeline row, black squares on the rows of tf_patient.
ax.plot(df_patient['date'], df_patient['tumor_burden'], 'k-', linewidth=4)
# 'ks' = black squares; the marker is given once instead of '.' in the format
# string plus a conflicting marker='s' keyword.
ax.plot(tf_patient['date'], tf_patient['tumor_burden'], 'ks', markersize=20)
ax.set_ylabel('tumor burden', fontsize=25.0)
ax.set_ylim([0, 0.6])
# Rotate/resize the x tick labels through tick_params so the setting also holds
# for ticks created later, instead of freezing the current label texts.
ax.tick_params(axis='x', labelrotation=45, labelsize=20.0)

# twin object for two different y-axes on the same plot
ax2 = ax.twinx()
# make a plot with different y-axis using second axis object
sns.stripplot(y='treatment', x='date', hue='treatment', data=df_patient, s=20, ax=ax2)

# The hue legend only repeats the y-axis categories: remove it if one was drawn.
legend = ax2.get_legend()
if legend is not None:
    legend.remove()

fig.savefig('../figures/oncosg_timeline_patient'+str(patient)+'.png')

In [None]:
# Display the current patient's tumor burden rows as prepared above.
tf_patient

In [None]:
# Distinct patient ids present in the combined tumor burden table.
pd.unique(tf_file['patient'])