This notebook calculates the minimum number of relapse cells coexisting in the primary sample at the time of diagnosis, using the doubling time estimates calculated in relapse_growth_model.ipynb. Results are shown as the bar plots in Figure 5 and Additional file 1: Figure S9c.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.patches as mpatches
import matplotlib.lines as mlines
import matplotlib.ticker as ticker
import seaborn as sns
from collections import OrderedDict
import json
from aux_data_in_pyvar import config_rcparams

In [None]:
# Run the plotting configuration helper imported from aux_data_in_pyvar.
# NOTE(review): presumably sets matplotlib rcParams for all figures below — confirm in that module.
config_rcparams()

In [None]:
## FUNCTIONS
def function_min_cells(rw, db, measure, total_cells=7.5E11):
    """Estimate how many relapse cells were present at diagnosis for one record.

    The relapse burden (``total_cells`` scaled by the relapse blast percentage) is
    halved once for every doubling time elapsed between primary and relapse:

        cells = total_cells * (Relapse_sample_blasts / 100)
                / 2 ** (days_between_pry_rel / db)

    Parameters
    ----------
    rw : mapping-like (e.g. a pandas Series row)
        Must provide 'Relapse_sample_blasts' (divided by 100 here, i.e. treated as
        a percentage) and 'days_between_pry_rel'. It is modified in place.
    db : float
        Doubling time, in the same time unit as 'days_between_pry_rel'.
    measure : str
        Suffix of the output key; the result is stored under 'cells_<measure>'.
    total_cells : float, optional
        Cell count corresponding to 100% blasts. Defaults to 7.5E11, the value
        that used to be hard-coded in the formula.

    Returns
    -------
    The same ``rw`` object, with the 'cells_<measure>' entry added.
    """
    relapse_cells = total_cells * (rw['Relapse_sample_blasts'] / 100)
    # Number of doublings that fit in the primary-to-relapse interval
    doublings = rw['days_between_pry_rel'] / db
    rw['cells_{}'.format(measure)] = relapse_cells / (2 ** doublings)
    return rw

### Minimum number of cells

In [None]:
# Clinical table with the primary-to-relapse interval and the pathologist blast measures
path_to_tsv = "" # path to the table of Additional file 2 Table S1

df_clinical = pd.read_csv(path_to_tsv, sep='\t').rename(columns={'Patient_id':'PATIENT'})

# Keep the three fields needed downstream and drop rows missing any of them
columns_needed = ['PATIENT','days_between_pry_rel','Relapse_sample_blasts']
blasts_estimates = df_clinical[columns_needed].dropna()

In [None]:
# Doubling time estimates written by relapse_growth_model.ipynb
estimates_path = '../intermediate_files/info_pop_cells.json'
with open(estimates_path, 'r') as handle:
    bootstrap_estimates = json.load(handle)

In [None]:
# Display the loaded estimates; the cells below iterate over its items and read its 'mean' entry
bootstrap_estimates

In [None]:
# calculate the number of cells at time of diagnosis and compare with measures from Li et al., 2020 Blood
doubling_times_blood = {'CI_down':5,'mean':7.4, 'CI_upp':9}

# Every doubling time adds one 'cells_<measure><suffix>' column to blasts_estimates:
# '_blood' for the published values above, '_estimates' for the values loaded from JSON.
doubling_time_sources = (
    ('_blood', doubling_times_blood),
    ('_estimates', bootstrap_estimates),
)
for suffix, doubling_times in doubling_time_sources:
    for m, db in doubling_times.items():
        # function_min_cells receives each row first, then the extra positional args
        blasts_estimates = blasts_estimates.apply(function_min_cells, axis=1, args=(db, m + suffix))

## prepare dataframe for plot
is_cells_column = blasts_estimates.columns.str.contains('cells_')
cols = blasts_estimates.columns[is_cells_column].tolist() + ['PATIENT']
# Chained calls build a new frame; the previous in-place dropna/sort_values acted on a
# column selection of blasts_estimates, the pattern that triggers SettingWithCopyWarning.
df_plot = (
    blasts_estimates[cols]
    .dropna()
    .sort_values("cells_mean_estimates", ascending=False)
)
df_plot

In [None]:
#df_plot.to_csv("../intermediate_files/num_cells_db.tsv", sep='\t', index=False)

#### Comparison with the B-ALL doubling times from the Blood paper (Li et al., 2020)

In [None]:
fig, ax = plt.subplots(figsize=(12,6))

barWidth = 1

# Each patient occupies three x slots: B-ALL bar (j), T-ALL bar (j+1) and an empty spacer (j+2)
j = 0
xlabels = []

for _, rw in df_plot.iterrows():
    # Bar from the published B-ALL doubling time; the vertical segment spans its CI columns
    ax.bar(j, rw['cells_mean_blood'], color='#fdc9a2ff', edgecolor='white',
           width=barWidth, label='7.4 mean doubling time \n B-ALL (Li et al., 2020, Blood)')
    ax.vlines(x=j, ymin=rw['cells_CI_down_blood'], ymax=rw['cells_CI_upp_blood'], color="#4d4d4d")
    # Bar from the doubling time loaded into bootstrap_estimates, with its CI segment
    ax.bar(j+1, rw['cells_mean_estimates'], color='#fd8d3c', edgecolor='white',
           width=barWidth, label='{} mean doubling time \n T-ALL (in-house cohort)'.format(bootstrap_estimates['mean']))
    ax.vlines(x=j+1, ymin=rw['cells_CI_down_estimates'], ymax=rw['cells_CI_upp_estimates'], color="#4d4d4d")
    # One tick label per slot; the patient id goes on the middle slot
    xlabels.extend(['',rw['PATIENT'],''])
    j=j+3

# Reference lines: a single cell (dashed) and 7.5E7 cells (dotted)
ax.hlines(xmin=-1, xmax=j, y=1, linestyles='dashed', color='#808080ff')
ax.hlines(xmin=-1, xmax=j, y=7.5E7, linestyles=':', color='#808080ff')

# Base 10 is the default for the log scale; the former `basey` keyword was removed
# in Matplotlib 3.5 and raises a TypeError there.
ax.set_yscale('log')
ax.set_ylim(1E-10, 1E12)
ax.set_yticks([1, 1E1,1E2,1E3,1E4,1E5,1E6,1E7,1E8,1E9,1E10,1E11])
ax.set_ylabel("log10(number of relapse \n cells at diagnosis)")

ax.set_xticks([x for x in range(0,j,1)])
ax.set_xticklabels(labels=xlabels,ha='right', rotation=45)
ax.tick_params(axis='x', length=0)

ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)

# Legend
handles, labels = ax.get_legend_handles_labels()

dashed_line = mlines.Line2D([], [], linewidth=2, linestyle="--", dashes=(3.7, 2), color='#808080ff')
dotted_line = mlines.Line2D([], [], linewidth=2, linestyle=":", color='#808080ff')

handles.append(dotted_line)
labels.append('Clinical refractory threshold')

handles.append(dashed_line)
labels.append('1 relapse founder cell')

# Every bar carries a label, so collapse the repeated entries to one handle per label.
# A single legend call is enough: an earlier ax.legend(...) here was immediately replaced.
by_label = OrderedDict(zip(labels, handles))
ax.legend(by_label.values(), by_label.keys(),prop={'size': 10},bbox_to_anchor=(1,0.5))

plt.tight_layout()
fig.savefig("relapse_cells_in_dx_comparative.svg", dpi=300,bbox_inches='tight')
plt.show()

#### Using only the doubling time estimate from the adult T-ALL cohort

In [None]:
# Renumber the sorted rows 0..n-1 so the index matches the plotting order
df_plot = df_plot.reset_index(drop=True)

In [None]:
fig, ax = plt.subplots(figsize=(10,6))

barWidth = 0.8

xlabels = []

# Use the running position as x coordinate so the plot does not depend on df_plot's index
# and the code after the loop does not rely on a leftover loop variable.
for pos, (_, rw) in enumerate(df_plot.iterrows()):
    ax.bar(pos, float(rw['cells_mean_estimates']) , color='#fedabfff', edgecolor='#fd8d3c',linewidth=2,
           width=barWidth, label='{} mean doubling time \n T-ALL (in-house cohort)'.format(bootstrap_estimates['mean']))
    # Vertical segment between the CI-based estimates
    ax.vlines(x=pos, ymin=rw['cells_CI_down_estimates'], ymax=rw['cells_CI_upp_estimates'], color="#4d4d4d")
    xlabels.append(rw['PATIENT'])

n_bars = len(xlabels)

ax.hlines(xmin=-1, xmax=n_bars, y=1, linestyles='dashed', color='#808080ff') # 1 cell
ax.hlines(xmin=-1, xmax=n_bars, y=7.5E7, linestyles=':', color='#808080ff') # pathologist limit 0.01%
# NOTE(review): 0.089% of 7.5E11 cells would be ~6.68E8, not 6.68E6 — confirm the intended value/units.
ax.hlines(xmin=-1, xmax=n_bars, y=6.68E06, linestyles='dashdot', color='#808080ff') # dPCR limit 0.089%
#ax.hlines(xmin=-1, xmax=n_bars, y=8.25E06, linestyles='-',color='#808080ff') # dPCR limit 0.11%

# Base 10 is the default for the log scale; the former `basey` keyword was removed
# in Matplotlib 3.5 and raises a TypeError there.
ax.set_yscale('log')
ax.set_ylim(1E-10, 1E12)
ax.set_yticks([1,1E1,1E2,1E3,1E4,1E5,1E6,1E7,1E8,1E9,1E10,1E11])
ax.set_ylabel("log10(number of relapse \n cells at diagnosis)")

# One tick per plotted bar, so ticks and labels always have the same length
ax.set_xticks(list(range(n_bars)))
ax.set_xticklabels(labels=xlabels,ha='center', rotation=90)
ax.tick_params(axis='x', length=0)

ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)

# Legend
handles, labels = ax.get_legend_handles_labels()

dashed_line = mlines.Line2D([], [], linewidth=2, linestyle="--", dashes=(3.7, 2), color='#808080ff')
dotted_line = mlines.Line2D([], [], linewidth=2, linestyle=":", color='#808080ff')
dashdot_line = mlines.Line2D([], [], linewidth=2, linestyle='dashdot', color='#808080ff')
#solid_line = mlines.Line2D([], [], linewidth=2, linestyle='-',color='#808080ff')

handles.append(dotted_line)
labels.append('MRD 0.01')

handles.append(dashed_line)
labels.append('1 relapse founder cell')

handles.append(dashdot_line)
labels.append('dPCR detection limit SMARCA4')

# Every bar carries the same label, so collapse the repeated entries to one handle per label.
# A single legend call is enough: an earlier ax.legend(...) here was immediately replaced.
by_label = OrderedDict(zip(labels, handles))
ax.legend(by_label.values(), by_label.keys(),prop={'size': 14},bbox_to_anchor=(1,0.5))

plt.tight_layout()
fig.savefig("relapse_cells_in_dx.svg", dpi=300,bbox_inches='tight')
plt.show()

PAT8 — mutation: T786I; detection limit: approx. 0.11%.

PAT14 — mutation: G1162S; detection limit: 0.089%.