In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pysam
from collections import Counter
from matplotlib import cm
from matplotlib.colors import ListedColormap
from datetime import datetime, date, timedelta

import io
from pysam import VariantFile

In [None]:
# Global plot styling. 'talk' selects a dark slide theme with white lines;
# any other value selects the paper theme with black lines.
context = 'paper' # talk or paper

if context != 'talk':
    sns.set(context="paper", font_scale=4, rc={"lines.linewidth": 2, "legend.fontsize":25})
    plt.rcParams.update({'font.size': 35})
    line_colour = 'k'
else:
    sns.set(style="darkgrid", context="talk", rc={"lines.linewidth": 2, "legend.fontsize":25})
    plt.style.use("dark_background")
    plt.rcParams.update({"grid.linewidth":0.5, "grid.alpha":0.5,'font.size': 30})
    line_colour = 'w'

# Both themes share the same colour palette.
sns.set_palette("deep")

# Single-letter matplotlib colour used for the tumor-burden line in later cells.
lc = line_colour
del line_colour

In [None]:
# CRC deep WGS patients
# MSS: 986, 809, 1014, 519, 1279
# MS status not done: 512, 1531

# Integer patient ID used by the single-patient cells below to filter the
# tumor-burden, treatment and variant tables. Uncomment one of the
# alternatives (and comment the active line) to analyse another patient.
#patient = 1014
#patient = 986
#patient = 809
patient = 1388

# Tumor burden

In [None]:
# NOTE(review): `tf_file` is first assigned in the following cell, so on a fresh
# kernel (Restart & Run All) this line raises NameError. It only works once the
# tumor-burden cell below has been executed.
tf_file['patient'].unique()

In [None]:
# Tumor burden (ichorCNA): two cohorts whose sample names encode patient and date
# differently. Each is parsed into `patient` / `date` columns, then both are stacked.

# CCG cohort: patient is the last '_' token before the first '.', date is the second '.' token.
tf_file_1 = pd.read_csv('../data/tumor_burden/tumor_burden_ichorcna_CCG.txt', header=None)
tf_file_1 = tf_file_1.assign(
    patient=tf_file_1[0].str.split('.').str[0].str.split('_').str[-1],
    date=tf_file_1[0].str.split('.').str[1],
)
tf_file_1 = tf_file_1.assign(patient_date=tf_file_1['patient']  + '_' + tf_file_1['date'])
tf_file_1 = (
    tf_file_1
    .set_index('patient_date')
    .drop(columns=0)
    .rename(columns={1: 'tumor_burden'})
)

# CRC cohort: patient sits between the first '-' and the next '_', date is the second '_' token.
tf_file_2 = pd.read_csv('../data/tumor_burden/tumor_burden_ichorcna_CRC.txt', header=None)
tf_file_2 = tf_file_2.assign(
    patient=tf_file_2[0].str.split('-').str[1].str.split('_').str[0],
    date=tf_file_2[0].str.split('_').str[1],
)
tf_file_2 = tf_file_2.assign(patient_date=tf_file_2['patient'] + '_' + tf_file_2['date'])
tf_file_2 = (
    tf_file_2
    .set_index('patient_date')
    .drop(columns=0)
    .rename(columns={1: 'tumor_burden'})
)

# print(tf_file_1.shape, tf_file_2.shape)

# Stack both cohorts, type the key columns, and replace the patient_date index by a plain one.
tf_file = pd.concat([tf_file_1, tf_file_2])
tf_file = tf_file.assign(
    patient=tf_file['patient'].astype(int),
    date=pd.to_datetime(tf_file['date'], format='%d%m%y'),
)
tf_file = tf_file.reset_index(drop=True)

# Sorted, de-duplicated sampling dates (as strings) for the selected patient.
tumorburden_dates = tf_file.loc[tf_file['patient'] == patient, 'date'].sort_values().astype(str).unique()
print(tumorburden_dates)

print(len(tf_file[tf_file['patient'] == patient]))
tf_file[tf_file['patient'] == patient]

# Treatment

In [None]:
# Treatment table: one row per (patient, date, treatment); the raw `value` column
# holds the treatment name.
treatment_file = pd.read_csv(
    '../data/treatment/patient_treatment_total_std_201109.txt', sep='\t'
).rename(columns={'value':'treatment'})
treatment_file = treatment_file.assign(
    patient=treatment_file['patient'].astype(int),
    date=pd.to_datetime(treatment_file['date'], format='%Y-%m-%d'),
)[['patient', 'date', 'treatment']]

# Treatment dates recorded for the selected patient.
treatment_file.loc[treatment_file['patient'] == patient, 'date'].astype(str).unique()

# Patient timeline

In [None]:
# Per-patient slices, sorted chronologically, with dates turned into strings so they
# can be used as categorical x positions in the plots below.
treatment_patient = treatment_file[treatment_file['patient'] == patient].sort_values('date')
treatment_patient = treatment_patient.assign(date=treatment_patient['date'].astype(str))

tf_patient = tf_file[tf_file['patient'] == patient].sort_values('date')
tf_patient = tf_patient.assign(date=tf_patient['date'].astype(str))

# Treatments and tumor-burden measurements stacked into one date-sorted frame.
df_patient = pd.concat([treatment_patient, tf_patient]).sort_values('date')

In [None]:
# Sanity check: tumor-burden dates missing from the combined frame (expected: none) ...
print([str(day) for day in tf_patient.loc[~tf_patient['date'].isin(df_patient['date']), 'date'].unique()])
# ... and dates of the combined frame that have no tumor-burden measurement.
print([str(day) for day in df_patient.loc[~df_patient['date'].isin(tf_patient['date']), 'date'].unique()])

# Every distinct date of the patient timeline, in ascending order.
alldates = sorted(set(df_patient['date']).union(tf_patient['date']))
print(len(alldates))

In [None]:
# Days elapsed between the first timeline date and every timeline date.
daytimes = [
    (datetime.strptime(day, '%Y-%m-%d') - datetime.strptime(alldates[0], '%Y-%m-%d')).days
    for day in alldates
]
print(daytimes)

In [None]:
# Timeline figure for the selected patient: treatments as a categorical strip plot
# (left axis) overlaid with the ichorCNA tumor burden (right axis).
# `plt.subplots` already creates the figure; a preceding bare `plt.figure()` would
# only leave an extra, empty figure behind.
fig, ax2 = plt.subplots(figsize=(40,10))

# Left axis: one row per treatment, one column per date.
sns.stripplot(y='treatment', x='date', hue='treatment', data=df_patient, s=10, ax=ax2)
ax2.grid(False)
ax2.legend((), ())  # replace the per-treatment hue legend by an empty one

# Twin axis sharing x, used for fractions (tumor burden).
ax = ax2.twinx()

# Plotting the full date column first registers every date as an x category, in
# timeline order. `Axes.plot` returns a list, so unpack the single Line2D: a list
# is not a valid legend handle.
ele0, = ax.plot(df_patient['date'], df_patient['tumor_burden'], color=lc, linestyle='None',
                marker='s', markersize=10, label='ichorCNA tumor burden')

# Connect only the dates that actually have a measurement (treatment rows are NaN).
has_tumor_burden = df_patient['tumor_burden'].notna()
ax.plot(df_patient.loc[has_tumor_burden, 'date'], df_patient.loc[has_tumor_burden, 'tumor_burden'],
        color=lc, linestyle='-', linewidth=4, marker='s', markersize=10)
ax.set_ylabel('fraction (tumor burden or VAF)', fontsize=30)
ax.set_ylim(-0.01, 1)

# One legend built from the explicit handle.
ax.legend(handles=[ele0], loc='upper left')

# Only label the dates at which tumor burden was measured.
labels = [ad if ad in tumorburden_dates else '' for ad in alldates]
ax2.set_xticklabels(labels, rotation=90, fontsize=30)
ax.set_xticklabels(labels, rotation=90, fontsize=30)

ax.set_title('Patient {}: VAF evolution across timepoints'.format(patient))

# An existing PNG is never overwritten; delete it by hand to regenerate the figure.
if not os.path.exists('../figures/oncosg_timeline_patient'+str(patient)+'.png'):
    fig.savefig('../figures/oncosg_timeline_patient'+str(patient)+'.png', bbox_inches='tight')

plt.show()

In [None]:
# check tf dates

# Dates at which ichorCNA reports a tumor burden of exactly 0 for this patient.
date_lowtftimepoints = tf_patient.loc[tf_patient['tumor_burden'] == 0, 'date'].unique().tolist()
print(date_lowtftimepoints)

# Wide view: one column per measurement.
tf_patient[['date', 'tumor_burden']].transpose()

# Mutation calls

In [None]:
def read_vcf(path):
    """Load the records of a VCF file into a DataFrame.

    Lines starting with '##' (meta-information) are skipped; the remaining
    text, beginning with the '#CHROM ...' header line, is parsed as
    tab-separated values. The '#CHROM' column is renamed to 'CHROM'.

    Parameters
    ----------
    path : str
        Path of the (uncompressed, text) VCF file.

    Returns
    -------
    pandas.DataFrame
        One row per record; POS is parsed as int, the other fixed VCF
        columns as str. Any further columns keep pandas' inferred dtype.
    """
    fixed_column_types = {
        '#CHROM': str, 'POS': int, 'ID': str, 'REF': str, 'ALT': str,
        'QUAL': str, 'FILTER': str, 'INFO': str,
    }
    with open(path, 'r') as handle:
        # Keep the single-'#' header line, drop the '##' meta lines.
        body = ''.join(row for row in handle if not row.startswith('##'))
    table = pd.read_csv(io.StringIO(body), sep='\t', dtype=fixed_column_types)
    return table.rename(columns={'#CHROM': 'CHROM'})

In [None]:
# Disabled exploratory code (kept for reference, never executed): reads the
# per-patient re-genotyped VCF and selects the sample columns of the zero
# tumor-burden dates. NOTE(review): the sample prefix is hard-coded to '986_'
# instead of using `patient`.
# if  os.path.exists('../data/variant_calls/'+str(patient)+'_reGeno.VEP.vcf'):
#    mutation_df = read_vcf('../data/variant_calls/'+str(patient)+'_reGeno.VEP.vcf')
#    mutation_df = mutation_df[mutation_df['FILTER'] == 'PASS']
#    print(mutation_df.columns)
#    
#    col = ['CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO', 'FORMAT']
#    for it in list(date_lowtftimepoints):
#        aux = '986_'+ it.split('-')[-1]+it.split('-')[1]+it.split('-')[0][-2:]
#        if aux in mutation_df.columns:
#            col.append(aux)
#        else:
#            print(aux)
#    print(col)
#    mutation_lowtftimepoints = mutation_df[col]
#    mutation_lowtftimepoints
#    foo2 = lambda x: pd.Series(float(x.split(':')[-1])/float(x.split(':')[-2]))
#    mutation_lowtftimepoints

In [None]:
# Variant calls for all patients; report how many distinct genes are covered.
mutation_file = pd.read_csv('../data/variant_calls/total_variants_mmpm_puradj.txt', sep='\t')
print(mutation_file['GENE'].nunique())

# Keep the selected patient only.
mutation_file = mutation_file[mutation_file['pid'] == patient]

# Trusted calls at low tumor-fraction timepoints (ichorCNA tumor fraction < 0.05).
# Requiring TIERS == 'Trusted' already excludes the 'Germline' and 'Artifact' tiers,
# so a single mask replaces the earlier chain of filters. The previous
# `ichor_tf == 0` selection was overwritten on the next line and never used.
mutation_lowtftimepoints = mutation_file.loc[
    (mutation_file['ichor_tf'] < 0.05) & (mutation_file['TIERS'] == 'Trusted'),
    ['date', 'GENE', 'REF', 'ALT', 'VAF', 'TIERS'],
]
mutation_lowtftimepoints

In [None]:
def _median_or_nan(values):
    """Median of a 1-D array, or NaN (without a NumPy warning) when it is empty."""
    return np.median(values) if len(values) else np.nan


# Per-date summary of the low tumor-fraction calls: number of variants with a
# non-zero VAF and median VAFs, overall and restricted to the 'Trusted' tier.
lowtftimepoints_dict = {'date': date_lowtftimepoints,
                        'median VAF': [],
                        '# mutated genes' : [],
                        'median VAF within mutated genes': [],
                        '# mutated genes TRUSTED': [],
                        'median VAF within mutated genes TRUSTED': []
                       }

# The loop variable is not called `date`, which would rebind the `date` class
# imported from `datetime` at the top of the notebook.
for low_tf_date in date_lowtftimepoints:
    # Rows of this date. The mask must compare the 'date' COLUMN with the current
    # date: comparing the Python list `date_lowtftimepoints` with it is always
    # False and made '# mutated genes' always 0.
    calls_on_date = mutation_lowtftimepoints[mutation_lowtftimepoints['date'] == low_tf_date]
    detected = calls_on_date[calls_on_date['VAF'] != 0]
    detected_trusted = detected[detected['TIERS'] == 'Trusted']

    lowtftimepoints_dict['# mutated genes'].append(detected.shape[0])
    lowtftimepoints_dict['# mutated genes TRUSTED'].append(detected_trusted.shape[0])
    lowtftimepoints_dict['median VAF'].append(_median_or_nan(calls_on_date['VAF'].values))
    lowtftimepoints_dict['median VAF within mutated genes'].append(_median_or_nan(detected['VAF'].values))
    lowtftimepoints_dict['median VAF within mutated genes TRUSTED'].append(_median_or_nan(detected_trusted['VAF'].values))


lowtftimepoints_pd = pd.DataFrame.from_dict(lowtftimepoints_dict).set_index('date')
lowtftimepoints_pd

# Bed file for genes to check manually

Location of the BAM files on Aquila:

/mnt/projects/zwpoh/cfDNA/bulk/ccg/ccg_batch2/lpwgs/hg19_bam/patient/986*.bam

In [None]:
# Trusted variants with a non-zero VAF at the low tumor-fraction timepoints:
# these are the calls to inspect manually in the BAM files.
detected_trusted = mutation_lowtftimepoints[
    (mutation_lowtftimepoints['VAF'] != 0) & (mutation_lowtftimepoints['TIERS'] == 'Trusted')
]

# Collect the per-date subsets and concatenate once (rows grouped by date, in order
# of first appearance). `results_df` stays None when there is no date at all.
per_date_calls = []
for low_tf_date in mutation_lowtftimepoints['date'].unique():
    print(low_tf_date)
    per_date_calls.append(detected_trusted[detected_trusted['date'] == low_tf_date])
results_df = pd.concat(per_date_calls) if per_date_calls else None

# Sorted, de-duplicated gene names.
genes2checkmanually = [] if results_df is None else list(np.unique(list(results_df['GENE'].values)))
# print genes to check manually
print(genes2checkmanually)

# need to get gene location in hg19
print(mutation_lowtftimepoints['date'].unique())
locus_columns = ['date', '#CHROM', 'POS', 'REF', 'ALT', 'GENE']
locus_keys = [] if results_df is None else list(
    results_df[['date', 'GENE']].drop_duplicates().itertuples(index=False, name=None)
)
# For every detected (date, gene) pair, fetch the matching rows (with coordinates)
# from the full per-patient variant table.
locus_frames = [
    mutation_file.loc[(mutation_file['date'] == key_date) & (mutation_file['GENE'] == key_gene), locus_columns]
    for key_date, key_gene in locus_keys
]
if locus_frames:
    gene_df = pd.concat(locus_frames).drop(columns='date').drop_duplicates()
else:
    # Nothing detected: return an empty table instead of failing on `None.drop(...)`.
    gene_df = pd.DataFrame(columns=[c for c in locus_columns if c != 'date'])
gene_df

In [None]:
# gene_df[['#CHROM', 'POS', 'GENE', 'REF', 'ALT']].to_csv('../data/variant_calls/positionfile'+str(patient)+'.txt', header=False, index=False)

# 4 mutations across time

In [None]:
# VAF of the genes flagged above at every timepoint of the patient:
# rows = dates, columns = genes. Only 'Trusted' calls are kept; when a gene has
# several calls on one date, the first one is used.
mutations_acrosstime = (
    mutation_file
    .loc[mutation_file['GENE'].isin(genes2checkmanually), ['date', 'GENE', 'REF', 'ALT', 'VAF', 'TIERS']]
    .loc[lambda calls: calls['TIERS'] == 'Trusted']
    .pivot_table(values='VAF', index='GENE', columns='date', aggfunc='first')
    .transpose()
)
mutations_acrosstime.head()

In [None]:
# Timeline figure for the selected patient: treatments (left axis), ichorCNA tumor
# burden and the VAF of every flagged gene (right axis).
# `plt.subplots` already creates the figure; a preceding bare `plt.figure()` would
# only leave an extra, empty figure behind.
fig, ax2 = plt.subplots(figsize=(40,10))

# Left axis: one row per treatment, one column per date.
sns.stripplot(y='treatment', x='date', hue='treatment', data=df_patient, s=10, ax=ax2)
ax2.grid(False)
ax2.legend((), ())  # replace the per-treatment hue legend by an empty one

# Twin axis sharing x, used for fractions (tumor burden and VAF).
ax = ax2.twinx()

# Plotting the full date column first registers every date as an x category, in
# timeline order. `Axes.plot` returns a list, so unpack the single Line2D: a list
# is not a valid legend handle.
ele0, = ax.plot(df_patient['date'], df_patient['tumor_burden'], color=lc, linestyle='None',
                marker='s', markersize=10, label='ichorCNA tumor burden')

# Connect only the dates that actually have a measurement (treatment rows are NaN).
has_tumor_burden = df_patient['tumor_burden'].notna()
ax.plot(df_patient.loc[has_tumor_burden, 'date'], df_patient.loc[has_tumor_burden, 'tumor_burden'],
        color=lc, linestyle='-', linewidth=4, marker='s', markersize=10)
ax.set_ylabel('fraction (tumor burden or VAF)', fontsize=30)
ax.set_ylim(-0.01, 1)

# Restrict the VAF series to dates that exist on the patient timeline.
on_timeline = mutations_acrosstime.index.isin(df_patient['date'].values)
xacrosstime = list(mutations_acrosstime.index[on_timeline])

eles = [ele0]
collist = ['r', 'b', 'g', 'c', 'm', 'y']

for gi, gene in enumerate(genes2checkmanually):
    yacrosstime = list(mutations_acrosstime[gene].values[on_timeline])
    # Colours are reused cyclically so more genes than colours no longer raises IndexError.
    elei, = ax.plot(xacrosstime, yacrosstime, color=collist[gi % len(collist)], linestyle='-',
                    linewidth=2, marker='D', markersize=10, label=gene)
    eles.append(elei)

# One legend built from the explicit handles (tumor burden + one entry per gene).
ax.legend(handles=eles, loc='upper left')

# Only label the dates at which tumor burden was measured.
labels = [ad if ad in tumorburden_dates else '' for ad in alldates]
ax2.set_xticklabels(labels, rotation=90, fontsize=30)
ax.set_xticklabels(labels, rotation=90, fontsize=30)

ax.set_title('Patient {}: VAF evolution across timepoints'.format(patient))

# An existing PNG is never overwritten; delete it by hand to regenerate the figure.
if not os.path.exists('../figures/oncosg_timeline_mutations_patient'+str(patient)+'.png'):
    fig.savefig('../figures/oncosg_timeline_mutations_patient'+str(patient)+'.png', bbox_inches='tight')

plt.show()

# Generate plots

In [None]:
# Reload the full variant table (earlier cells narrowed `mutation_file` to one patient).
mutation_file = pd.read_csv('../data/variant_calls/total_variants_mmpm_puradj.txt', sep='\t')

a = mutation_file['pid'].unique()   # patients with variant calls
b = tf_file['patient'].unique()     # patients with tumor-burden estimates
print(a)

# Patients that have variant calls but no tumor-burden record.
set(a).difference(b)

In [None]:
# Batch version of the single-patient analysis: for every patient with variant calls,
# save the treatment / tumor-burden timeline and, when trusted variants with a
# non-zero VAF exist at the lowest tumor-fraction timepoint(s), a second timeline
# that also shows the VAF of those genes.


def _median_or_nan(values):
    """Median of a 1-D array, or NaN (without a NumPy warning) when it is empty."""
    return np.median(values) if len(values) else np.nan


def _draw_patient_timeline(df_patient, alldates, tumorburden_dates, patient):
    """Draw treatments (left axis) and ichorCNA tumor burden (right axis) for one patient.

    Parameters
    ----------
    df_patient : DataFrame with string 'date', 'treatment' and 'tumor_burden' columns,
        sorted by date.
    alldates : sorted list of every distinct date string in `df_patient`.
    tumorburden_dates : date strings that have a tumor-burden measurement; only
        these are written as x tick labels.
    patient : patient identifier, used in the title.

    Returns
    -------
    (fig, ax2, ax, ele0) : the figure, the treatment axis, the twin fraction axis and
        the Line2D handle of the tumor-burden markers (usable as a legend handle).
    """
    fig, ax2 = plt.subplots(figsize=(40,10))

    # Left axis: one row per treatment, one column per date.
    sns.stripplot(y='treatment', x='date', hue='treatment', data=df_patient, s=10, ax=ax2)
    ax2.grid(False)
    ax2.legend((), ())  # replace the per-treatment hue legend by an empty one

    # Twin axis sharing x, used for fractions (tumor burden and VAF).
    ax = ax2.twinx()
    # Plotting the full date column first registers every date as an x category.
    # `Axes.plot` returns a list: unpack the Line2D so it is a valid legend handle.
    ele0, = ax.plot(df_patient['date'], df_patient['tumor_burden'], color=lc, linestyle='None',
                    marker='s', markersize=10, label='ichorCNA tumor burden')
    # Connect only the dates that actually have a measurement (treatment rows are NaN).
    has_tumor_burden = df_patient['tumor_burden'].notna()
    ax.plot(df_patient.loc[has_tumor_burden, 'date'], df_patient.loc[has_tumor_burden, 'tumor_burden'],
            color=lc, linestyle='-', linewidth=4, marker='s', markersize=10)
    ax.set_ylabel('fraction (tumor burden or VAF)', fontsize=30)
    ax.set_ylim(-0.01, 1)

    # Only label the dates at which tumor burden was measured.
    labels = [ad if ad in tumorburden_dates else '' for ad in alldates]
    ax2.set_xticklabels(labels, rotation=90, fontsize=30)
    ax.set_xticklabels(labels, rotation=90, fontsize=30)

    ax.set_title('Patient {}: VAF evolution across timepoints'.format(patient))
    return fig, ax2, ax, ele0


# ---- Inputs that do not depend on the patient are read once, outside the loop ----

# tumor burden
tf_file_1 = pd.read_csv('../data/tumor_burden/tumor_burden_ichorcna_CCG.txt', header=None)
tf_file_1['patient'] = tf_file_1[0].str.split('.').str[0].str.split('_').str[-1]
tf_file_1['date'] = tf_file_1[0].str.split('.').str[1]
tf_file_1['patient_date'] = tf_file_1['patient']  + '_' + tf_file_1['date'] 
tf_file_1 = tf_file_1.set_index('patient_date').drop(columns=0).rename(columns={1: 'tumor_burden'})

tf_file_2 = pd.read_csv('../data/tumor_burden/tumor_burden_ichorcna_CRC.txt', header=None)
tf_file_2['patient'] = tf_file_2[0].str.split('-').str[1].str.split('_').str[0]
tf_file_2['date'] = tf_file_2[0].str.split('_').str[1]
tf_file_2['patient_date'] = tf_file_2['patient'] + '_' + tf_file_2['date'] 
tf_file_2 = tf_file_2.set_index('patient_date').drop(columns=0).rename(columns={1: 'tumor_burden'})

tf_file = pd.concat([tf_file_1, tf_file_2])
tf_file['patient'] = tf_file['patient'].astype(int)
tf_file['date'] = pd.to_datetime(tf_file['date'], format='%d%m%y')
tf_file = tf_file.reset_index(drop=True)

# treatment
treatment_file = pd.read_csv('../data/treatment/patient_treatment_total_std_201109.txt', sep='\t')
treatment_file['patient'] = treatment_file['patient'].astype(int)
treatment_file['date'] = pd.to_datetime(treatment_file['date'], format='%Y-%m-%d')
treatment_file = treatment_file.rename(columns={'value':'treatment'})[['patient', 'date', 'treatment']]

# Variant calls of ALL patients. The loop iterates over this table, not over
# `mutation_file`, which is narrowed to one patient inside the loop; otherwise
# re-running the cell would only process the last patient.
mutation_file_all = pd.read_csv('../data/variant_calls/total_variants_mmpm_puradj.txt', sep='\t')

figure_dir = '../figures/low_tb_timepoints'
os.makedirs(figure_dir, exist_ok=True)  # savefig fails if the directory is missing

collist = ['r', 'b', 'g', 'c', 'm', 'y', 'tab:pink', 'tab:orange', 'tab:olive', 'tab:purple', 'grey']

for patient in mutation_file_all['pid'].unique():
    print(patient)

    # ---- Patient timeline: treatments + tumor burden, dates as strings ----
    tumorburden_dates = tf_file[tf_file['patient'] == patient]['date'].sort_values().astype(str).unique()

    treatment_patient = treatment_file[treatment_file['patient'] == patient].sort_values('date')
    treatment_patient = treatment_patient.assign(date=treatment_patient['date'].astype(str))
    tf_patient = tf_file[tf_file['patient'] == patient].sort_values('date')
    tf_patient = tf_patient.assign(date=tf_patient['date'].astype(str))
    df_patient = pd.concat([treatment_patient, tf_patient]).sort_values('date')

    alldates = sorted(set(df_patient['date']).union(tf_patient['date']))

    daytimes = [(datetime.strptime(day, '%Y-%m-%d') - datetime.strptime(alldates[0], '%Y-%m-%d')).days
                for day in alldates]

    # ---- Figure 1: treatments and tumor burden ----
    fig, ax2, ax, ele0 = _draw_patient_timeline(df_patient, alldates, tumorburden_dates, patient)
    ax.legend(handles=[ele0], loc='upper left')

    # An existing PNG is never overwritten; delete it by hand to regenerate the figure.
    timeline_png = figure_dir + '/oncosg_timeline_patient'+str(patient)+'.png'
    if not os.path.exists(timeline_png):
        fig.savefig(timeline_png, bbox_inches='tight')
    plt.show()
    plt.close(fig)  # release the figure: the loop would otherwise accumulate open figures

    # ---- Calls at the lowest tumor-fraction timepoint(s) ----
    date_lowtftimepoints = list(tf_patient[tf_patient['tumor_burden'] == 0]['date'].unique())

    mutation_file = mutation_file_all[mutation_file_all['pid'] == patient]
    # Rows whose tumor fraction equals the patient's minimum (or is <= 0), 'Trusted'
    # tier only; this already excludes the 'Germline' and 'Artifact' tiers.
    mutation_lowtftimepoints = mutation_file.loc[
        (mutation_file['ichor_tf'] <= max(0, mutation_file['ichor_tf'].min()))
        & (mutation_file['TIERS'] == 'Trusted'),
        ['date', 'GENE', 'REF', 'ALT', 'VAF', 'TIERS'],
    ]

    lowtftimepoints_dict = {'date': date_lowtftimepoints,
                            'median VAF': [],
                            '# mutated genes' : [],
                            'median VAF within mutated genes': [],
                            '# mutated genes TRUSTED': [],
                            'median VAF within mutated genes TRUSTED': []
                           }

    for low_tf_date in date_lowtftimepoints:
        # The mask must compare the 'date' COLUMN with the current date: comparing
        # the list `date_lowtftimepoints` with it is always False and made
        # '# mutated genes' always 0.
        calls_on_date = mutation_lowtftimepoints[mutation_lowtftimepoints['date'] == low_tf_date]
        detected = calls_on_date[calls_on_date['VAF'] != 0]
        detected_on_date_trusted = detected[detected['TIERS'] == 'Trusted']
        lowtftimepoints_dict['# mutated genes'].append(detected.shape[0])
        lowtftimepoints_dict['# mutated genes TRUSTED'].append(detected_on_date_trusted.shape[0])
        lowtftimepoints_dict['median VAF'].append(_median_or_nan(calls_on_date['VAF'].values))
        lowtftimepoints_dict['median VAF within mutated genes'].append(_median_or_nan(detected['VAF'].values))
        lowtftimepoints_dict['median VAF within mutated genes TRUSTED'].append(_median_or_nan(detected_on_date_trusted['VAF'].values))

    lowtftimepoints_pd = pd.DataFrame.from_dict(lowtftimepoints_dict).set_index('date')

    # ---- Genes to check manually: trusted, non-zero VAF at those timepoints ----
    detected_trusted = mutation_lowtftimepoints[
        (mutation_lowtftimepoints['VAF'] != 0) & (mutation_lowtftimepoints['TIERS'] == 'Trusted')
    ]
    per_date_calls = [detected_trusted[detected_trusted['date'] == low_tf_date]
                      for low_tf_date in mutation_lowtftimepoints['date'].unique()]
    results_df = pd.concat(per_date_calls) if per_date_calls else None
    genes2checkmanually = [] if results_df is None else list(np.unique(list(results_df['GENE'].values)))

    if genes2checkmanually:
        # need to get gene location in hg19
        locus_columns = ['date', '#CHROM', 'POS', 'REF', 'ALT', 'GENE']
        locus_frames = [
            mutation_file.loc[(mutation_file['date'] == key_date) & (mutation_file['GENE'] == key_gene), locus_columns]
            for key_date, key_gene in results_df[['date', 'GENE']].drop_duplicates().itertuples(index=False, name=None)
        ]
        gene_df = pd.concat(locus_frames).drop(columns='date').drop_duplicates()

        # VAF of the flagged genes at every timepoint: rows = dates, columns = genes.
        mutations_acrosstime = (
            mutation_file
            .loc[mutation_file['GENE'].isin(genes2checkmanually), ['date', 'GENE', 'REF', 'ALT', 'VAF', 'TIERS']]
            .loc[lambda calls: calls['TIERS'] == 'Trusted']
            .pivot_table(values='VAF', index='GENE', columns='date', aggfunc='first')
            .T
        )

        # ---- Figure 2: same timeline plus one VAF line per flagged gene ----
        fig, ax2, ax, ele0 = _draw_patient_timeline(df_patient, alldates, tumorburden_dates, patient)

        # Restrict the VAF series to dates that exist on the patient timeline.
        on_timeline = mutations_acrosstime.index.isin(df_patient['date'].values)
        xacrosstime = list(mutations_acrosstime.index[on_timeline])

        eles = [ele0]
        for gi, gene in enumerate(genes2checkmanually):
            yacrosstime = list(mutations_acrosstime[gene].values[on_timeline])
            # Colours are reused cyclically so more genes than colours no longer raises IndexError.
            elei, = ax.plot(xacrosstime, yacrosstime, color=collist[gi % len(collist)], linestyle='-',
                            linewidth=2, marker='D', markersize=10, label=gene)
            eles.append(elei)

        ax.legend(handles=eles, loc='upper left')

        mutations_png = figure_dir + '/oncosg_timeline_mutations_patient'+str(patient)+'.png'
        if not os.path.exists(mutations_png):
            fig.savefig(mutations_png, bbox_inches='tight')
        plt.show()
        plt.close(fig)
