This notebook plots the copy number profile of each sample from the FACETS results. The resulting figures correspond to Additional file 1, Figure S5.

This piece of code relies on a workspace directory structure such as 
```
cohort/
	patientID/
		DxTumorID_vs_normalID/
		ReTumorID_vs_normalID/ (sometimes)

```
The values of patientID, DxTumorID, etc. can be found in `../ext_files/all_cohort_clinical_groups.tsv`.

The FACETS calls are expected to come from runs of the scripts in ```../ext_runs/run_FACETS```, and the calls for each patient are expected to follow the folder structure shown above.

In [None]:
import sys, os
import pandas as pd
import numpy as np
import glob
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns

from matplotlib import collections  as mc
from aux_data_in_pyvar import config_rcparams
from aux_functions import process_cnv

# Display every column and row of a DataFrame and never truncate cell text.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
# None disables the column-width limit; the former sentinel -1 is deprecated
# since pandas 1.0 and rejected by pandas 2.x.
pd.set_option('display.max_colwidth', None)

# Enable IPython's autoreload extension in mode 2, which reloads imported modules
# before running code, so edits to the local helper modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Apply the project's plotting configuration (helper imported from aux_data_in_pyvar).
# NOTE(review): presumably sets matplotlib rcParams — confirm in aux_data_in_pyvar.
config_rcparams()

In [None]:
# Lookup table from an integer copy number (0 to 8) to a value centred on
# two copies and halved, i.e. (copy_number - 2) / 2.
dict_cnv = dict(zip(range(9), (-1, -0.5, 0, 0.5, 1, 1.5, 2, 2.5, 3)))

In [None]:
# Chromosome lengths and the relative panel width of every chromosome

# The table is read with three columns; only the name and the length are kept.
chrom_sizes = pd.read_table("../ext_files/chrom_length", sep='\t', names=['chrom', 'len', 'sth'])
chrom_sizes = chrom_sizes.drop(columns='sth')
chrom_sizes['chrom'] = chrom_sizes['chrom'].str.replace("chr", "")
chrom_sizes = chrom_sizes[chrom_sizes['chrom'] != 'Y']

# Plotting order: autosomes 1-22 followed by X.
order_chrom = list(map(str, range(1, 23))) + ['X']

# Percentage of `total` covered by each chromosome. The values are only used as
# GridSpec width ratios, so just their relative sizes matter.
# NOTE(review): `total` equals 10 x 3,095,677,412, which looks like a base-pair
# genome total with an extra trailing zero — confirm the intended value.
total = 30956774120 
chrom_sizes['prop'] = (chrom_sizes['len'] / total) * 100
chrom_sizes = chrom_sizes.sort_values(by='chrom')

prop_chrom = chrom_sizes.set_index('chrom')[['prop']].loc[order_chrom]
widths = prop_chrom['prop'].tolist()

# Mapping chromosome name -> length, used for the x-limits of each panel.
length_chrom = chrom_sizes.set_index('chrom')['len'].to_dict()
length_chrom

#### Read and fix copy number 

In [None]:
## READ CLINICAL DATA
# Load the clinical table and keep the rows whose COHORT is "ADULT TALL AECC PROJECT".
clinical = (
    pd.read_table("../ext_files/all_cohort_clinical_groups.tsv")
    .loc[lambda tbl: tbl['COHORT'] == "ADULT TALL AECC PROJECT"]
)


## ADULT DATA
# Both paths are left blank on purpose and must be filled in before running.
dire_in = ""   # directory with the FACETS run, passed to process_cnv
dire_out = ""  # directory where the figures are written

In [None]:
# Build the table of FACETS segments used for plotting (process_cnv comes from aux_functions).
# NOTE(review): the plotting cell reads the columns 'patient', 'sample', 'chrom',
# 'start', 'end', 'tcn.em' and 'lcn.em' from this result — confirm process_cnv provides
# them and that 'chrom' holds strings such as '1' ... '22', 'X'.
facets_results = process_cnv(dire_in, clinical)

#### Plot copy number

In [None]:
# One group of segments per patient; the plotting loop draws one figure per group.
grps_pat = facets_results.groupby(by="patient")
# Distinct sample identifiers (a set, so no ordering is retained).
order_sample = set(facets_results['sample'])

In [None]:
# Resolution used when saving each output format.
# NOTE(review): these numbers were previously passed through a misspelled `doi=`
# keyword, so they were never applied. 25 dpi yields a very small PNG
# (30x9 in -> 750x225 px) — confirm that this is the intended resolution.
SAVE_DPI = {"svg": 500, "png": 25}


def _allele_segments(df_chrom, chrom_len):
    """Build the line segments of the major and minor allele for one chromosome.

    Parameters
    ----------
    df_chrom : pandas.DataFrame
        Segments of a single chromosome, ordered by 'start', with the columns
        'start', 'end', 'tcn.em' and 'lcn.em'.
    chrom_len : int
        Length of the chromosome; the last line is extended up to this position.

    Returns
    -------
    tuple of three lists
        ``(lines_major, lines_minor, colors_minor)``. Both line lists hold
        ``[(x0, y), (x1, y)]`` pairs and ``colors_minor`` has one colour per
        entry of ``lines_minor``.

    Notes
    -----
    The major allele is drawn 0.15 above and the minor allele 0.15 below their
    copy number so that equal values do not overlap. Stretches without a segment
    are drawn at one copy per allele (1.15 / 0.85). Segments with
    ``tcn.em == 2`` and ``lcn.em == 0`` are drawn at one copy per allele with a
    black minor line; the minor line of every other segment is gray.
    """
    lines_major, lines_minor, colors_minor = [], [], []
    prev_end = 0
    for start, end, tcn, lcn in zip(df_chrom['start'], df_chrom['end'],
                                    df_chrom['tcn.em'], df_chrom['lcn.em']):
        if tcn == 2 and lcn == 0:
            major, minor, colour = 1, 1, 'k'
        else:
            major, minor, colour = tcn - lcn, lcn, 'gray'

        # Stretch between the previous segment (or the chromosome start) and this one.
        lines_major.append([(prev_end, 1.15), (start, 1.15)])
        lines_minor.append([(prev_end, 0.85), (start, 0.85)])
        # The segment itself.
        lines_major.append([(start, major + 0.15), (end, major + 0.15)])
        lines_minor.append([(start, minor - 0.15), (end, minor - 0.15)])
        colors_minor.extend([colour, colour])
        prev_end = end

    # Stretch between the last segment and the chromosome end.
    lines_major.append([(prev_end, 1.15), (chrom_len, 1.15)])
    lines_minor.append([(prev_end, 0.85), (chrom_len, 0.85)])
    colors_minor.append('gray')
    return lines_major, lines_minor, colors_minor


# One figure per patient: one row per sample, one narrow panel per chromosome.
for pat, df_pat in grps_pat:
    print(pat)
    samples = df_pat['sample'].unique()

    # Keep the two-row layout for one or two samples and grow the figure
    # instead of failing when a patient has more samples.
    n_rows = max(2, len(samples))
    fig = plt.figure(figsize=(30, 4.5 * n_rows))
    outer = gridspec.GridSpec(n_rows, 1, wspace=0.2, hspace=0.6)

    for s, sam in enumerate(samples):
        df_sam = df_pat[df_pat['sample'] == sam].sort_values(by=['start'])

        # Invisible-tick frame that only carries the sample title.
        axx = fig.add_subplot(outer[s])
        axx.set_title(sam, pad=60, fontsize=28)
        axx.set_yticks([])
        axx.set_xticks([])

        # One column per chromosome, with widths proportional to chromosome length.
        inner = gridspec.GridSpecFromSubplotSpec(1, len(order_chrom), subplot_spec=outer[s, 0],
                                                 wspace=0, hspace=0, width_ratios=widths)

        for j, chrom in enumerate(order_chrom):
            chrom_len = length_chrom[chrom]

            # A boolean mask (unlike groupby.get_group) simply returns an empty
            # frame when the chromosome has no segments.
            df_chrom = df_sam[df_sam['chrom'] == chrom]
            if df_chrom.empty:
                print("  {}: no segments on chr{}, drawing one copy per allele".format(sam, chrom))
                df_chrom = pd.DataFrame([{'chrom': chrom, 'start': 0, 'end': chrom_len,
                                          'tcn.em': 2, 'lcn.em': 1,
                                          'sample': sam, 'patient': pat}])

            lines_major, lines_minor, c_min = _allele_segments(df_chrom, chrom_len)

            ax = fig.add_subplot(inner[j])
            ax.add_collection(mc.LineCollection(lines_major, colors='k', linewidths=5))
            ax.add_collection(mc.LineCollection(lines_minor, colors=c_min, linewidths=5))
            # Shaded background: blue band from 0 to 1, red band from 1 to 6.
            ax.axhspan(0, 1, facecolor='#0571b0', alpha=0.05)
            ax.axhspan(1, 6, facecolor='#d7191c', alpha=0.05)

            ax.set_ylim([0, 6])
            ax.set_xlim([0, chrom_len])
            ax.set_title("chr{}".format(chrom), rotation=30, pad=30, fontsize=24)
            ax.set_xticks([])

            # Only the leftmost panel of each row shows the copy-number scale.
            if j == 0:
                ax.set_yticks([0, 1, 2, 3, 4, 5, 6])
                ax.set_yticklabels([0, 1, 2, 3, 4, 5, 6], fontsize=20)
            else:
                ax.set_yticks([])

    fig.suptitle(pat, x=0.51, y=1.1, fontsize=34)
    fig.tight_layout()
    for ext, dpi in SAVE_DPI.items():
        fig.savefig(os.path.join(dire_out, "cnv_{}.{}".format(pat, ext)),
                    dpi=dpi, format=ext, bbox_inches='tight', pad_inches=0.1)
    plt.show()
    # Release the figure so memory does not grow with the number of patients.
    plt.close(fig)