# Exome Pipeline

In [None]:
import os
import sys
from shlex import split

from IPython.display import display, Markdown, Latex

import numpy as np 

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import functools

from interactPlots import *

import yaml

In [None]:
# Load the pipeline options (tool locations, reference files, batch name, thread
# count, ...) from options.yaml in the notebook's start-up directory.
with open("options.yaml") as options_file:
    data = yaml.safe_load(options_file)

In [None]:
# Export the tool settings to the environment so that child processes can see them.
# Each pair is (environment variable, key in the options dict).
for env_name, option_key in (
    ("SENTIEON_INSTALL_DIR", 'SENTIEON_INSTALL_DIR'),
    ("SENTIEON_LICENSE", 'SENTIEON_LICENSE'),
    ("BCFTOOLS_PLUGINS", 'bcfdir'),
):
    os.environ[env_name] = data[option_key]

In [None]:
# Set working directory: <current directory>/<batch>/
# Both paths keep their trailing "/" because later cells build file names by
# plain string concatenation (e.g. data['home'] + data['batch'] + ".txt").
data['home'] = os.getcwd() + "/"
data['workdir'] = data['home'] + data['batch'] + "/"

# makedirs(exist_ok=True) replaces the isdir()/mkdir() pair: no race between the
# check and the creation, and a batch name containing sub-directories also works.
os.makedirs(data['workdir'], exist_ok=True)
 


## Batch

In [None]:
# Read the batch sample sheet: a tab-separated file <home>/<batch>.txt that is
# displayed below with the column headers Sample / Tumor / Normal.
import csv  # imported here explicitly; the notebook otherwise only gets it via a star import, if at all
from IPython.display import HTML

data['samplelist'] = data['home'] + data['batch'] + ".txt"

# newline='' is what the csv module expects for files it parses itself.
# Blank lines come back from csv.reader as empty lists; they are dropped here
# because every later cell indexes row[0] / row[1].
with open(data['samplelist'], 'r', newline='') as f:
    batch = [row for row in csv.reader(f, delimiter='\t') if row]

# Number of tumor/normal rows. len() gives the same value as the previous
# np.size(batch) / np.size(batch, 1) for a rectangular table and also works for
# an empty sheet.
batchSize = len(batch)

# NOTE(review): `tabulate` is not imported in this notebook's import cell -
# presumably it arrives through `from interactPlots import *`; confirm.
HTML(tabulate(batch, headers=["Sample", "Tumor", "Normal"], tablefmt="html"))

In [None]:
%%capture
#Dropdown widgets

tumors = [row[1] for row in batch]

widgTum = widgets.Dropdown(
    options= tumors,
    disabled=False,
)  

lists = tumors.copy()
lists.append('ALL')

widgTumAll = widgets.Dropdown(
    options= lists,
    value='ALL',
    disabled=False,
) 

normals = [row[0] for row in batch]
#normals = [batch[0][0]]

uniqNorm = np.unique(normals)

#lists = normals.copy()
#lists.append('ALL')

widgNor = widgets.Dropdown(
    options= uniqNorm,
    #value='ALL',
    disabled=False,
)  

normSize = np.size(uniqNorm)

if data['samplesParallel']==1000:
    data['samplesParallel'] = batchSize+normSize


In [None]:
# The generated commands use relative file names (e.g. <sample>N_sorted.bam),
# so everything from here on runs inside the batch working directory.
os.chdir(data['workdir'])
exec_scripts = False # Master switch: when False the cells only print the shell commands; when True they also call commandsParallel on them

In [None]:
#exec_scripts = True 

# 1a. Mapping reads with BWA-MEM, sorting for normal sample
The results of this call depend on the number of threads used. To make the results independent of the thread count, pass the chunk-size option `-K 10000000` (already included in the commands below).

In [None]:
# Build one "bwa mem | util sort" shell pipeline per unique normal sample.
# The commands are only collected here; they are executed together with the
# tumor mapping commands by the next code cell, which keeps appending to
# `commands`.
# NOTE(review): the FASTQ glob uses data['fastq_folder_NORMAL'] without the
# sample name, so every normal would read the same files - confirm that only
# one normal per run is expected.
ERROR = "error"  # text echoed into the pipe when `bwa mem` exits with a failure
commands = []
for SAMPLE in uniqNorm:
    display(Markdown("### Normal: " + SAMPLE))

    sentieon = data['SENTIEON_INSTALL_DIR'] + "/bin/sentieon"
    threads = str(data['nt'])
    read_group = "'@RG\\tID:" + SAMPLE + "N\\tSM:" + SAMPLE + "N\\tPL:" + data['platform'] + "'"

    # Left side of the pipe: alignment of the paired FASTQ files.
    align = " ".join([
        sentieon, "bwa mem -M -R", read_group,
        "-t", threads,
        "-K 10000000",
        data['fasta'],
        data['fastq_folder_NORMAL'] + "/*_" + data['fastq_1_suffix'],
        data['fastq_folder_NORMAL'] + "/*_" + data['fastq_2_suffix'],
    ])
    # Right side of the pipe: SAM stream -> sorted BAM.
    sort = " ".join([
        sentieon, "util sort -o", SAMPLE + "N_sorted.bam",
        "-t", threads,
        "--sam2bam -i -",
    ])

    command = "(" + align + " || echo -n " + ERROR + " ) | " + sort
    print(command + '\n')
    commands.append(command)

# 1a. Mapping reads with BWA-MEM, sorting for tumor sample
The results of this call depend on the number of threads used. To make the results independent of the thread count, pass the chunk-size option `-K 10000000` (already included in the commands below).

In [None]:
# Build one "bwa mem | util sort" shell pipeline per tumor sample.
# `commands` and `ERROR` are deliberately NOT reset here: this cell appends to
# the list started by the normal-mapping cell so that normal and tumor
# alignments are submitted in a single commandsParallel call. Run that cell first.
for row in batch[:batchSize]:
    TUMOR = row[1]
    display(Markdown("### Tumor: " + TUMOR))

    sentieon = data['SENTIEON_INSTALL_DIR'] + "/bin/sentieon"
    threads = str(data['nt'])
    read_group = "'@RG\\tID:" + TUMOR + "T\\tSM:" + TUMOR + "T\\tPL:" + data['platform'] + "'"
    fastq_dir = data['fastq_folder'] + "/" + TUMOR  # one FASTQ sub-folder per tumor

    # Left side of the pipe: alignment of the paired FASTQ files.
    align = " ".join([
        sentieon, "bwa mem -M -R", read_group,
        "-t", threads,
        "-K 10000000",
        data['fasta'],
        fastq_dir + "/*_" + data['fastq_1_suffix'],
        fastq_dir + "/*_" + data['fastq_2_suffix'],
    ])
    # Right side of the pipe: SAM stream -> sorted BAM.
    sort = " ".join([
        sentieon, "util sort -o", TUMOR + "T_sorted.bam",
        "-t", threads,
        "--sam2bam -i -",
    ])

    command = "(" + align + " || echo -n " + ERROR + " ) | " + sort
    print(command + '\n')
    commands.append(command)

if exec_scripts:
    commandsParallel(commands, normSize + batchSize, data['samplesParallel'])
    

# 2a. Metrics for normal sample

In [None]:
# One driver call per normal BAM that writes the MeanQualityByCycle,
# QualDistribution, GCBias, AlignmentStat and InsertSizeMetricAlgo metric files.
commands = []
for SAMPLE in uniqNorm:
    display(Markdown("### Normal: " + SAMPLE))
    command1 = " ".join([
        data['SENTIEON_INSTALL_DIR'] + "/bin/sentieon", "driver",
        "-r", data['fasta'],
        "-t", str(data['nt']),
        "-i", SAMPLE + "N_sorted.bam",
        "--algo MeanQualityByCycle", SAMPLE + "N_mq_metrics.txt",
        "--algo QualDistribution", SAMPLE + "N_qd_metrics.txt",
        "--algo GCBias --summary", SAMPLE + "N_gc_summary.txt", SAMPLE + "N_gc_metrics.txt",
        "--algo AlignmentStat --adapter_seq ''", SAMPLE + "N_aln_metrics.txt",
        "--algo InsertSizeMetricAlgo", SAMPLE + "N_is_metrics.txt",
    ])
    print(command1 + '\n')
    commands.append(command1)

if exec_scripts:
    commandsParallel(commands, normSize, data['samplesParallel'])


In [None]:
%%capture
# Disabled legacy viewer: the ipywidgets-based alignment table is kept only as an
# inert string literal (nothing in it is executed). The HTML/JavaScript table
# built in the following cells is what the notebook actually displays.
"""from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import functools
from interactPlots import *

inter = interact(tableShow, Sample=widgNor, file=fixed("_N_aln_metrics.txt"), cols = fixed([0,1,2,5,6]), listSample = fixed([]));
display(inter)"""

In [None]:


# Page-level JavaScript helpers shared by every table / PDF viewer cell below.
# They are injected once as an HTML output; later cells only call them.
#   myTable(idd, jsonObj) - redraw table "myTable<idd>" for the selected sample
#   myDropdown(sel, opts) - fill a <select> element with the given option names
#   myPlots(type, idd)    - point iframe "pdf<idd>" at <selected sample><type>
# Compared with the first version: the row counter is no longer an implicit
# global, the debugging console.log is gone, a sample without data yields an
# empty table instead of a TypeError, and plain text is written through
# textContent rather than innerHTML.
javascript = """

<script type="text/Javascript">

function myTable(idd, jsonObj) {

  var sample = document.getElementById("selectSample" + idd).value;
  document.getElementById("myText" + idd).textContent = sample;

  var table = document.getElementById("myTable" + idd);

  // Drop every existing row (header included) before redrawing.
  while (table.rows[0]) table.deleteRow(0);

  var columns = ["TOTAL_READS", "PF_READS_ALIGNED", "PCT_PF_READS_ALIGNED"];
  var metrics = jsonObj[sample] || {};

  // One table row per metrics category of the selected sample.
  Object.keys(metrics).forEach(function (category, rowIndex) {
    var row = table.insertRow(rowIndex);
    row.insertCell(0).textContent = category;
    columns.forEach(function (column, columnIndex) {
      row.insertCell(columnIndex + 1).textContent = metrics[category][column];
    });
  });

  // The header is added last so that it ends up above the data rows.
  var header = table.createTHead().insertRow(0);
  header.insertCell(0).textContent = "CATEGORY";
  columns.forEach(function (column, columnIndex) {
    header.insertCell(columnIndex + 1).textContent = column;
  });
}

function myDropdown(sel, opts) {
  for (var i = 0; i < opts.length; i++) {
    var el = document.createElement("option");
    el.textContent = opts[i];
    el.value = opts[i];
    // Pre-select the first entry.
    if (i === 0) {
      el.selected = true;
    }
    sel.appendChild(el);
  }
}

function myPlots(type, idd) {
  var sample = document.getElementById("selectSample" + idd).value;
  document.getElementById("myText" + idd).textContent = sample;
  document.getElementById("pdf" + idd).src = sample + type;
}

</script>

"""

HTML(javascript)

In [None]:
# Alignment-summary table for the normal samples.
#
# TODO: `path` is a hard-coded absolute results location that the author marked
# "TO REMOVE"; it is also reused by the tumor table cell further down.
# NOTE(review): the files read here are <sample>_N_aln_metrics.txt, whereas the
# metrics command above writes <sample>N_aln_metrics.txt (no underscore) -
# confirm which naming is current.
path = "/storage/gluster/vol1/data/PUBLIC/SCAMBIO/ABT414_WES_Analysis/ABT414_Flank/ABT414_Flank/" #TO REMOVE
file = "_N_aln_metrics.txt"

alldata = {}

# make_json is defined outside this notebook; it is called once per sample with
# the shared `alldata` dict, and the string returned by the last call is what
# gets embedded below (presumably the JSON for all samples - confirm).
for i in range(normSize):
    csvFilePath = path+uniqNorm[i]+file
    jsonfile = make_json(csvFilePath,uniqNorm[i],alldata)

# Counter that gives every viewer cell unique DOM ids (first viewer = 1).
# It shadows the builtin id(), but later cells rely on this name.
id = 1

# The markup is a plain fragment (no <head>/<body>) because it is inserted into
# an existing page. The JSON text is emitted directly as a JavaScript object
# literal instead of being wrapped in a single-quoted string for JSON.parse, so
# quotes or backslashes inside the data cannot break the script.
javascript = """
<style>
table, td {{
  border: 1px solid black;
  font-size: 18px;
}}
</style>

<select id="selectSample{n}"></select>

<h1>The sample is:  <span id="myText{n}"></span></h1>

<table id="myTable{n}"></table>
<br>

<script type="text/Javascript">

var select = document.getElementById("selectSample{n}");

var objNorm = {payload};

myDropdown(select, Object.keys(objNorm));

select.onchange = function() {{ myTable({n}, objNorm); }};

myTable({n}, objNorm);

</script>
""".format(n=id, payload=jsonfile)

HTML(javascript)

- **CATEGORY**: One of
    - UNPAIRED: for a fragment (single-end) run.
    - FIRST_OF_PAIR: when metrics are for only the first read in a paired run. 
    - SECOND_OF_PAIR: when metrics are for only the second read in a paired run.
    - PAIR: when metrics are aggregated for both first and second reads in a pair. 


- **TOTAL_READS**: The total number of reads 

- **PF_READS_ALIGNED**: The number of PF (pass-filter) reads that aligned to the reference sequence

- **PCT_PF_READS_ALIGNED**: The proportion of PF reads that aligned to the reference sequence (PF_READS_ALIGNED / PF_READS)

In [None]:
display(Markdown("___"))

# GC-bias report (PDF) for every normal sample, rendered from its GC metrics file.
commands = []
for SAMPLE in uniqNorm:
    display(Markdown("### Normal: " + SAMPLE))
    command2 = " ".join([
        data['SENTIEON_INSTALL_DIR'] + "/bin/sentieon", "plot GCBias",
        "-o", SAMPLE + "N_gc-report.pdf",
        SAMPLE + "N_gc_metrics.txt",
    ])
    print(command2 + '\n')
    commands.append(command2)

if exec_scripts:
    commandsParallel(commands, normSize, data['samplesParallel'])

In [None]:
# PDF viewer for the normal GC-bias reports: a dropdown of normal samples plus an
# iframe that loads <sample>_N_gc-report.pdf.
# Needs the JS helper cell (myDropdown, myPlots) and the normal alignment-table
# cell (objNorm) to have been rendered earlier in the page.
# The former <body onload="myPlots()"> wrapper is gone: inside a notebook output
# it is ignored, and in an exported page it called myPlots without arguments.
id = id + 1  # next unique suffix for this viewer's DOM ids

input_form = """
<select id="selectSample{n}"></select>

<h1>The sample is:  <span id="myText{n}"></span></h1>

<div>
  <iframe id="pdf{n}" src="" style="width:100%;height:700px;"></iframe>
</div>
""".format(n=id)

javascript = """
<script type="text/Javascript">

var select = document.getElementById("selectSample{n}");

myDropdown(select, Object.keys(objNorm));

var type{n} = "_N_gc-report.pdf";

myPlots(type{n}, {n});

select.onchange = function() {{ myPlots(type{n}, {n}); }};

</script>
""".format(n=id)

HTML(input_form + javascript)


In [None]:
display(Markdown("___"))

# Quality-distribution report (PDF) for every normal sample.
commands = []
for SAMPLE in uniqNorm:
    display(Markdown("### Normal: " + SAMPLE))
    command3 = " ".join([
        data['SENTIEON_INSTALL_DIR'] + "/bin/sentieon", "plot QualDistribution",
        "-o", SAMPLE + "N_qd-report.pdf",
        SAMPLE + "N_qd_metrics.txt",
    ])
    print(command3 + '\n')
    commands.append(command3)

if exec_scripts:
    commandsParallel(commands, normSize, data['samplesParallel'])

In [None]:
# PDF viewer for the normal quality-distribution reports: a dropdown of normal
# samples plus an iframe that loads <sample>_N_qd-report.pdf.
# Needs the JS helper cell (myDropdown, myPlots) and the normal alignment-table
# cell (objNorm) to have been rendered earlier in the page.
# The former <body onload="myPlots()"> wrapper is gone: inside a notebook output
# it is ignored, and in an exported page it called myPlots without arguments.
id = id + 1  # next unique suffix for this viewer's DOM ids

input_form = """
<select id="selectSample{n}"></select>

<h1>The sample is:  <span id="myText{n}"></span></h1>

<div>
  <iframe id="pdf{n}" src="" style="width:100%;height:700px;"></iframe>
</div>
""".format(n=id)

javascript = """
<script type="text/Javascript">

var select = document.getElementById("selectSample{n}");

myDropdown(select, Object.keys(objNorm));

var type{n} = "_N_qd-report.pdf";

myPlots(type{n}, {n});

select.onchange = function() {{ myPlots(type{n}, {n}); }};

</script>
""".format(n=id)

HTML(input_form + javascript)



In [None]:
display(Markdown("___"))

# Mean-quality-by-cycle report (PDF) for every normal sample.
commands = []
for SAMPLE in uniqNorm:
    display(Markdown("### Normal: " + SAMPLE))
    command4 = " ".join([
        data['SENTIEON_INSTALL_DIR'] + "/bin/sentieon", "plot MeanQualityByCycle",
        "-o", SAMPLE + "N_mq-report.pdf",
        SAMPLE + "N_mq_metrics.txt",
    ])
    print(command4 + '\n')
    commands.append(command4)

if exec_scripts:
    commandsParallel(commands, normSize, data['samplesParallel'])

In [None]:
# PDF viewer for the normal mean-quality-by-cycle reports: a dropdown of normal
# samples plus an iframe that loads <sample>_N_mq-report.pdf.
# Needs the JS helper cell (myDropdown, myPlots) and the normal alignment-table
# cell (objNorm) to have been rendered earlier in the page.
# The former <body onload="myPlots()"> wrapper is gone: inside a notebook output
# it is ignored, and in an exported page it called myPlots without arguments.
id = id + 1  # next unique suffix for this viewer's DOM ids

input_form = """
<select id="selectSample{n}"></select>

<h1>The sample is:  <span id="myText{n}"></span></h1>

<div>
  <iframe id="pdf{n}" src="" style="width:100%;height:700px;"></iframe>
</div>
""".format(n=id)

javascript = """
<script type="text/Javascript">

var select = document.getElementById("selectSample{n}");

myDropdown(select, Object.keys(objNorm));

var type{n} = "_N_mq-report.pdf";

myPlots(type{n}, {n});

select.onchange = function() {{ myPlots(type{n}, {n}); }};

</script>
""".format(n=id)

HTML(input_form + javascript)



In [None]:
display(Markdown("___"))

# Insert-size report (PDF) for every normal sample.
commands = []
for SAMPLE in uniqNorm:
    display(Markdown("### Normal: " + SAMPLE))
    command5 = " ".join([
        data['SENTIEON_INSTALL_DIR'] + "/bin/sentieon", "plot InsertSizeMetricAlgo",
        "-o", SAMPLE + "N_is-report.pdf",
        SAMPLE + "N_is_metrics.txt",
    ])
    print(command5 + '\n')
    commands.append(command5)

if exec_scripts:
    commandsParallel(commands, normSize, data['samplesParallel'])

In [None]:
# PDF viewer for the normal insert-size reports: a dropdown of normal samples
# plus an iframe that loads <sample>_N_is-report.pdf.
# Needs the JS helper cell (myDropdown, myPlots) and the normal alignment-table
# cell (objNorm) to have been rendered earlier in the page.
# The former <body onload="myPlots()"> wrapper is gone: inside a notebook output
# it is ignored, and in an exported page it called myPlots without arguments.
id = id + 1  # next unique suffix for this viewer's DOM ids

input_form = """
<select id="selectSample{n}"></select>

<h1>The sample is:  <span id="myText{n}"></span></h1>

<div>
  <iframe id="pdf{n}" src="" style="width:100%;height:700px;"></iframe>
</div>
""".format(n=id)

javascript = """
<script type="text/Javascript">

var select = document.getElementById("selectSample{n}");

myDropdown(select, Object.keys(objNorm));

var type{n} = "_N_is-report.pdf";

myPlots(type{n}, {n});

select.onchange = function() {{ myPlots(type{n}, {n}); }};

</script>
""".format(n=id)

HTML(input_form + javascript)



# 2a. Metrics for tumor sample

In [None]:
# One driver call per tumor BAM that writes the MeanQualityByCycle,
# QualDistribution, GCBias, AlignmentStat and InsertSizeMetricAlgo metric files.
commands = []

for row in batch[:batchSize]:
    TUMOR = row[1]
    display(Markdown("### Tumor: " + TUMOR))
    command1 = " ".join([
        data['SENTIEON_INSTALL_DIR'] + "/bin/sentieon", "driver",
        "-r", data['fasta'],
        "-t", str(data['nt']),
        "-i", TUMOR + "T_sorted.bam",
        "--algo MeanQualityByCycle", TUMOR + "T_mq_metrics.txt",
        "--algo QualDistribution", TUMOR + "T_qd_metrics.txt",
        "--algo GCBias --summary", TUMOR + "T_gc_summary.txt", TUMOR + "T_gc_metrics.txt",
        "--algo AlignmentStat --adapter_seq ''", TUMOR + "T_aln_metrics.txt",
        "--algo InsertSizeMetricAlgo", TUMOR + "T_is_metrics.txt",
    ])
    commands.append(command1)
    print(command1 + '\n')

if exec_scripts:
    commandsParallel(commands, batchSize, data['samplesParallel'])
    

In [None]:
# NOTE(review): this cell builds exactly the same tumor metrics commands as the
# cell directly above it. With exec_scripts enabled the metrics jobs are
# therefore submitted a second time and rewrite the same output files; one of
# the two cells can most likely be removed.
commands = []

for i in range(batchSize):
    TUMOR = batch[i][1]  # column 1 of the batch table is the tumor identifier
    display(Markdown("### Tumor: "+TUMOR))
    command1 = data['SENTIEON_INSTALL_DIR']+"/bin/sentieon driver -r "+data['fasta']+" -t "+str(data['nt'])+" -i "+TUMOR+"T_sorted.bam --algo MeanQualityByCycle "+TUMOR+"T_mq_metrics.txt --algo QualDistribution "+TUMOR+"T_qd_metrics.txt --algo GCBias --summary "+TUMOR+"T_gc_summary.txt "+TUMOR+"T_gc_metrics.txt --algo AlignmentStat --adapter_seq '' "+TUMOR+"T_aln_metrics.txt --algo InsertSizeMetricAlgo "+TUMOR+"T_is_metrics.txt"
    print(command1+'\n')
    commands.append(command1)

# Commands are only executed when the master switch is on.
if exec_scripts:
    commandsParallel(commands,batchSize,data['samplesParallel'])

In [None]:
%%capture
# Disabled legacy viewer: the ipywidgets-based tumor alignment table is kept only
# as an inert string literal; nothing in it is executed.
"""interact(tableShow, Sample=widgTumAll, file=fixed("_T_aln_metrics.txt"), cols = fixed([0,1,2,5,6]), listSample = fixed(tumors));"""

In [None]:
# Alignment-summary table for the tumor samples.
#
# `path` is the results location assigned in the normal alignment-table cell;
# that cell (and the JS helper cell) must have been run first.
# NOTE(review): the files read here are <tumor>_T_aln_metrics.txt, whereas the
# metrics command above writes <tumor>T_aln_metrics.txt (no underscore) -
# confirm which naming is current.
file = "_T_aln_metrics.txt"

alldata = {}

# make_json is defined outside this notebook; it is called once per sample with
# the shared `alldata` dict, and the string returned by the last call is what
# gets embedded below (presumably the JSON for all samples - confirm).
for i in range(batchSize):
    csvFilePath = path+tumors[i]+file
    jsonfile = make_json(csvFilePath,tumors[i],alldata)

id = id + 1  # next unique suffix for this viewer's DOM ids

# Fixes relative to the previous markup: the table id no longer carries a stray
# extra double quote, the fragment is not wrapped in <body>, and the JSON text
# is emitted directly as a JavaScript object literal instead of inside a
# single-quoted string for JSON.parse (quotes/backslashes in the data could
# break that string).
javascript = """
<select id="selectSample{n}"></select>

<h1>The sample is:  <span id="myText{n}"></span></h1>

<table id="myTable{n}"></table>
<br>

<script type="text/Javascript">

var select = document.getElementById("selectSample{n}");

var objTum = {payload};

myDropdown(select, Object.keys(objTum));

select.onchange = function() {{ myTable({n}, objTum); }};

myTable({n}, objTum);

</script>
""".format(n=id, payload=jsonfile)

HTML(javascript)

- **CATEGORY**: One of
    - UNPAIRED: for a fragment (single-end) run.
    - FIRST_OF_PAIR: when metrics are for only the first read in a paired run. 
    - SECOND_OF_PAIR: when metrics are for only the second read in a paired run.
    - PAIR: when metrics are aggregated for both first and second reads in a pair. 


- **TOTAL_READS**: The total number of reads 

- **PF_READS_ALIGNED**: The number of PF (pass-filter) reads that aligned to the reference sequence

- **PCT_PF_READS_ALIGNED**: The proportion of PF reads that aligned to the reference sequence (PF_READS_ALIGNED / PF_READS)

In [None]:
display(Markdown("___"))

# GC-bias report (PDF) for every tumor sample, rendered from its GC metrics file.
commands = []
for row in batch[:batchSize]:
    TUMOR = row[1]
    display(Markdown("### Tumor: " + TUMOR))
    command2 = " ".join([
        data['SENTIEON_INSTALL_DIR'] + "/bin/sentieon", "plot GCBias",
        "-o", TUMOR + "T_gc-report.pdf",
        TUMOR + "T_gc_metrics.txt",
    ])
    print(command2 + '\n')
    commands.append(command2)

if exec_scripts:
    commandsParallel(commands, batchSize, data['samplesParallel'])

In [None]:
# PDF viewer for the tumor GC-bias reports: a dropdown of tumor samples plus an
# iframe that loads <tumor>_T_gc-report.pdf.
# Needs the JS helper cell (myDropdown, myPlots) and the tumor alignment-table
# cell (objTum) to have been rendered earlier in the page.
# The former <body onload="myPlots()"> wrapper is gone: inside a notebook output
# it is ignored, and in an exported page it called myPlots without arguments.
id = id + 1  # next unique suffix for this viewer's DOM ids

input_form = """
<select id="selectSample{n}"></select>

<h1>The sample is:  <span id="myText{n}"></span></h1>

<div>
  <iframe id="pdf{n}" src="" style="width:100%;height:700px;"></iframe>
</div>
""".format(n=id)

javascript = """
<script type="text/Javascript">

var select = document.getElementById("selectSample{n}");

myDropdown(select, Object.keys(objTum));

var type{n} = "_T_gc-report.pdf";

myPlots(type{n}, {n});

select.onchange = function() {{ myPlots(type{n}, {n}); }};

</script>
""".format(n=id)

HTML(input_form + javascript)

In [None]:
display(Markdown("___"))

# Quality-distribution report (PDF) for every tumor sample.
commands = []
for row in batch[:batchSize]:
    TUMOR = row[1]
    display(Markdown("### Tumor: " + TUMOR))
    command3 = " ".join([
        data['SENTIEON_INSTALL_DIR'] + "/bin/sentieon", "plot QualDistribution",
        "-o", TUMOR + "T_qd-report.pdf",
        TUMOR + "T_qd_metrics.txt",
    ])
    print(command3 + '\n')
    commands.append(command3)

if exec_scripts:
    commandsParallel(commands, batchSize, data['samplesParallel'])

In [None]:
# PDF viewer for the tumor quality-distribution reports: a dropdown of tumor
# samples plus an iframe that loads <tumor>_T_qd-report.pdf.
# Needs the JS helper cell (myDropdown, myPlots) and the tumor alignment-table
# cell (objTum) to have been rendered earlier in the page.
# The former <body onload="myPlots()"> wrapper is gone: inside a notebook output
# it is ignored, and in an exported page it called myPlots without arguments.
id = id + 1  # next unique suffix for this viewer's DOM ids

input_form = """
<select id="selectSample{n}"></select>

<h1>The sample is:  <span id="myText{n}"></span></h1>

<div>
  <iframe id="pdf{n}" src="" style="width:100%;height:700px;"></iframe>
</div>
""".format(n=id)

javascript = """
<script type="text/Javascript">

var select = document.getElementById("selectSample{n}");

myDropdown(select, Object.keys(objTum));

var type{n} = "_T_qd-report.pdf";

myPlots(type{n}, {n});

select.onchange = function() {{ myPlots(type{n}, {n}); }};

</script>
""".format(n=id)

HTML(input_form + javascript)

In [None]:
display(Markdown("___"))

# Mean-quality-by-cycle report (PDF) for every tumor sample.
commands = []
for row in batch[:batchSize]:
    TUMOR = row[1]
    display(Markdown("### Tumor: " + TUMOR))
    command4 = " ".join([
        data['SENTIEON_INSTALL_DIR'] + "/bin/sentieon", "plot MeanQualityByCycle",
        "-o", TUMOR + "T_mq-report.pdf",
        TUMOR + "T_mq_metrics.txt",
    ])
    print(command4 + '\n')
    commands.append(command4)

if exec_scripts:
    commandsParallel(commands, batchSize, data['samplesParallel'])

In [None]:
# PDF viewer for the tumor mean-quality-by-cycle reports: a dropdown of tumor
# samples plus an iframe that loads <tumor>_T_mq-report.pdf.
# Needs the JS helper cell (myDropdown, myPlots) and the tumor alignment-table
# cell (objTum) to have been rendered earlier in the page.
# The former <body onload="myPlots()"> wrapper is gone: inside a notebook output
# it is ignored, and in an exported page it called myPlots without arguments.
id = id + 1  # next unique suffix for this viewer's DOM ids

input_form = """
<select id="selectSample{n}"></select>

<h1>The sample is:  <span id="myText{n}"></span></h1>

<div>
  <iframe id="pdf{n}" src="" style="width:100%;height:700px;"></iframe>
</div>
""".format(n=id)

javascript = """
<script type="text/Javascript">

var select = document.getElementById("selectSample{n}");

myDropdown(select, Object.keys(objTum));

var type{n} = "_T_mq-report.pdf";

myPlots(type{n}, {n});

select.onchange = function() {{ myPlots(type{n}, {n}); }};

</script>
""".format(n=id)

HTML(input_form + javascript)

In [None]:
display(Markdown("___"))

# Insert-size report (PDF) for every tumor sample.
commands = []
for row in batch[:batchSize]:
    TUMOR = row[1]
    display(Markdown("### Tumor: " + TUMOR))
    command5 = " ".join([
        data['SENTIEON_INSTALL_DIR'] + "/bin/sentieon", "plot InsertSizeMetricAlgo",
        "-o", TUMOR + "T_is-report.pdf",
        TUMOR + "T_is_metrics.txt",
    ])
    print(command5 + '\n')
    commands.append(command5)

if exec_scripts:
    commandsParallel(commands, batchSize, data['samplesParallel'])

In [None]:
# PDF viewer for the tumor insert-size reports: a dropdown of tumor samples plus
# an iframe that loads <tumor>_T_is-report.pdf.
# Needs the JS helper cell (myDropdown, myPlots) and the tumor alignment-table
# cell (objTum) to have been rendered earlier in the page.
# The former <body onload="myPlots()"> wrapper is gone: inside a notebook output
# it is ignored, and in an exported page it called myPlots without arguments.
id = id + 1  # next unique suffix for this viewer's DOM ids

input_form = """
<select id="selectSample{n}"></select>

<h1>The sample is:  <span id="myText{n}"></span></h1>

<div>
  <iframe id="pdf{n}" src="" style="width:100%;height:700px;"></iframe>
</div>
""".format(n=id)

javascript = """
<script type="text/Javascript">

var select = document.getElementById("selectSample{n}");

myDropdown(select, Object.keys(objTum));

var type{n} = "_T_is-report.pdf";

myPlots(type{n}, {n});

select.onchange = function() {{ myPlots(type{n}, {n}); }};

</script>
""".format(n=id)

HTML(input_form + javascript)

# 3a. Remove Duplicate Reads for normal sample

In [None]:
# Duplicate removal, pass 1 (normal): LocusCollector writes the score file that
# the Dedup command in the next cell reads.
commands = []

for SAMPLE in uniqNorm:
    display(Markdown("### Normal: " + SAMPLE))
    command1 = " ".join([
        data['SENTIEON_INSTALL_DIR'] + "/bin/sentieon", "driver",
        "-t", str(data['nt']),
        "-i", SAMPLE + "N_sorted.bam",
        "--algo LocusCollector --fun score_info", SAMPLE + "N_score.txt",
    ])
    print(command1 + '\n')
    commands.append(command1)

if exec_scripts:
    commandsParallel(commands, normSize, data['samplesParallel'])

In [None]:
display(Markdown("___"))

# Duplicate removal, pass 2 (normal): Dedup --rmdup uses the score file from the
# previous cell and writes the dedup metrics plus <sample>N_deduped.bam.
commands = []
for SAMPLE in uniqNorm:
    display(Markdown("### Normal: " + SAMPLE))
    command2 = " ".join([
        data['SENTIEON_INSTALL_DIR'] + "/bin/sentieon", "driver",
        "-t", str(data['nt']),
        "-i", SAMPLE + "N_sorted.bam",
        "--algo Dedup --rmdup",
        "--score_info", SAMPLE + "N_score.txt",
        "--metrics", SAMPLE + "N_dedup_metrics.txt",
        SAMPLE + "N_deduped.bam",
    ])
    print(command2 + '\n')
    commands.append(command2)

if exec_scripts:
    commandsParallel(commands, normSize, data['samplesParallel'])

# 3a. Remove Duplicate Reads for tumor sample

In [None]:
# Duplicate removal, pass 1 (tumor): LocusCollector writes the score file that
# the Dedup command in the next cell reads.
commands = []

for row in batch[:batchSize]:
    TUMOR = row[1]
    display(Markdown("### Tumor: " + TUMOR))
    command1 = " ".join([
        data['SENTIEON_INSTALL_DIR'] + "/bin/sentieon", "driver",
        "-t", str(data['nt']),
        "-i", TUMOR + "T_sorted.bam",
        "--algo LocusCollector --fun score_info", TUMOR + "T_score.txt",
    ])
    print(command1 + '\n')
    commands.append(command1)

if exec_scripts:
    commandsParallel(commands, batchSize, data['samplesParallel'])

In [None]:
# Duplicate removal, pass 2 (tumor): Dedup --rmdup uses the score file from the
# previous cell and writes the dedup metrics plus <tumor>T_deduped.bam.
commands = []

display(Markdown("___"))
for row in batch[:batchSize]:
    TUMOR = row[1]
    display(Markdown("### Tumor: " + TUMOR))
    command2 = " ".join([
        data['SENTIEON_INSTALL_DIR'] + "/bin/sentieon", "driver",
        "-t", str(data['nt']),
        "-i", TUMOR + "T_sorted.bam",
        "--algo Dedup --rmdup",
        "--score_info", TUMOR + "T_score.txt",
        "--metrics", TUMOR + "T_dedup_metrics.txt",
        TUMOR + "T_deduped.bam",
    ])
    print(command2 + '\n')
    commands.append(command2)

if exec_scripts:
    commandsParallel(commands, batchSize, data['samplesParallel'])

# 4a. Indel realigner for normal sample

In [None]:
# Indel realignment (normal): deduplicated BAM -> <sample>N_realigned.bam, using
# the Mills and 1000G known-indel files from the options.
commands = []

for SAMPLE in uniqNorm:
    display(Markdown("### Normal: " + SAMPLE))
    command1 = " ".join([
        data['SENTIEON_INSTALL_DIR'] + "/bin/sentieon", "driver",
        "-r", data['fasta'],
        "-t", str(data['nt']),
        "-i", SAMPLE + "N_deduped.bam",
        "--algo Realigner",
        "-k", data['known_Mills_indels'],
        "-k", data['known_1000G_indels'],
        SAMPLE + "N_realigned.bam",
    ])
    print(command1 + '\n')
    commands.append(command1)

if exec_scripts:
    commandsParallel(commands, normSize, data['samplesParallel'])

# 4a. Indel realigner for tumor sample

In [None]:
# Indel realignment (tumor): deduplicated BAM -> <tumor>T_realigned.bam, using
# the Mills and 1000G known-indel files from the options.
commands = []

for row in batch[:batchSize]:
    TUMOR = row[1]
    display(Markdown("### Tumor: " + TUMOR))
    command1 = " ".join([
        data['SENTIEON_INSTALL_DIR'] + "/bin/sentieon", "driver",
        "-r", data['fasta'],
        "-t", str(data['nt']),
        "-i", TUMOR + "T_deduped.bam",
        "--algo Realigner",
        "-k", data['known_Mills_indels'],
        "-k", data['known_1000G_indels'],
        TUMOR + "T_realigned.bam",
    ])
    print(command1 + '\n')
    commands.append(command1)

if exec_scripts:
    commandsParallel(commands, batchSize, data['samplesParallel'])

# 5a. Base recalibration for normal sample

In [None]:
# Base recalibration, step 1 (normal): QualCal on the realigned BAM writes the
# recalibration table <sample>N_recal_data.table.
commands = []

for SAMPLE in uniqNorm:
    display(Markdown("### Normal: " + SAMPLE))
    command1 = " ".join([
        data['SENTIEON_INSTALL_DIR'] + "/bin/sentieon", "driver",
        "-r", data['fasta'],
        "-t", str(data['nt']),
        "-i", SAMPLE + "N_realigned.bam",
        "--algo QualCal",
        "-k", data['dbsnp'],
        "-k", data['known_Mills_indels'],
        "-k", data['known_1000G_indels'],
        SAMPLE + "N_recal_data.table",
    ])
    print(command1 + '\n')
    commands.append(command1)

if exec_scripts:
    commandsParallel(commands, normSize, data['samplesParallel'])

In [None]:
display(Markdown("___"))

# Base recalibration, step 2 (normal): QualCal is run again with the first table
# applied (-q) and writes <sample>N_recal_data.table.post.
commands = []
for SAMPLE in uniqNorm:
    display(Markdown("### Normal: " + SAMPLE))
    command2 = " ".join([
        data['SENTIEON_INSTALL_DIR'] + "/bin/sentieon", "driver",
        "-r", data['fasta'],
        "-t", str(data['nt']),
        "-i", SAMPLE + "N_realigned.bam",
        "-q", SAMPLE + "N_recal_data.table",
        "--algo QualCal",
        "-k", data['dbsnp'],
        "-k", data['known_Mills_indels'],
        "-k", data['known_1000G_indels'],
        SAMPLE + "N_recal_data.table.post",
    ])
    print(command2 + '\n')
    commands.append(command2)

if exec_scripts:
    commandsParallel(commands, normSize, data['samplesParallel'])

In [None]:
display(Markdown("___"))

# Base recalibration, step 3 (normal): combine the before/after tables into
# <sample>N_recal.csv, the input of the plotting command in the next cell.
commands = []
for SAMPLE in uniqNorm:
    display(Markdown("### Normal: " + SAMPLE))
    command3 = " ".join([
        data['SENTIEON_INSTALL_DIR'] + "/bin/sentieon", "driver",
        "-t", str(data['nt']),
        "--algo QualCal --plot",
        "--before", SAMPLE + "N_recal_data.table",
        "--after", SAMPLE + "N_recal_data.table.post",
        SAMPLE + "N_recal.csv",
    ])
    print(command3 + '\n')
    commands.append(command3)

if exec_scripts:
    commandsParallel(commands, normSize, data['samplesParallel'])

In [None]:
display(Markdown("___"))

# Base recalibration, step 4 (normal): render <sample>N_recal.csv as a PDF report.
commands = []
for SAMPLE in uniqNorm:
    display(Markdown("### Normal: " + SAMPLE))
    command4 = " ".join([
        data['SENTIEON_INSTALL_DIR'] + "/bin/sentieon", "plot QualCal",
        "-o", SAMPLE + "N_recal_plots.pdf",
        SAMPLE + "N_recal.csv",
    ])
    print(command4 + '\n')
    commands.append(command4)

if exec_scripts:
    commandsParallel(commands, normSize, data['samplesParallel'])

In [None]:
# PDF viewer for the normal base-recalibration plots: a dropdown of normal
# samples plus an iframe that loads <sample>_N_recal_plots.pdf.
# Needs the JS helper cell (myDropdown, myPlots) and the normal alignment-table
# cell (objNorm) to have been rendered earlier in the page.
# The former <body onload="myPlots()"> wrapper is gone: inside a notebook output
# it is ignored, and in an exported page it called myPlots without arguments.
id = id + 1  # next unique suffix for this viewer's DOM ids

input_form = """
<select id="selectSample{n}"></select>

<h1>The sample is:  <span id="myText{n}"></span></h1>

<div>
  <iframe id="pdf{n}" src="" style="width:100%;height:700px;"></iframe>
</div>
""".format(n=id)

javascript = """
<script type="text/Javascript">

var select = document.getElementById("selectSample{n}");

myDropdown(select, Object.keys(objNorm));

var type{n} = "_N_recal_plots.pdf";

myPlots(type{n}, {n});

select.onchange = function() {{ myPlots(type{n}, {n}); }};

</script>
""".format(n=id)

HTML(input_form + javascript)




In [None]:
display(Markdown("___"))

# Base recalibration, step 5 (normal): ReadWriter applies the recalibration
# table (-q) to the realigned BAM and writes <sample>N_recal.bam.
commands = []
for SAMPLE in uniqNorm:
    display(Markdown("### Normal: " + SAMPLE))
    command5 = " ".join([
        data['SENTIEON_INSTALL_DIR'] + "/bin/sentieon", "driver",
        "-r", data['fasta'],
        "-t", str(data['nt']),
        "-i", SAMPLE + "N_realigned.bam",
        "-q", SAMPLE + "N_recal_data.table",
        "--algo ReadWriter", SAMPLE + "N_recal.bam",
    ])
    print(command5 + '\n')
    commands.append(command5)

if exec_scripts:
    commandsParallel(commands, normSize, data['samplesParallel'])

# 5a. Base recalibration for tumor sample

In [None]:
# Base recalibration, step 1 (tumor): QualCal on the realigned BAM writes the
# recalibration table <tumor>T_recal_data.table.
commands = []
for row in batch[:batchSize]:
    TUMOR = row[1]
    display(Markdown("### Tumor: " + TUMOR))
    command1 = " ".join([
        data['SENTIEON_INSTALL_DIR'] + "/bin/sentieon", "driver",
        "-r", data['fasta'],
        "-t", str(data['nt']),
        "-i", TUMOR + "T_realigned.bam",
        "--algo QualCal",
        "-k", data['dbsnp'],
        "-k", data['known_Mills_indels'],
        "-k", data['known_1000G_indels'],
        TUMOR + "T_recal_data.table",
    ])
    print(command1 + '\n')
    commands.append(command1)

if exec_scripts:
    commandsParallel(commands, batchSize, data['samplesParallel'])

In [None]:
display(Markdown("___"))
# Base recalibration, step 2 (tumor): QualCal is run again with the first table
# applied (-q) and writes <tumor>T_recal_data.table.post.
commands = []
for row in batch[:batchSize]:
    TUMOR = row[1]
    display(Markdown("### Tumor: " + TUMOR))
    command2 = " ".join([
        data['SENTIEON_INSTALL_DIR'] + "/bin/sentieon", "driver",
        "-r", data['fasta'],
        "-t", str(data['nt']),
        "-i", TUMOR + "T_realigned.bam",
        "-q", TUMOR + "T_recal_data.table",
        "--algo QualCal",
        "-k", data['dbsnp'],
        "-k", data['known_Mills_indels'],
        "-k", data['known_1000G_indels'],
        TUMOR + "T_recal_data.table.post",
    ])
    print(command2 + '\n')
    commands.append(command2)

if exec_scripts:
    commandsParallel(commands, batchSize, data['samplesParallel'])

In [None]:
display(Markdown("___"))

# Base recalibration, step 3 (tumor): combine the before/after tables into
# <tumor>T_recal.csv, the input of the plotting command in the next cell.
commands = []
for row in batch[:batchSize]:
    TUMOR = row[1]
    display(Markdown("### Tumor: " + TUMOR))
    command3 = " ".join([
        data['SENTIEON_INSTALL_DIR'] + "/bin/sentieon", "driver",
        "-t", str(data['nt']),
        "--algo QualCal --plot",
        "--before", TUMOR + "T_recal_data.table",
        "--after", TUMOR + "T_recal_data.table.post",
        TUMOR + "T_recal.csv",
    ])
    print(command3 + '\n')
    commands.append(command3)

if exec_scripts:
    commandsParallel(commands, batchSize, data['samplesParallel'])

In [None]:
display(Markdown("___"))

# Base recalibration, step 4 (tumor): render <tumor>T_recal.csv as a PDF report.
commands = []
for row in batch[:batchSize]:
    TUMOR = row[1]
    display(Markdown("### Tumor: " + TUMOR))
    command4 = " ".join([
        data['SENTIEON_INSTALL_DIR'] + "/bin/sentieon", "plot QualCal",
        "-o", TUMOR + "T_recal_plots.pdf",
        TUMOR + "T_recal.csv",
    ])
    print(command4 + '\n')
    commands.append(command4)

if exec_scripts:
    commandsParallel(commands, batchSize, data['samplesParallel'])

In [None]:
# PDF viewer for the tumor base-recalibration plots: a dropdown of tumor samples
# plus an iframe that loads <tumor>_T_recal_plots.pdf.
# Needs the JS helper cell (myDropdown, myPlots) and the tumor alignment-table
# cell (objTum) to have been rendered earlier in the page.
# The former <body onload="myPlots()"> wrapper is gone: inside a notebook output
# it is ignored, and in an exported page it called myPlots without arguments.
id = id + 1  # next unique suffix for this viewer's DOM ids

input_form = """
<select id="selectSample{n}"></select>

<h1>The sample is:  <span id="myText{n}"></span></h1>

<div>
  <iframe id="pdf{n}" src="" style="width:100%;height:700px;"></iframe>
</div>
""".format(n=id)

javascript = """
<script type="text/Javascript">

var select = document.getElementById("selectSample{n}");

myDropdown(select, Object.keys(objTum));

var type{n} = "_T_recal_plots.pdf";

myPlots(type{n}, {n});

select.onchange = function() {{ myPlots(type{n}, {n}); }};

</script>
""".format(n=id)

HTML(input_form + javascript)

In [None]:
display(Markdown("___"))

# ReadWriter step: one `sentieon driver --algo ReadWriter` command per tumor,
# reading <TUMOR>T_realigned.bam with the <TUMOR>T_recal_data.table (-q) and
# writing the recalibrated <TUMOR>T_recal.bam.
commands = []
for row in batch[:batchSize]:
    TUMOR = row[1]
    display(Markdown("### Tumor: " + TUMOR))
    command5 = "".join([
        data['SENTIEON_INSTALL_DIR'],
        "/bin/sentieon driver -r ",
        data['fasta'],
        " -t ",
        str(data['nt']),
        " -i ",
        TUMOR,
        "T_realigned.bam -q ",
        TUMOR,
        "T_recal_data.table --algo ReadWriter ",
        TUMOR,
        "T_recal.bam",
    ])
    print(command5 + '\n')
    commands.append(command5)

# Commands are only printed unless exec_scripts is switched on.
if exec_scripts:
    commandsParallel(commands, batchSize, data['samplesParallel'])

# 7a. HC Variant caller (normal)

In [None]:
# One `sentieon driver --algo Haplotyper` command per unique normal sample,
# reading <SAMPLE>N_recal.bam and writing <SAMPLE>N-output-hc.vcf.gz.
commands = []
for SAMPLE in uniqNorm[:normSize]:
    display(Markdown("### Normal: " + SAMPLE))
    command1 = "".join([
        data['SENTIEON_INSTALL_DIR'],
        "/bin/sentieon driver -r ",
        data['fasta'],
        " -t ",
        str(data['nt']),
        " -i ",
        SAMPLE,
        "N_recal.bam --algo Haplotyper -d ",
        data['dbsnp'],
        " --emit_conf=30 --call_conf=30 ",
        SAMPLE,
        "N-output-hc.vcf.gz",
    ])
    print(command1 + '\n')
    commands.append(command1)

# Commands are only printed unless exec_scripts is switched on.
if exec_scripts:
    commandsParallel(commands, normSize, data['samplesParallel'])

# 7a. HC Variant caller (tumor)

In [None]:
# One `sentieon driver --algo Haplotyper` command per tumor sample,
# reading <TUMOR>T_recal.bam and writing <TUMOR>T-output-hc.vcf.gz.
commands = []
for row in batch[:batchSize]:
    TUMOR = row[1]
    display(Markdown("### Tumor: " + TUMOR))
    command1 = "".join([
        data['SENTIEON_INSTALL_DIR'],
        "/bin/sentieon driver -r ",
        data['fasta'],
        " -t ",
        str(data['nt']),
        " -i ",
        TUMOR,
        "T_recal.bam --algo Haplotyper -d ",
        data['dbsnp'],
        " --emit_conf=30 --call_conf=30 ",
        TUMOR,
        "T-output-hc.vcf.gz",
    ])
    print(command1 + '\n')
    commands.append(command1)

# Commands are only printed unless exec_scripts is switched on.
if exec_scripts:
    commandsParallel(commands, batchSize, data['samplesParallel'])

# 8a. Variant calling DNAscope (normal)

In [None]:
# One `sentieon driver --algo DNAscope` command per unique normal sample, run
# with the ML_MODEL_N model and writing <SAMPLE>N-tmpDNAscope.vcf.gz.
commands = []
for SAMPLE in uniqNorm[:normSize]:
    display(Markdown("### Normal: " + SAMPLE))
    command1 = "".join([
        data['SENTIEON_INSTALL_DIR'],
        "/bin/sentieon driver -t ",
        str(data['nt']),
        " -r ",
        data['fasta'],
        " -i ",
        SAMPLE,
        "N_recal.bam --algo DNAscope -d ",
        data['dbsnp'],
        " --model ",
        data['ML_MODEL_N'],
        " ",
        SAMPLE,
        "N-tmpDNAscope.vcf.gz",
    ])
    print(command1 + '\n')
    commands.append(command1)

# Commands are only printed unless exec_scripts is switched on.
if exec_scripts:
    commandsParallel(commands, normSize, data['samplesParallel'])

In [None]:
display(Markdown("___"))

# One `sentieon driver --algo DNAModelApply` command per unique normal sample:
# applies the ML_MODEL_N model to <SAMPLE>N-tmpDNAscope.vcf.gz and writes
# <SAMPLE>N-DNAscope.vcf.gz.
commands = []
for SAMPLE in uniqNorm[:normSize]:
    display(Markdown("### Normal: " + SAMPLE))
    command2 = "".join([
        data['SENTIEON_INSTALL_DIR'],
        "/bin/sentieon driver -t ",
        str(data['nt']),
        " -r ",
        data['fasta'],
        " --algo DNAModelApply --model ",
        data['ML_MODEL_N'],
        " -v ",
        SAMPLE,
        "N-tmpDNAscope.vcf.gz ",
        SAMPLE,
        "N-DNAscope.vcf.gz",
    ])
    print(command2 + '\n')
    commands.append(command2)

# Commands are only printed unless exec_scripts is switched on.
if exec_scripts:
    commandsParallel(commands, normSize, data['samplesParallel'])

In [None]:
display(Markdown("___"))

# Build one `bcftools filter` command per unique normal sample. It soft-filters
# <SAMPLE>N-DNAscope.vcf.gz with the ML_FAIL label (-s) using the include
# expression INFO/ML_PROB > 0.81 and writes <SAMPLE>N-filtDNAscope.vcf.gz.
# data['bcfdir'] is used here as the bcftools executable.
commands = []
for i in range(normSize):
    SAMPLE = uniqNorm[i]
    display(Markdown("### Normal: " + SAMPLE))
    # The -i expression contains spaces and '>', so it must be quoted: unquoted,
    # a shell treats '>' as an output redirection and shlex-style splitting
    # breaks the expression into three separate arguments.
    command3 = (
        data['bcfdir']
        + " filter -s ML_FAIL -i 'INFO/ML_PROB > 0.81' "
        + SAMPLE + "N-DNAscope.vcf.gz -O z -m x -o "
        + SAMPLE + "N-filtDNAscope.vcf.gz"
    )
    print(command3+'\n')
    commands.append(command3)

# Commands are only printed unless exec_scripts is switched on.
if exec_scripts:
    commandsParallel(commands,normSize,data['samplesParallel'])

# 8a. Variant calling DNAscope (tumor)

In [None]:
# One `sentieon driver --algo DNAscope` command per tumor sample, run with the
# ML_MODEL_N model and writing <TUMOR>T-tmpDNAscope.vcf.gz.
commands = []
for row in batch[:batchSize]:
    TUMOR = row[1]
    display(Markdown("### Tumor: " + TUMOR))
    command1 = "".join([
        data['SENTIEON_INSTALL_DIR'],
        "/bin/sentieon driver -t ",
        str(data['nt']),
        " -r ",
        data['fasta'],
        " -i ",
        TUMOR,
        "T_recal.bam --algo DNAscope -d ",
        data['dbsnp'],
        " --model ",
        data['ML_MODEL_N'],
        " ",
        TUMOR,
        "T-tmpDNAscope.vcf.gz",
    ])
    print(command1 + '\n')
    commands.append(command1)

# Commands are only printed unless exec_scripts is switched on.
if exec_scripts:
    commandsParallel(commands, batchSize, data['samplesParallel'])

In [None]:
display(Markdown("___"))

# One `sentieon driver --algo DNAModelApply` command per tumor sample: applies
# the ML_MODEL_N model to <TUMOR>T-tmpDNAscope.vcf.gz and writes
# <TUMOR>T-DNAscope.vcf.gz.
commands = []
for row in batch[:batchSize]:
    TUMOR = row[1]
    display(Markdown("### Tumor: " + TUMOR))
    command2 = "".join([
        data['SENTIEON_INSTALL_DIR'],
        "/bin/sentieon driver -t ",
        str(data['nt']),
        " -r ",
        data['fasta'],
        " --algo DNAModelApply --model ",
        data['ML_MODEL_N'],
        " -v ",
        TUMOR,
        "T-tmpDNAscope.vcf.gz ",
        TUMOR,
        "T-DNAscope.vcf.gz",
    ])
    print(command2 + '\n')
    commands.append(command2)

# Commands are only printed unless exec_scripts is switched on.
if exec_scripts:
    commandsParallel(commands, batchSize, data['samplesParallel'])

In [None]:
display(Markdown("___"))

# Build one `bcftools filter` command per tumor sample. It soft-filters
# <TUMOR>T-DNAscope.vcf.gz with the ML_FAIL label (-s) using the include
# expression INFO/ML_PROB > 0.81 and writes <TUMOR>T-filtDNAscope.vcf.gz.
# data['bcfdir'] is used here as the bcftools executable.
commands = []
for i in range(batchSize):
    TUMOR = batch[i][1]
    display(Markdown("### Tumor: "+TUMOR))
    # The -i expression contains spaces and '>', so it must be quoted: unquoted,
    # a shell treats '>' as an output redirection and shlex-style splitting
    # breaks the expression into three separate arguments.
    command3 = (
        data['bcfdir']
        + " filter -s ML_FAIL -i 'INFO/ML_PROB > 0.81' "
        + TUMOR + "T-DNAscope.vcf.gz -O z -m x -o "
        + TUMOR + "T-filtDNAscope.vcf.gz"
    )
    print(command3+'\n')
    commands.append(command3)

# Commands are only printed unless exec_scripts is switched on.
if exec_scripts:
    commandsParallel(commands,batchSize,data['samplesParallel'])

# 9a. Variant Annotation (normal)

In [None]:
# One snpEff annotation pipeline per unique normal sample: annotates
# <SAMPLE>N-output-hc.vcf.gz and pipes the result through data['bgzipdir']
# into <SAMPLE>N-output-hc.snpEff.vcf.gz.
commands = []
for SAMPLE in uniqNorm[:normSize]:
    display(Markdown("### Normal: " + SAMPLE))
    # Per-sample directory handed to the JVM as java.io.tmpdir.
    data['SENTIEON_TMPDIR'] = data['workdir'] + SAMPLE + "/tmpdir"
    command1 = "".join([
        "/storage/gluster/vol1/bcbio/anaconda/bin/snpEff -Xms1000m -Xmx36400m -Djava.io.tmpdir=",
        data['SENTIEON_TMPDIR'],
        " eff -noStats -t -noLog -dataDir /storage/gluster/vol1/bcbio/genomes/Hsapiens/hg19/snpeff -hgvs -noLof -i vcf -o vcf -noInteraction -noMotif -noNextProt -strict GRCh37.75 ",
        SAMPLE,
        "N-output-hc.vcf.gz | ",
        data['bgzipdir'],
        " --threads ",
        str(data['nt']),
        " -c > ",
        SAMPLE,
        "N-output-hc.snpEff.vcf.gz",
    ])
    print(command1 + '\n')
    commands.append(command1)

# Commands are only printed unless exec_scripts is switched on.
if exec_scripts:
    commandsParallel(commands, normSize, data['samplesParallel'])

In [None]:
display(Markdown("___"))

# One snpEff annotation pipeline per unique normal sample: annotates
# <SAMPLE>N-filtDNAscope.vcf.gz and pipes the result through data['bgzipdir']
# into <SAMPLE>N-filtDNAscope.snpEff.vcf.gz.
commands = []
for SAMPLE in uniqNorm[:normSize]:
    display(Markdown("### Normal: " + SAMPLE))
    # Per-sample directory handed to the JVM as java.io.tmpdir.
    data['SENTIEON_TMPDIR'] = data['workdir'] + SAMPLE + "/tmpdir"
    command2 = "".join([
        "/storage/gluster/vol1/bcbio/anaconda/bin/snpEff -Xms1000m -Xmx36400m -Djava.io.tmpdir=",
        data['SENTIEON_TMPDIR'],
        " eff -noStats -t -noLog -dataDir /storage/gluster/vol1/bcbio/genomes/Hsapiens/hg19/snpeff -hgvs -noLof -i vcf -o vcf -noInteraction -noMotif -noNextProt -strict GRCh37.75 ",
        SAMPLE,
        "N-filtDNAscope.vcf.gz | ",
        data['bgzipdir'],
        " --threads ",
        str(data['nt']),
        " -c > ",
        SAMPLE,
        "N-filtDNAscope.snpEff.vcf.gz",
    ])
    print(command2 + '\n')
    commands.append(command2)

# Commands are only printed unless exec_scripts is switched on.
if exec_scripts:
    commandsParallel(commands, normSize, data['samplesParallel'])

# 9a. Variant Annotation (tumor)

In [None]:
# One snpEff annotation pipeline per tumor sample: annotates
# <TUMOR>T-output-hc.vcf.gz and pipes the result through data['bgzipdir']
# into <TUMOR>T-output-hc.snpEff.vcf.gz.
commands = []
for row in batch[:batchSize]:
    TUMOR = row[1]
    display(Markdown("### Tumor: " + TUMOR))
    # Per-sample directory handed to the JVM as java.io.tmpdir.
    data['SENTIEON_TMPDIR'] = data['workdir'] + TUMOR + "/tmpdir"
    command1 = "".join([
        "/storage/gluster/vol1/bcbio/anaconda/bin/snpEff -Xms1000m -Xmx36400m -Djava.io.tmpdir=",
        data['SENTIEON_TMPDIR'],
        " eff -noStats -t -noLog -dataDir /storage/gluster/vol1/bcbio/genomes/Hsapiens/hg19/snpeff -hgvs -noLof -i vcf -o vcf -noInteraction -noMotif -noNextProt -strict GRCh37.75 ",
        TUMOR,
        "T-output-hc.vcf.gz | ",
        data['bgzipdir'],
        " --threads ",
        str(data['nt']),
        " -c > ",
        TUMOR,
        "T-output-hc.snpEff.vcf.gz",
    ])
    print(command1 + '\n')
    commands.append(command1)

# Commands are only printed unless exec_scripts is switched on.
if exec_scripts:
    commandsParallel(commands, batchSize, data['samplesParallel'])

In [None]:
display(Markdown("___"))

# One snpEff annotation pipeline per tumor sample: annotates
# <TUMOR>T-filtDNAscope.vcf.gz and pipes the result through data['bgzipdir']
# into <TUMOR>T-filtDNAscope.snpEff.vcf.gz.
commands = []
for row in batch[:batchSize]:
    TUMOR = row[1]
    display(Markdown("### Tumor: " + TUMOR))
    # Per-sample directory handed to the JVM as java.io.tmpdir.
    data['SENTIEON_TMPDIR'] = data['workdir'] + TUMOR + "/tmpdir"
    command2 = "".join([
        "/storage/gluster/vol1/bcbio/anaconda/bin/snpEff -Xms1000m -Xmx36400m -Djava.io.tmpdir=",
        data['SENTIEON_TMPDIR'],
        " eff -noStats -t -noLog -dataDir /storage/gluster/vol1/bcbio/genomes/Hsapiens/hg19/snpeff -hgvs -noLof -i vcf -o vcf -noInteraction -noMotif -noNextProt -strict GRCh37.75 ",
        TUMOR,
        "T-filtDNAscope.vcf.gz | ",
        data['bgzipdir'],
        " --threads ",
        str(data['nt']),
        " -c > ",
        TUMOR,
        "T-filtDNAscope.snpEff.vcf.gz",
    ])
    print(command2 + '\n')
    commands.append(command2)

# Commands are only printed unless exec_scripts is switched on.
if exec_scripts:
    commandsParallel(commands, batchSize, data['samplesParallel'])

# 7b. Somatic Variant Calling TNseq

In [None]:
# One `sentieon driver --algo TNsnv` command per tumor/normal pair, using the
# TNsnv panel of normals, COSMIC and dbSNP resources; writes
# <TUMOR>-call.stats and <TUMOR>-TNsnv.vcf.gz.
commands = []
for row in batch[:batchSize]:
    SAMPLE, TUMOR = row[0], row[1]
    display(Markdown("### Tumor: " + TUMOR + " Normal: " + SAMPLE))
    command1 = "".join([
        data['SENTIEON_INSTALL_DIR'],
        "/bin/sentieon driver -r ",
        data['fasta'],
        " -t ",
        str(data['nt']),
        " -i ",
        TUMOR,
        "T_recal.bam -i ",
        SAMPLE,
        "N_recal.bam --algo TNsnv --tumor_sample ",
        TUMOR,
        "T --normal_sample ",
        SAMPLE,
        "N --pon ",
        data['panel_of_normal_TNsnv'],
        " --cosmic ",
        data['cosmic_db'],
        " --dbsnp ",
        data['dbsnp'],
        " --call_stats_out ",
        TUMOR,
        "-call.stats ",
        TUMOR,
        "-TNsnv.vcf.gz",
    ])
    print(command1 + '\n')
    commands.append(command1)

# Commands are only printed unless exec_scripts is switched on.
if exec_scripts:
    commandsParallel(commands, batchSize, data['samplesParallel'])

In [None]:
display(Markdown("___"))

# One `sentieon driver --algo TNhaplotyper` command per tumor/normal pair,
# using the TNhaplotyper panel of normals, COSMIC and dbSNP resources; writes
# <TUMOR>-TNhaplotyper.vcf.gz.
commands = []
for row in batch[:batchSize]:
    SAMPLE, TUMOR = row[0], row[1]
    display(Markdown("### Tumor: " + TUMOR + " Normal: " + SAMPLE))
    command2 = "".join([
        data['SENTIEON_INSTALL_DIR'],
        "/bin/sentieon driver -r ",
        data['fasta'],
        " -t ",
        str(data['nt']),
        " -i ",
        TUMOR,
        "T_recal.bam -i ",
        SAMPLE,
        "N_recal.bam --algo TNhaplotyper --tumor_sample ",
        TUMOR,
        "T --normal_sample ",
        SAMPLE,
        "N --pon ",
        data['panel_of_normal_TNhaplotyper'],
        " --cosmic ",
        data['cosmic_db'],
        " --dbsnp ",
        data['dbsnp'],
        " ",
        TUMOR,
        "-TNhaplotyper.vcf.gz",
    ])
    print(command2 + '\n')
    commands.append(command2)

# Commands are only printed unless exec_scripts is switched on.
if exec_scripts:
    commandsParallel(commands, batchSize, data['samplesParallel'])

# 8b. Somatic Variant calling TNscope

In [None]:
# One `sentieon driver --algo TNscope` command per tumor/normal pair, with the
# fixed sensitivity options spelled out in the command; writes the intermediate
# <TUMOR>-tmpTNscope.vcf.gz.
commands = []
for row in batch[:batchSize]:
    SAMPLE, TUMOR = row[0], row[1]
    display(Markdown("### Tumor: " + TUMOR + " Normal: " + SAMPLE))
    command1 = "".join([
        data['SENTIEON_INSTALL_DIR'],
        "/bin/sentieon driver -r ",
        data['fasta'],
        " -t ",
        str(data['nt']),
        " -i ",
        TUMOR,
        "T_recal.bam -i ",
        SAMPLE,
        "N_recal.bam --algo TNscope --tumor_sample ",
        TUMOR,
        "T --normal_sample ",
        SAMPLE,
        "N --dbsnp ",
        data['dbsnp'],
        " --clip_by_minbq 1 --max_error_per_read 3 --min_init_tumor_lod 2.0 --min_base_qual 10 --min_base_qual_asm 10 --min_tumor_allele_frac 0.00005 ",
        TUMOR,
        "-tmpTNscope.vcf.gz",
    ])
    print(command1 + '\n')
    commands.append(command1)

# Commands are only printed unless exec_scripts is switched on.
if exec_scripts:
    commandsParallel(commands, batchSize, data['samplesParallel'])

In [None]:
display(Markdown("___"))

# One `sentieon driver --algo TNModelApply` command per tumor/normal pair:
# applies the ML_MODEL_T model to <TUMOR>-tmpTNscope.vcf.gz and writes
# <TUMOR>-TNscope.vcf.gz.
commands = []
for row in batch[:batchSize]:
    SAMPLE, TUMOR = row[0], row[1]
    display(Markdown("### Tumor: " + TUMOR + " Normal: " + SAMPLE))
    command2 = "".join([
        data['SENTIEON_INSTALL_DIR'],
        "/bin/sentieon driver -t ",
        str(data['nt']),
        " -r ",
        data['fasta'],
        " --algo TNModelApply --model ",
        data['ML_MODEL_T'],
        " -v ",
        TUMOR,
        "-tmpTNscope.vcf.gz ",
        TUMOR,
        "-TNscope.vcf.gz",
    ])
    print(command2 + '\n')
    commands.append(command2)

# Commands are only printed unless exec_scripts is switched on.
if exec_scripts:
    commandsParallel(commands, batchSize, data['samplesParallel'])

In [None]:
display(Markdown("___"))

# Build one `bcftools filter` command per tumor/normal pair. It soft-filters
# <TUMOR>-TNscope.vcf.gz with the ML_FAIL label (-s) using the include
# expression INFO/ML_PROB > 0.81 and writes <TUMOR>-filtTNscope.vcf.gz.
# data['bcfdir'] is used here as the bcftools executable.
commands = []
for i in range(batchSize):
    SAMPLE = batch[i][0]
    TUMOR = batch[i][1]
    display(Markdown("### Tumor: "+TUMOR + " Normal: " + SAMPLE))
    # The -i expression contains spaces and '>', so it must be quoted: unquoted,
    # a shell treats '>' as an output redirection and shlex-style splitting
    # breaks the expression into three separate arguments. The stray "\I"
    # (an invalid Python escape sequence) from the original literal is dropped.
    command3 = (
        data['bcfdir']
        + " filter -s ML_FAIL -i 'INFO/ML_PROB > 0.81' "
        + TUMOR + "-TNscope.vcf.gz -O z -m x -o "
        + TUMOR + "-filtTNscope.vcf.gz"
    )
    print(command3+'\n')
    commands.append(command3)

# Commands are only printed unless exec_scripts is switched on.
if exec_scripts:
    commandsParallel(commands,batchSize,data['samplesParallel'])

# 9b. Somatic Variant Annotation

In [None]:
# One snpEff annotation pipeline per tumor/normal pair: annotates
# <TUMOR>-TNsnv.vcf.gz and pipes the result through data['bgzipdir'] into
# <TUMOR>-TNsnv.snpEff.vcf.gz.
commands = []
for row in batch[:batchSize]:
    SAMPLE, TUMOR = row[0], row[1]
    display(Markdown("### Tumor: " + TUMOR + " Normal: " + SAMPLE))
    # Per-tumor directory handed to the JVM as java.io.tmpdir.
    data['SENTIEON_TMPDIR'] = data['workdir'] + TUMOR + "/tmpdir"
    command1 = "".join([
        "/storage/gluster/vol1/bcbio/anaconda/bin/snpEff -Xms1000m -Xmx36400m -Djava.io.tmpdir=",
        data['SENTIEON_TMPDIR'],
        " eff -noStats -t -noLog -dataDir /storage/gluster/vol1/bcbio/genomes/Hsapiens/hg19/snpeff -hgvs -noLof -i vcf -o vcf -noInteraction -noMotif -noNextProt -strict GRCh37.75 ",
        TUMOR,
        "-TNsnv.vcf.gz | ",
        data['bgzipdir'],
        " --threads ",
        str(data['nt']),
        " -c > ",
        TUMOR,
        "-TNsnv.snpEff.vcf.gz",
    ])
    print(command1 + '\n')
    commands.append(command1)

# Commands are only printed unless exec_scripts is switched on.
if exec_scripts:
    commandsParallel(commands, batchSize, data['samplesParallel'])

In [None]:
display(Markdown("___"))

# One snpEff annotation pipeline per tumor/normal pair: annotates
# <TUMOR>-TNhaplotyper.vcf.gz and pipes the result through data['bgzipdir']
# into <TUMOR>-TNhaplotyper.snpEff.vcf.gz.
commands = []
for row in batch[:batchSize]:
    SAMPLE, TUMOR = row[0], row[1]
    display(Markdown("### Tumor: " + TUMOR + " Normal: " + SAMPLE))
    # Per-tumor directory handed to the JVM as java.io.tmpdir.
    data['SENTIEON_TMPDIR'] = data['workdir'] + TUMOR + "/tmpdir"
    command2 = "".join([
        "/storage/gluster/vol1/bcbio/anaconda/bin/snpEff -Xms1000m -Xmx36400m -Djava.io.tmpdir=",
        data['SENTIEON_TMPDIR'],
        " eff -noStats -t -noLog -dataDir /storage/gluster/vol1/bcbio/genomes/Hsapiens/hg19/snpeff -hgvs -noLof -i vcf -o vcf -noInteraction -noMotif -noNextProt -strict GRCh37.75 ",
        TUMOR,
        "-TNhaplotyper.vcf.gz | ",
        data['bgzipdir'],
        " --threads ",
        str(data['nt']),
        " -c > ",
        TUMOR,
        "-TNhaplotyper.snpEff.vcf.gz",
    ])
    print(command2 + '\n')
    commands.append(command2)

# Commands are only printed unless exec_scripts is switched on.
if exec_scripts:
    commandsParallel(commands, batchSize, data['samplesParallel'])

In [None]:
display(Markdown("___"))

# One snpEff annotation pipeline per tumor/normal pair: annotates
# <TUMOR>-filtTNscope.vcf.gz and pipes the result through data['bgzipdir']
# into <TUMOR>-filtTNscope.snpEff.vcf.gz.
commands = []
for row in batch[:batchSize]:
    SAMPLE, TUMOR = row[0], row[1]
    display(Markdown("### Tumor: " + TUMOR + " Normal: " + SAMPLE))
    # Per-tumor directory handed to the JVM as java.io.tmpdir.
    data['SENTIEON_TMPDIR'] = data['workdir'] + TUMOR + "/tmpdir"
    command3 = "".join([
        "/storage/gluster/vol1/bcbio/anaconda/bin/snpEff -Xms1000m -Xmx36400m -Djava.io.tmpdir=",
        data['SENTIEON_TMPDIR'],
        " eff -noStats -t -noLog -dataDir /storage/gluster/vol1/bcbio/genomes/Hsapiens/hg19/snpeff -hgvs -noLof -i vcf -o vcf -noInteraction -noMotif -noNextProt -strict GRCh37.75 ",
        TUMOR,
        "-filtTNscope.vcf.gz | ",
        data['bgzipdir'],
        " --threads ",
        str(data['nt']),
        " -c > ",
        TUMOR,
        "-filtTNscope.snpEff.vcf.gz",
    ])
    print(command3 + '\n')
    commands.append(command3)

# Commands are only printed unless exec_scripts is switched on.
if exec_scripts:
    commandsParallel(commands, batchSize, data['samplesParallel'])

In [3]:
%%capture
# Reset the export area: remove the html_pipeline.zip archive and the ./data
# directory, then recreate an empty ./data. Cell output is captured by
# %%capture rather than displayed.
# NOTE(review): the paths are relative to the current working directory; the
# chdir back to data['home'] below is commented out - confirm which directory
# this is meant to run in.
#os.chdir(data['home'])
!rm html_pipeline.zip
!rm -r ./data
!mkdir data

In [4]:
%%capture
# Execute pipeline.ipynb and convert it to HTML under ./data, with code inputs
# hidden (--no-input).
# NOTE(review): --ExecutePreprocessor.timeout=-1 presumably disables the
# per-cell execution timeout - confirm against the nbconvert documentation.
!jupyter nbconvert --no-input --execute --ExecutePreprocessor.timeout=-1 --output-dir='./data' --to html pipeline.ipynb
# Copy the PDFs and the normal/tumor alignment-metrics text files next to the HTML.
# NOTE(review): the source directory is a hard-coded absolute path specific to
# one dataset (ABT414_Flank); it is not derived from data['workdir'].
!cp /storage/gluster/vol1/data/PUBLIC/SCAMBIO/ABT414_WES_Analysis/ABT414_Flank/ABT414_Flank/*.pdf ./data
!cp /storage/gluster/vol1/data/PUBLIC/SCAMBIO/ABT414_WES_Analysis/ABT414_Flank/ABT414_Flank/*_N_aln_metrics.txt ./data
!cp /storage/gluster/vol1/data/PUBLIC/SCAMBIO/ABT414_WES_Analysis/ABT414_Flank/ABT414_Flank/*_T_aln_metrics.txt ./data
# Bundle ./data recursively into the html_pipeline archive.
!zip -r "html_pipeline" "./data"


In [None]:
#!python -m nbconvert /home/adefalco/pipeline.ipynb  --to ipynb --execute

In [None]:
#import nbinteract as nbi
#nbi.publish('AntonioDeFalco/testInteract/master', 'pipeline.ipynb')

In [None]:
#!jupyter nbconvert /home/adefalco/pipeline.ipynb --to rst

In [None]:
#!jupyter nbconvert /home/adefalco/pipeline.ipynb --to Interact

In [None]:
#!zip -r "html_docu" "./build"