# Create First Generation Output File From DesignStudio

#### Usage

CIViC_DesignStudio pulls existing variants from the CIViC Knowledgebase, iterates through all of them, and creates an output file that can be used to design IDT probes.


#### Inputs:

1) Variants of interest = any variants selected in the CIViC Probe DesignStudio Interface; each selected variant will be evaluated for probe design.

#### Output Files: 

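1) CIViC_DesignStudio_probes.tsv = for each variant selected, we provide the chromosome, start, stop, chromosome2, start2, stop2 (left blank when the variant has no second set of coordinates), gene, variant and pipeline (DNA-based or RNA-based)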


In [2]:
#!/usr/bin/env python3
import json
import numpy as np
import requests
import sys

In [3]:
##Pull in Data from JSON
def _fetch_qualifying_variants(url):
    """Return the 'records' list from a CIViC panel qualifying-variants endpoint.

    Raises requests.HTTPError on a 4xx/5xx response and
    requests.Timeout if the server stops responding, so a failed download
    is reported at the request instead of as a JSON or KeyError failure
    further down.
    """
    # Without a timeout, requests can wait on a stalled connection forever.
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    return response.json()['records']


variants_DNA = _fetch_qualifying_variants('https://civic.genome.wustl.edu/api/panels/captureseq/qualifying_variants?minimum_score=0') #Call eligible variants
variants_RNA = _fetch_qualifying_variants('https://civic.genome.wustl.edu/api/panels/nanostring/qualifying_variants?minimum_score=0') #Call eligible variants

In [17]:
#Create output list for probes of interest; the DNA and RNA cells below each
#append one row (a list of field values) per variant, and the last cell
#writes those rows to the output TSV
capture: list = []

In [18]:
## Add one bed-like row to `capture` for every variant returned by the DNA-based API

#evidence-level letter -> numeric score lookup (not referenced in this cell)
score = {'A':5, 'B':4, 'C':3, 'D':2, 'E':1}

#list set aside for capture sequence probes (nothing in this cell fills it)
capture_sequence_probes = []
for record in variants_DNA:
    coords = record['coordinates']
    primary = [coords['chromosome'], coords['start'], coords['stop']]
    # The second coordinate set is reported only when all three of its fields
    # are truthy; otherwise each of those columns gets a single-space placeholder.
    if coords['chromosome2'] and coords['start2'] and coords['stop2']:
        secondary = [coords['chromosome2'], coords['start2'], coords['stop2']]
    else:
        secondary = [' ', ' ', ' ']
    # Row layout: chrom, start, stop, chrom2, start2, stop2, gene, variant, pipeline
    capture.append(primary + secondary + [record['entrez_name'], record['name'], 'DNA-based'])

print(len(capture))


612


In [19]:
## Add one bed-like row to `capture` for every variant returned by the NanoString (RNA-based) API

# Table for nanostring probes holding only its header row; the rows built in
# the loop below are appended to `capture`, not to this list.
nanoString_probes = [['gene', 'soid', 'variant_type', 'variant_name', 'representative_transcript', 'top_evidence_level', 'diseases','evidence_types','number_of_evidence_statements', 'chrom', 'start', 'stop', 'transcript2', 'chrom2', 'start2', 'stop2']]
for record in variants_RNA:
    coords = record['coordinates']
    primary = [coords['chromosome'], coords['start'], coords['stop']]
    # The second coordinate set is reported only when all three of its fields
    # are truthy; otherwise each of those columns gets a single-space placeholder.
    if coords['chromosome2'] and coords['start2'] and coords['stop2']:
        secondary = [coords['chromosome2'], coords['start2'], coords['stop2']]
    else:
        secondary = [' ', ' ', ' ']
    # Row layout: chrom, start, stop, chrom2, start2, stop2, gene, variant, pipeline
    capture.append(primary + secondary + [record['entrez_name'], record['name'], 'RNA-based'])


In [20]:
##Create output files for probe design
# Column names for the output TSV, in the same order as the fields of each
# row stored in `capture`.
header_columns = ['chromosome', 'start', 'stop', 'chromosome2', 'start2', 'stop2', 'gene', 'variant', 'pipeline']

# `with` closes the file even if a write fails part-way through.
with open('../../CIViC_DesignStudio/output_files/CIViC_DesignStudio_probes.tsv', 'w') as capture_file:
    # Every column is tab-separated; the previous header used '/t' before
    # 'pipeline', which fused the last two column names into one.
    capture_file.write('\t'.join(header_columns) + '\n')
    for item in capture: #iterate through capture list
        # join() places tabs strictly between fields, so no last-element
        # check (previously an identity comparison) is needed.
        capture_file.write('\t'.join(str(field) for field in item) + '\n')