# Create First Generation Output File From DesignStudio

#### Usage

CIViC_DesignStudio pulls the existing qualifying variants from the CIViC Knowledgebase, iterates through all of them, and creates an output file that can be used to design IDT probes.


#### Inputs:

1) variants of interest = any variants selected in the CIViC Probe DesignStudio Interface; each selected variant will be evaluated for probe design.

#### Output Files: 

1) CIViC_DesignStudio_probes.tsv = for each selected variant, the file lists the chromosome, start, stop, gene, variant, and pipeline (DNA-based or RNA-based).


In [3]:
#!/usr/bin/env python3
import json
import numpy as np
import requests
import sys

In [6]:
## Pull in data from the CIViC panel API (JSON)
def _fetch_qualifying_variants(panel, minimum_score=0):
    """Return the 'records' list of qualifying variants for one CIViC panel.

    panel: panel name used in the API path (e.g. 'captureseq', 'nanostring').
    minimum_score: value sent as the 'minimum_score' query parameter.

    Raises requests.HTTPError on a non-2xx response, so a server-side failure
    is reported as such instead of as a confusing JSON/KeyError further down.
    """
    response = requests.get(
        'https://civic.genome.wustl.edu/api/panels/{}/qualifying_variants'.format(panel),
        params={'minimum_score': minimum_score},
        timeout=60,  # without a timeout an unresponsive server hangs the cell forever
    )
    response.raise_for_status()
    return response.json()['records']


variants_DNA = _fetch_qualifying_variants('captureseq')  # eligible variants, DNA-based panel
variants_RNA = _fetch_qualifying_variants('nanostring')  # eligible variants, RNA-based panel

In [23]:
# Create output list for probes of interest.
# The DNA-based and RNA-based cells below each append one
# [chromosome, start, stop, gene, variant, pipeline] list per variant,
# and the final cell writes every entry as one row of the output TSV.
capture = []

In [24]:
## Collect one bed-like row (chromosome, start, stop, gene, variant, pipeline)
## for every variant returned by the DNA-based API.

# Numeric weight for each evidence level; the loop in this cell does not read it.
score = dict(A=5, B=4, C=3, D=2, E=1)

# List reserved for capture sequence probes; this cell leaves it empty.
capture_sequence_probes = []

for record in variants_DNA:  # walk every eligible variant record
    coords = record['coordinates']
    capture.append([
        coords['chromosome'],
        coords['start'],
        coords['stop'],
        record['entrez_name'],  # gene name
        record['name'],  # variant name
        'DNA-based',  # pipeline label
    ])

# Report how many rows have been collected so far.
print(len(capture))


612


In [25]:
## Collect one bed-like row (chromosome, start, stop, gene, variant, pipeline)
## for every variant returned by the NanoString API.

# NanoString probe table; this cell stores only its header row.
nanoString_probes = [[
    'gene', 'soid', 'variant_type', 'variant_name', 'representative_transcript',
    'top_evidence_level', 'diseases', 'evidence_types',
    'number_of_evidence_statements', 'chrom', 'start', 'stop',
    'transcript2', 'chrom2', 'start2', 'stop2',
]]

for record in variants_RNA:  # walk every eligible variant record
    coords = record['coordinates']
    capture.append([
        coords['chromosome'],
        coords['start'],
        coords['stop'],
        record['entrez_name'],  # gene name
        record['name'],  # variant name
        'RNA-based',  # pipeline label
    ])
    

In [26]:
## Create the output file for probe design: a header line followed by one
## tab-separated row per entry in `capture`.
header_columns = ['chromosome', 'start', 'stop', 'gene', 'variant', 'pipeline']

# `with` guarantees the file is closed even if a write fails part-way through.
with open('../../CIViC_design_studio/output_files/CIViC_DesignStudio_probes.tsv', 'w') as capture_file:
    # Every column is separated by a real tab; the previous header used the
    # literal text '/t' between 'variant' and 'pipeline', fusing the two names.
    capture_file.write('\t'.join(header_columns) + '\n')
    for item in capture:  # iterate through capture list
        # Joining puts tabs only between fields, so no identity check is
        # needed to detect the last field of the row.
        capture_file.write('\t'.join(str(field) for field in item) + '\n')

In [None]:

#     gene = variants_nanostring[k]['entrez_name']  #Call Gene name
#     variant = variants_nanostring[k]['name'] #call variant
#     soid = variants_nanostring[k]['variant_types'][0]['so_id'] #call soid
#     variant_type = variants_nanostring[k]['variant_types'][0]['name'] #call variant type
#     transcript = variants_nanostring[k]['coordinates']['representative_transcript'] #call transcript
#     top_evidence = variants_nanostring[k]
#     diseases = variants_nanostring[k]
#     chrom = variants_nanostring[k]['coordinates']['chromosome'] #call chrom
#     start = variants_nanostring[k]['coordinates']['start'] #call start
#     stop = variants_nanostring[k]['coordinates']['stop'] #call stop
#     evidence = variants_nanostring[k]['evidence_items']
#     evidence_statements = len(variants_nanostring[k]['evidence_items'])
#     diseases = []  # set list for all of the diseases for this variant
#     evidence_type = []  # set list for evidence types
#     evidence_scores = []  # set list for evidence scores
#     top_evidences = []  # set list for top evidence level
#     for item in evidence:  # iterate through the evidence items
#         if item['disease']['name'] not in diseases:  # see if disease is already there
#             if 'Walden' in item['disease']['name']:  # Change waldenstroms issues (the A is not accepted by R code)
#                 if 'Waldenstroms Macroglobulinemia' not in diseases:  # check if it is already there
#                     diseases.append('Waldenstroms Macroglobulinemia')  # if it is not append to diseases
#             else:  # if its not a weird name
#                 diseases.append(item['disease']['name'])  # append the disease
#         if item['evidence_type'] not in evidence_type:  # see if the evidence type is already there
#             evidence_type.append(item['evidence_type'])  # if it is not append it
#         trust_rating = int(item['rating'] or 0)  # make the trust rating either what is listed or 0
#         evidence_level = int(
#             score[item['evidence_level']])  # make the evidence level the value from the score dictionary
#         evidence_scores.append(evidence_level * trust_rating)  # calculate the Evidence Score
#         if item['evidence_level'] != '[]':  # find the evidence levels that are not blank
#             top_evidences.append(item['evidence_level'].strip())  # add to the list
#     # pull the maximum evidence level
#     if 'A' in top_evidences:
#         top_evidence = 'A'
#     elif 'B' in top_evidences:
#         top_evidence = 'B'
#     elif 'C' in top_evidences:
#         top_evidence = 'C'
#     elif 'D' in top_evidences:
#         top_evidence = 'D'
#     else:
#         top_evidence = 'E'
#     evidence_score = sum(evidence_scores)  # sum the evidence scores to get a CIVic Score
#     disease = ', '.join(diseases)  # format the diseases
#     evidence_types = ', '.join(evidence_type)  # format the evidence types

#     if variants_nanostring[k]['coordinates']['chromosome2'] is not None and variants_nanostring[k]['coordinates']['start2'] is not None and variants_nanostring[k]['coordinates']['stop2'] is not None:  # if there are two chromosomes for the variant
#         chrom2 = variants_nanostring[k]['coordinates']['chromosome2']  # call chrom2
#         start2 = variants_nanostring[k]['coordinates']['start2']  # call start2
#         stop2 = variants_nanostring[k]['coordinates']['stop2']  # call stop2
#         nanoString_probes.append([gene, soid, variant_type, variant, transcript, top_evidence, disease, evidence_types, evidence_statements, chrom, start, stop, chrom2, start2, stop2])  # append new list with bed information
#     else:  # if there is only 1 chromosome for the variant
#         nanoString_probes.append([gene, soid, variant_type, variant, transcript, top_evidence, disease, evidence_types, evidence_statements, chrom, start, stop])  # append new list with bed information
