In [1]:
=begin

Purpose: Create First Generation Output File From DesignStudio

Use: CIViC_DesignStudio will pull existing variants from the CIViC Knowledgebase, iterate through all variants, and create an output that can be used to create IDT probes

Inputs: 1) variants of interest = from the CIViC Probe DesignStudio Interface, any variants that are selected will be evaluated for probe design.

Outputs: 1) CIViC_DesignStudio_coordinates.tsv = for each variant selected, we will provide the chromosome, start, stop, gene, variant and pipeline

=end

In [2]:
#!/usr/bin/env ruby

require "rubygems"
require "json"
require "net/http"
require "uri"

false

In [3]:
#pull in CIViC API for DNA-based variants 
url_DNA = 'https://civic.genome.wustl.edu/api/panels/DNA-based/qualifying_variants?minimum_score=0'
resp_DNA = Net::HTTP.get_response(URI.parse(url_DNA))
variants_DNA = JSON.parse(resp_DNA.body)['records']

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.


In [4]:
#set list for DNA-based capture
capture_DNA = []
capture_DNA << ["chrom", "start", "stop", "chrom2", "start2", "stop2", "gene", "variant", "pipeline"]

[["chrom", "start", "stop", "chrom2", "start2", "stop2", "gene", "variant", "pipeline"]]

In [5]:
## For variants listed in the DNA-based API, create bed-like files for capture design
pipeline = 'DNA-based' #set pipeline
blank = ' ' #set blanks

#iterate through evidence items
for item in variants_DNA
  flag = false #set flag for coordinates; false = chr1,start1,stop1 only
  chrom = item['coordinates']['chromosome'] #call chrom
  start = item['coordinates']['start'] #call start
  stop = item['coordinates']['stop']
  if item['coordinates']['chromosome2'] and item['coordinates']['start2'] and item['coordinates']['stop2'] #determine if there is a second set of coordinates
    chrom2 = item['coordinates']['chromosome2']  # call chrom2
    start2 = item['coordinates']['start2']  # call start2
    stop2 = item['coordinates']['stop2']  # call stop2
    flag = true #set flag for coordinates; true = second set of coordinates available
  end
  gene = item['entrez_name']  #Call Gene name
  variant = item['name'] #call variant
  if flag #if there is two sets of coordinates
    capture_DNA << [chrom, start, stop, chrom2, start2, stop2, gene, variant, pipeline] #append to list
  end
  if not flag #if there is only one set of coordinates
    capture_DNA << [chrom, start, stop, blank, blank, blank, gene, variant, pipeline] #append to list
  end
end

[["chrom", "start", "stop", "chrom2", "start2", "stop2", "gene", "variant", "pipeline"], ["17", 7571720, 7590856, " ", " ", " ", "TP53", "WILD TYPE", "DNA-based"], ["7", 140434279, 140624564, " ", " ", " ", "BRAF", "WILD TYPE", "DNA-based"], ["14", 106032614, 107288051, "18", 60794268, 60987019, "BCL2", "IGH-BCL2", "DNA-based"], ["7", 140434279, 140624564, " ", " ", " ", "BRAF", "V600E AMPLIFICATION", "DNA-based"], ["11", 69455855, 69469242, " ", " ", " ", "CCND1", "AMPLIFICATION", "DNA-based"], ["3", 71003844, 71633140, " ", " ", " ", "FOXP1", "AMPLIFICATION", "DNA-based"], ["2", 61108709, 61149800, " ", " ", " ", "REL", "AMPLIFICATION", "DNA-based"], ["2", 42396490, 42528380, "2", 29415640, 29446394, "ALK", "EML4-ALK AMPLIFICATION", "DNA-based"], ["20", 54944446, 54967393, " ", " ", " ", "AURKA", "AMPLIFICATION", "DNA-based"], ["19", 30302805, 30315215, " ", " ", " ", "CCNE1", "AMPLIFICATION", "DNA-based"], ["7", 55086794, 55279321, " ", " ", " ", "EGFR", "AMPLIFICATION", "DNA-based"

In [6]:
#pull in CIViC API for RNA-based variants 
url_RNA = 'https://civic.genome.wustl.edu/api/panels/RNA-based/qualifying_variants?minimum_score=0'
resp_RNA = Net::HTTP.get_response(URI.parse(url_RNA))
variants_RNA = JSON.parse(resp_RNA.body)['records']



In [7]:
#set list for RNA-based capture
capture_RNA = []
capture_RNA << ["chrom", "start", "stop", "chrom2", "start2", "stop2", "gene", "variant", "pipeline"]

[["chrom", "start", "stop", "chrom2", "start2", "stop2", "gene", "variant", "pipeline"]]

In [8]:
## For variants listed in the RNA-based API, create bed-like files for capture design
pipeline = 'RNA-based' #set pipeline
blank = ' ' #set blanks

#iterate through evidence items
for item in variants_RNA
  flag = false #set flag for coordinates; false = chr1,start1,stop1 only
  chrom = item['coordinates']['chromosome'] #call chrom
  start = item['coordinates']['start'] #call start
  stop = item['coordinates']['stop']
  if item['coordinates']['chromosome2'] and item['coordinates']['start2'] and item['coordinates']['stop2'] #determine if there is a second set of coordinates
    chrom2 = item['coordinates']['chromosome2']  # call chrom2
    start2 = item['coordinates']['start2']  # call start2
    stop2 = item['coordinates']['stop2']  # call stop2
    flag = true #set flag for coordinates; true = second set of coordinates available
  end
  gene = item['entrez_name']  #Call Gene name
  variant = item['name'] #call variant
  if flag #if there is only one set of coordinates
    capture_RNA << [chrom, start, stop, chrom2, start2, stop2, gene, variant, pipeline] #append to list
  end
  if not flag #if there are two sets of coordinates
    capture_RNA << [chrom, start, stop, blank, blank, blank, gene, variant, pipeline] #append to list
  end
end


[["chrom", "start", "stop", "chrom2", "start2", "stop2", "gene", "variant", "pipeline"], ["11", 69455855, 69469242, " ", " ", " ", "CCND1", "EXPRESSION", "RNA-based"], ["11", 69455855, 69469242, " ", " ", " ", "CCND1", "OVEREXPRESSION", "RNA-based"], ["12", 4382938, 4414516, " ", " ", " ", "CCND2", "OVEREXPRESSION", "RNA-based"], ["12", 4381437, 4382937, " ", " ", " ", "CCND2", "PROMOTER DEMETHYLATION", "RNA-based"], ["19", 30302805, 30315215, " ", " ", " ", "CCNE1", "OVEREXPRESSION", "RNA-based"], ["12", 58141510, 58146304, " ", " ", " ", "CDK4", "EXPRESSION", "RNA-based"], ["9", 21974827, 21994591, " ", " ", " ", "CDKN2A", "PROMOTER HYPERMETHYLATION", "RNA-based"], ["3", 128198270, 128212028, " ", " ", " ", "GATA2", "EXPRESSION", "RNA-based"], ["10", 131264495, 131265656, " ", " ", " ", "MGMT", "PROMOTER METHYLATION", "RNA-based"], ["18", 60790579, 60987361, " ", " ", " ", "BCL2", "EXPRESSION", "RNA-based"], ["11", 2150348, 2170833, " ", " ", " ", "IGF2", "OVEREXPRESSION", "RNA-based