# API Examples on TCGA

### 1. Connect to the instance

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import os, time
import integration_curator # Genestack client library

# Connection settings for the Genestack client.
# NOTE(review): the client presumably reads these variables when the API
# object is created - confirm against the integration_curator documentation.
os.environ['PRED_SPOT_HOST'] = 'occam.genestack.com'
# Keep a token that is already exported in the environment instead of
# overwriting it with the placeholder, so a real token never has to be pasted
# into (and saved with) the notebook. If no token is set, the placeholder is
# used and must be replaced by hand.
os.environ.setdefault('PRED_SPOT_TOKEN', '<your token>')
os.environ['PRED_SPOT_VERSION'] = 'default-released'

# Client object used by every query cell below.
omics_api = integration_curator.OmicsQueriesApi()

### 2. Get samples

In [2]:
# Accession of the study to query; the later query cells reuse `study_filter`.
study_filter = 'genestack:accession=GSF960537'

# perf_counter() is a monotonic clock meant for measuring elapsed time; it is
# not affected by system clock adjustments the way time.time() is.
start = time.perf_counter()
data = omics_api.search_samples(
    study_filter=study_filter
)
# Each returned item carries its sample attributes under 'metadata'. A list of
# such dicts goes straight to the DataFrame constructor (DataFrame.from_dict
# is documented for dict input only).
samples = pd.DataFrame([item['metadata'] for item in data.data])
print('Time to get %s samples: %i seconds\n' % (samples.shape[0], time.perf_counter()-start))

samples.head()

Time to get 486 samples: 1 seconds



Unnamed: 0,genestack:accession,Sample Source ID,Sample Name,Organism,Disease,Tissue,Cell Type,Cell Line,Sampling Site,Age,...,tissue_or_organ_of_origin,primary_diagnosis,case_id,case_submitter_id,race,vital_status,age_at_index,days_to_birth,Sample Source,days_to_death
0,GSF960940,TCGA-55-A490,,,,,,,,,...,"Upper lobe, lung","Adenocarcinoma, NOS",b89de053-d253-447f-952e-9a2edcf6bca5,TCGA-55-A490,white,Dead,78,-28728.0,TCGA,99.0
1,GSF960939,TCGA-49-AARE,,,,,,,,,...,"Upper lobe, lung","Adenocarcinoma, NOS",cd9e70e4-8622-4a07-8646-63f8275c1737,TCGA-49-AARE,black or african american,Dead,51,-18893.0,TCGA,1229.0
2,GSF960938,TCGA-55-7994,,,,,,,,,...,"Upper lobe, lung","Adenocarcinoma, NOS",67dbe286-edb3-4d07-8f74-9e5254f01945,TCGA-55-7994,white,Alive,81,-29858.0,TCGA,
3,GSF960937,TCGA-J2-8192,,,,,,,,,...,"Upper lobe, lung","Adenocarcinoma, NOS",369f14c4-2191-4962-a309-3e23ddc4e5fc,TCGA-J2-8192,white,Alive,65,-23892.0,TCGA,
4,GSF960944,TCGA-55-1592,,,,,,,,,...,"Upper lobe, lung","Adenocarcinoma, NOS",a41c46da-7ed4-4192-bd16-b3cbb94a5133,TCGA-55-1592,white,Dead,65,,TCGA,701.0


### 3. Get germline mutations

In [3]:
# Variant query for the KRAS gene, filtered on the variant metadata field
# 'Type' (here: records from normal samples).
vx_query = 'Gene=KRAS'
vx_filter = 'Type=Normal'

# page_limit=2 keeps the example output short.
data = omics_api.search_variant_data(
    study_filter=study_filter,
    vx_query=vx_query,
    vx_filter=vx_filter,
    page_limit=2
).data

data

[{'itemId': 'GSF962405-26817',
  'metadata': {'Type': 'Normal'},
  'contig': '12',
  'start': 25225628,
  'reference': 'C',
  'alteration': ['G'],
  'variationId': ['.'],
  'info': {'AC': ['0'], 'AN': ['2']},
  'genotype': {'AD': '.',
   'sampleNames': 'TCGA-05-4249',
   'DP': '.',
   'GT': './.'},
  'relationships': {'sample': 'GSF960698'}},
 {'itemId': 'GSF962408-26820',
  'metadata': {'Type': 'Normal'},
  'contig': '12',
  'start': 25225628,
  'reference': 'C',
  'alteration': ['G'],
  'variationId': ['.'],
  'info': {'AC': ['0'], 'AN': ['2']},
  'genotype': {'AD': '.',
   'sampleNames': 'TCGA-05-4389',
   'DP': '.',
   'GT': './.'},
  'relationships': {'sample': 'GSF960558'}}]

### 4. Get somatic mutations

In [4]:
# Variant query for the KRAS gene, filtered on the variant metadata field
# 'Type' (here: records from tumor samples).
vx_query = 'Gene=KRAS'
vx_filter = 'Type=Tumor'

# page_limit=2 keeps the example output short.
data = omics_api.search_variant_data(
    study_filter=study_filter,
    vx_query=vx_query,
    vx_filter=vx_filter,
    page_limit=2
).data

data

[{'itemId': 'GSF961984-26817',
  'metadata': {'Type': 'Tumor'},
  'contig': '12',
  'start': 25225628,
  'reference': 'C',
  'alteration': ['G'],
  'variationId': ['.'],
  'info': {'AC': ['1'], 'AN': ['2']},
  'genotype': {'AD': '.',
   'sampleNames': 'TCGA-05-4249',
   'DP': '.',
   'GT': './.'},
  'relationships': {'sample': 'GSF960698'}},
 {'itemId': 'GSF961987-26820',
  'metadata': {'Type': 'Tumor'},
  'contig': '12',
  'start': 25225628,
  'reference': 'C',
  'alteration': ['G'],
  'variationId': ['.'],
  'info': {'AC': ['1'], 'AN': ['2']},
  'genotype': {'AD': '.',
   'sampleNames': 'TCGA-05-4389',
   'DP': '.',
   'GT': './.'},
  'relationships': {'sample': 'GSF960558'}}]

### 5. Get expression data

In [5]:
# Expression query for a single gene, filtered on the metadata field 'Type'
# (here: FPKM values).
# NOTE(review): MinValue=0.0 presumably sets a lower bound on the returned
# expression value - confirm against the query syntax documentation.
gene = 'KRAS'
ex_query = 'Gene=%s MinValue=0.0' % gene
ex_filter = 'Type=FPKM'

# page_limit=2 keeps the example output short.
data = omics_api.search_expression_data(
    study_filter=study_filter,
    ex_query=ex_query,
    ex_filter=ex_filter,
    page_limit=2
).data

data

[{'itemId': 'GSF961853-KRAS',
  'metadata': {'Type': 'FPKM', 'Run Source ID': 'TCGA-05-4249'},
  'runId': 'GSF961853',
  'groupId': 'GSF961501',
  'gene': 'KRAS',
  'expression': 31.5975727207,
  'relationships': {'sample': 'GSF960698'}},
 {'itemId': 'GSF961791-KRAS',
  'metadata': {'Type': 'FPKM', 'Run Source ID': 'TCGA-05-4389'},
  'runId': 'GSF961791',
  'groupId': 'GSF961501',
  'gene': 'KRAS',
  'expression': 9.06548155284,
  'relationships': {'sample': 'GSF960558'}}]

### 6. Get copy number data

In [6]:
# Copy number values are fetched through the expression endpoint and selected
# by the metadata field 'Type'. The filter value contains a space, so it is
# wrapped in double quotes inside the filter string.
# NOTE(review): MinValue=0.0 presumably sets a lower bound on the returned
# value - confirm against the query syntax documentation.
gene = 'KRAS'
ex_query = 'Gene=%s MinValue=0.0' % gene
ex_filter = 'Type="Copy Number"'

# page_limit=2 keeps the example output short.
data = omics_api.search_expression_data(
    study_filter=study_filter,
    ex_query=ex_query,
    ex_filter=ex_filter,
    page_limit=2
).data

data

[{'itemId': 'GSF961100-KRAS',
  'metadata': {'Type': 'Copy Number', 'Run Source ID': 'TCGA-05-4249'},
  'runId': 'GSF961100',
  'groupId': 'GSF961027',
  'gene': 'KRAS',
  'expression': 4.0,
  'relationships': {'sample': 'GSF960698'}},
 {'itemId': 'GSF961424-KRAS',
  'metadata': {'Type': 'Copy Number', 'Run Source ID': 'TCGA-05-4389'},
  'runId': 'GSF961424',
  'groupId': 'GSF961027',
  'gene': 'KRAS',
  'expression': 4.0,
  'relationships': {'sample': 'GSF960558'}}]