# Sample selection based on multi-omics data

### Set instance and token

In [1]:
suppressMessages(library(tidyverse))
library(integrationCurator) # Genestack client library

# Export the connection settings as environment variables, one per call.
# NOTE(review): presumably read by the integrationCurator client - confirm.
# Replace '<token>' with a real API token before running the notebook.
Sys.setenv(PRED_SPOT_HOST = "occam.genestack.com")
Sys.setenv(PRED_SPOT_TOKEN = "<token>")
Sys.setenv(PRED_SPOT_VERSION = "default-released")

“package ‘dplyr’ was built under R version 3.6.3”

### Get individuals

In [2]:
# Query the samples of study GSF535886 whose "Species Or Strain" value is
# British or Finnish, report the elapsed time, and preview the key columns.
start <- Sys.time()
samples <- as_tibble(integrationCurator::OmicsQueriesApi_search_samples(
    study_filter = 'genestack:accession=GSF535886',
    sample_filter = '"Species Or Strain"="British" OR "Species Or Strain"="Finnish"'
)$content$data[['metadata']])
# Subtracting POSIXct values yields a difftime whose unit is chosen
# automatically (secs, mins, hours, ...); pin it to seconds so the
# "seconds" label in the message is always truthful.
elapsed <- as.numeric(difftime(Sys.time(), start, units = 'secs'))
cat(sprintf('Time to get %s samples: %s seconds\n\n', nrow(samples), round(elapsed)))

head(samples[, c('genestack:accession', 'Sample Source ID', 'Species Or Strain')])

Time to get 182 samples: 1 seconds



genestack:accession,Sample Source ID,Species Or Strain
GSF535900,HG00111,British
GSF535899,HG00110,British
GSF535902,HG00113,British
GSF535901,HG00112,British
GSF535896,HG00106,British
GSF535895,HG00104,British


### Get individuals filtered by genotypes

In [3]:
# Same metadata query as before (study GSF535886, British or Finnish),
# additionally passing a variant query (vx_query) for rs17007017.
# Reports the elapsed time and previews the key columns.
start <- Sys.time()
samples <- as_tibble(integrationCurator::OmicsQueriesApi_search_samples(
    study_filter = 'genestack:accession=GSF535886',
    sample_filter = '"Species Or Strain"="British" OR "Species Or Strain"="Finnish"',
    vx_query = 'VariationId=rs17007017 AllelesNumber=1'
)$content$data[['metadata']])
# Subtracting POSIXct values yields a difftime whose unit is chosen
# automatically (secs, mins, hours, ...); pin it to seconds so the
# "seconds" label in the message is always truthful.
elapsed <- as.numeric(difftime(Sys.time(), start, units = 'secs'))
cat(sprintf('Time to get %s samples: %s seconds\n\n', nrow(samples), round(elapsed)))

head(samples[, c('genestack:accession', 'Sample Source ID', 'Species Or Strain')])

Time to get 72 samples: 1 seconds



genestack:accession,Sample Source ID,Species Or Strain
GSF536014,HG00312,Finnish
GSF536004,HG00278,Finnish
GSF536003,HG00277,Finnish
GSF536000,HG00274,Finnish
GSF535999,HG00273,Finnish
GSF536001,HG00275,Finnish


### Get individuals filtered by expression values

In [4]:
# Same metadata query as before (study GSF535886, British or Finnish),
# additionally passing an expression query (ex_query) for gene
# ENSG00000109445 with MinValue=40.
# Reports the elapsed time and previews the key columns.
start <- Sys.time()
samples <- as_tibble(integrationCurator::OmicsQueriesApi_search_samples(
    study_filter = 'genestack:accession=GSF535886',
    sample_filter = '"Species Or Strain"="British" OR "Species Or Strain"="Finnish"',
    ex_query = 'Gene=ENSG00000109445 MinValue=40'
)$content$data[['metadata']])
# Subtracting POSIXct values yields a difftime whose unit is chosen
# automatically (secs, mins, hours, ...); pin it to seconds so the
# "seconds" label in the message is always truthful.
elapsed <- as.numeric(difftime(Sys.time(), start, units = 'secs'))
cat(sprintf('Time to get %s samples: %s seconds\n\n', nrow(samples), round(elapsed)))

head(samples[, c('genestack:accession', 'Sample Source ID', 'Species Or Strain')])

Time to get 157 samples: 0 seconds



genestack:accession,Sample Source ID,Species Or Strain
GSF535900,HG00111,British
GSF535899,HG00110,British
GSF535901,HG00112,British
GSF535896,HG00106,British
GSF535895,HG00104,British
GSF535898,HG00109,British


### Get individuals filtered by genotypes and expression values

In [5]:
# Same metadata query as before (study GSF535886, British or Finnish),
# passing both the variant query (vx_query) for rs17007017 and the
# expression query (ex_query) for gene ENSG00000109445 with MinValue=40.
# NOTE(review): the two omics filters are presumably combined with AND
# by the service - confirm against the API documentation.
start <- Sys.time()
samples <- as_tibble(integrationCurator::OmicsQueriesApi_search_samples(
    study_filter = 'genestack:accession=GSF535886',
    sample_filter = '"Species Or Strain"="British" OR "Species Or Strain"="Finnish"',
    vx_query = 'VariationId=rs17007017 AllelesNumber=1',
    ex_query = 'Gene=ENSG00000109445 MinValue=40'
)$content$data[['metadata']])
# Subtracting POSIXct values yields a difftime whose unit is chosen
# automatically (secs, mins, hours, ...); pin it to seconds so the
# "seconds" label in the message is always truthful.
elapsed <- as.numeric(difftime(Sys.time(), start, units = 'secs'))
cat(sprintf('Time to get %s samples: %s seconds\n\n', nrow(samples), round(elapsed)))

head(samples[, c('genestack:accession', 'Sample Source ID', 'Species Or Strain')])

Time to get 61 samples: 0 seconds



genestack:accession,Sample Source ID,Species Or Strain
GSF536014,HG00312,Finnish
GSF536004,HG00278,Finnish
GSF536003,HG00277,Finnish
GSF536000,HG00274,Finnish
GSF536001,HG00275,Finnish
GSF535900,HG00111,British
