# Using Genestack Omics APIs for querying expression values

### Set the instance host, access token, and API version

In [1]:
library(integrationCurator)
library(expressionUser)
library(stringi)
suppressMessages(library(tidyverse))
suppressMessages(library(integrationCurator)) # Genestack client library

# Export the host, token and version settings as environment variables
# (presumably read by the Genestack client libraries loaded above -- confirm).
# NOTE(review): the token value is hard-coded in the notebook; verify it is a
# placeholder and not a real credential before sharing.
Sys.setenv(
  PRED_SPOT_HOST = "br-dev.genestack.net",
  PRED_SPOT_TOKEN = "tknRoot",
  PRED_SPOT_VERSION = "default-released"
)


Attaching package: ‘expressionUser’


The following objects are masked from ‘package:integrationCurator’:

    ApiClient, Element, IntegrationHelper, ListResponse,
    MetadataContent, MetadataWithId, MetaResponse, PaginationInfo,
    Response


“package ‘stringi’ was built under R version 4.0.2”
“package ‘tidyverse’ was built under R version 4.0.2”
“package ‘ggplot2’ was built under R version 4.0.2”
“package ‘tibble’ was built under R version 4.0.2”
“package ‘tidyr’ was built under R version 4.0.2”
“package ‘readr’ was built under R version 4.0.2”
“package ‘purrr’ was built under R version 4.0.2”
“package ‘dplyr’ was built under R version 4.0.2”
“package ‘stringr’ was built under R version 4.0.2”
“package ‘forcats’ was built under R version 4.0.2”


### Get samples by study accession

In [2]:
# Accession of the study whose samples are queried.
study_acc <- "GSF129993"

# Search the samples of that study restricted to Sex=F, keep the metadata
# part of the response, and report how long the request took.
system.time({
  samples <- OmicsQueriesApi_search_samples(
    study_filter = paste0("genestack:accession=", study_acc),
    sample_filter = "Sex=F"
  )$content$data[["metadata"]]
})

# Accessions of the matching samples; also displayed as the cell output.
sample_accessions <- samples$`genestack:accession`
sample_accessions

   user  system elapsed 
  0.051   0.013   0.793 

In [3]:
# Endpoint: integration/link/preparation/group/by/study
# Ask for the parent preparation group of the study and keep its itemId.
preparation_group_acc <-
  PreparationIntegrationApi_get_parents_by_study(id = study_acc)$content$itemId
preparation_group_acc

In [9]:
# Endpoint: integration/link/preparation/preparations-to-samples/by/group/
# Fetch the preparation-to-sample links of the group, timing the request.
system.time({
  preparation_to_samples <-
    PreparationIntegrationApi_get_preparation_links_to_samples(
      id = preparation_group_acc
    )$content$data
})

# Narrow each preparation's linked samples down to the samples selected above.
preparation_to_samples$sampleIds <- lapply(
  preparation_to_samples$sampleIds,
  function(ids) intersect(ids, sample_accessions)
)
# preparation_to_samples
# Keep only the preparations that still have at least one linked sample.
indicies <- lengths(preparation_to_samples$sampleIds) >= 1
preparation_to_samples <- preparation_to_samples[indicies, ]
preparation_to_samples


   user  system elapsed 
  0.008   0.000   0.990 

Unnamed: 0_level_0,preparationId,sampleIds
Unnamed: 0_level_1,<chr>,<list>
4,GSF130077,GSF129995
5,GSF130081,GSF129999
6,GSF130080,GSF129998
7,GSF130079,GSF129997
8,GSF130078,GSF129996
9,GSF130085,GSF130003
10,GSF130084,GSF130002
11,GSF130083,GSF130001
12,GSF130082,GSF130000
13,GSF130089,GSF130007


In [4]:
# Endpoint: integration/link/expression/group/by/study/
# Ask for the parent expression group of the study, timing the request, and
# keep its itemId.
system.time({
  expression_group_acc <-
    ExpressionIntegrationApi_get_parents_by_study(id = study_acc)$content$itemId
})
expression_group_acc

   user  system elapsed 
  0.011   0.000   0.159 

In [5]:
# Endpoint: integration/link/expression/run-to-preparation/by/group
# Fetch the (runId, preparationId) pairs of the expression group as a tibble,
# timing the request.
system.time({
  run_to_preparations <-
    ExpressionIntegrationApi_get_run_to_preparation_pairs(
      id = expression_group_acc
    )$content$data %>%
    as_tibble()
})
run_to_preparations

   user  system elapsed 
  0.010   0.001   0.215 

runId,preparationId
<chr>,<chr>
19289,GSF130101
19290,GSF130128
19291,GSF130125
19292,GSF130108
19293,GSF130090
19294,GSF130146
19295,GSF130131
19296,GSF130138
19297,GSF130104
19298,GSF130094


In [7]:
# Endpoint: /expression
# Feature (gene) whose expression values are requested.
feature_list <- "CD3E"
# Join the run ids of the runId column into one comma-separated string.
run_list <- stri_join_list(run_to_preparations["runId"], sep = ",")
# Request the expression data for those runs and that feature, timing the call.
system.time({
  res_expr <- ExpressionSPoTApi_get_expression_data(
    run_filter = run_list,
    feature_list = feature_list
  )$content
})
# cursor <- res_expr$cursor
data <- res_expr$data
my_data <- as_tibble(data)
# Show only the run id, gene and expression columns.
select(my_data, runId, gene, expression)

   user  system elapsed 
  0.034   0.006   0.600 

runId,gene,expression
<chr>,<chr>,<dbl>
19289,CD3E,0.7928590
19290,CD3E,0.0000000
19291,CD3E,0.0000000
19292,CD3E,59.5507179
19293,CD3E,6.4663633
19294,CD3E,5.9762160
19295,CD3E,0.0000000
19296,CD3E,0.0000000
19297,CD3E,0.0000000
19298,CD3E,0.0000000


In [11]:
# Attach the expression rows to every run/preparation pair (matched on runId),
# then display the expression value next to each preparation id.
preparation_expressions <- run_to_preparations %>%
  left_join(my_data, by = "runId")
select(preparation_expressions, preparationId, expression)

preparationId,expression
<chr>,<dbl>
GSF130101,0.7928590
GSF130128,0.0000000
GSF130125,0.0000000
GSF130108,59.5507179
GSF130090,6.4663633
GSF130146,5.9762160
GSF130131,0.0000000
GSF130138,0.0000000
GSF130104,0.0000000
GSF130094,0.0000000


In [12]:
# Attach the per-preparation expression rows to the filtered
# preparation-to-sample links (matched on preparationId), then display the
# expression value next to each preparation's sample ids.
sample_expressions <- preparation_to_samples %>%
  left_join(preparation_expressions, by = "preparationId")
select(sample_expressions, sampleIds, expression)

sampleIds,expression
<list>,<dbl>
GSF129995,0.0000000
GSF129999,35.7368544
GSF129998,0.0000000
GSF129997,0.0000000
GSF129996,0.0000000
GSF130003,0.0000000
GSF130002,2.1935393
GSF130001,2.1563125
GSF130000,1.7264691
GSF130007,16.8495564
