# Select data for analysis from HISE

## Load libraries

In [1]:
# Attach a package without printing its startup messages.
# Everything in `...` is forwarded unchanged to library().
quiet_library <- function(...) {
  suppressPackageStartupMessages(
    library(...)
  )
}
quiet_library(hise)
quiet_library(dplyr)
quiet_library(purrr)

## Find files for use in HISE

### RNA and ADT data

In [2]:
# Query HISE for the labeled scRNA-seq files of batch B065 and
# convert the returned descriptors to a data frame in one pipeline.
rna_desc <- getFileDescriptors(
  fileType = "scRNA-seq-labeled",
  filter = list(file.batchID = "B065")
) %>%
  fileDescToDataframe()

In [3]:
# Number of RNA file descriptor rows returned for the batch
nrow(rna_desc)

In [4]:
# Give the file ID column an RNA-specific name so it stays distinct
# from the ATAC file ID once the two tables are joined.
rna_desc <- rename(rna_desc, rna_file.id = file.id)

### ATAC data

In [5]:
# Query HISE for the ArchR arrow ATAC files of batch B065 and
# convert the returned descriptors to a data frame in one pipeline.
atac_desc <- getFileDescriptors(
  fileType = "atac-assembly-archr-arrow",
  filter = list(file.batchID = "B065")
) %>%
  fileDescToDataframe()

In [6]:
# Number of ATAC file descriptor rows returned for the batch
nrow(atac_desc)

In [7]:
# Keep only the sample kit ID (used as the join key below) and the file ID,
# renaming the latter during selection so it is ATAC-specific.
atac_desc <- select(
  atac_desc,
  sample.sampleKitGuid,
  atac_file.id = file.id
)

## Join and select metadata

In [8]:
# List the available RNA metadata columns before choosing which to keep
names(rna_desc)

In [9]:
# Build one metadata table per sample: RNA descriptors with the matching
# ATAC file ID attached, excluding the BIOIVT cohort, reduced to the columns
# needed downstream.
#
# The join key is stated explicitly so the join cannot silently change if the
# two tables ever share another column name, and so dplyr does not have to
# guess (and announce) the key.
file_meta <- rna_desc %>%
  left_join(atac_desc, by = join_by(sample.sampleKitGuid)) %>%
  filter(cohort.cohortGuid != "BIOIVT") %>%
  select(
    cohort.cohortGuid,
    subject.subjectGuid,
    subject.birthYear,
    subject.biologicalSex,
    sample.sampleKitGuid,
    sample.visitName,
    rna_file.id,
    atac_file.id
  )

[1m[22mJoining with `by = join_by(sample.sampleKitGuid)`


In [10]:
# Display the joined per-sample metadata table
file_meta

cohort.cohortGuid,subject.subjectGuid,subject.birthYear,subject.biologicalSex,sample.sampleKitGuid,sample.visitName,rna_file.id,atac_file.id
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
BR2,BR2005,1963,Female,KT00395,Flu Year 1 Day 7,c9a1c7af-f0c3-41ad-87a7-2a3fb07e682d,32f3d133-f343-4a1e-aca7-13fc3db2f41a
BR2,BR2042,1965,Female,KT00559,Flu Year 1 Day 7,d929e7ef-162b-410b-b765-ae1d7d73bae8,d45a1b4c-b9c2-439d-b6d5-443f6060dcc7
UP1,UP1006,2009,Female,KT00173,Flu Year 1 Pre-Vac 7-12 Weeks,81bd562d-a6ad-4a05-befe-15303d1847de,bd06e203-1531-4c42-bc5b-c5bd5999cccd
BR2,BR2015,1964,Female,KT00396,Flu Year 1 Day 7,78587b33-2b08-4eb4-a392-0c3963ed904e,fe2d1cbd-de72-406a-b553-b7e3f6262cb3
UP1,UP1007,2009,Female,KT00192,Flu Year 1 Pre-Vac 7-12 Weeks,9df3f431-970c-42da-99d2-a8fd4dd8558a,8959f160-2511-40f3-80d5-59060c2a0183
UP1,UP1010,2009,Female,KT00197,Flu Year 1 Pre-Vac 7-12 Weeks,b3c55054-77b5-41cf-91cd-97576dd0ba20,6f53386c-7492-4d44-893b-5f0e2b8d97cf
BR2,BR2002,1962,Female,KT00593,Flu Year 2 Day 0,3f38fe6b-8cfc-4da5-9f65-7db40259c5eb,e1424785-1979-4c81-8a6d-5a3278cd42ef
UP1,UP1001,2009,Female,KT00199,Flu Year 1 Day 7,a9107429-686e-4365-8776-2c079783ccde,8b0b772e-06bd-478c-a4b2-70cd6cbd6887


## Cache these files so we can store their metadata in HISE

In [11]:
# Cache a single HISE file by UUID. cacheFiles() is called with a
# one-element list, one file per call.
cache_one_file <- function(uuid) {
  cacheFiles(list(uuid))
}

# Cache the RNA files. The results get their own name so they are not
# overwritten by the ATAC results below.
rna_file_res <- map(file_meta$rna_file.id, cache_one_file)

# Cache the ATAC files. `file_res` keeps the value it ended up with before
# this change (the ATAC caching results).
file_res <- map(file_meta$atac_file.id, cache_one_file)

In [12]:
# Make sure the output directory exists before writing into it.
if (!dir.exists("output")) {
  dir.create("output")
}

# Write the sample/file metadata as CSV. Character fields are quoted
# (write.csv's default) so free-text values such as visit names cannot break
# the column structure if they ever contain a comma.
write.csv(
  file_meta,
  "output/sample_meta.csv",
  row.names = FALSE
)

## Store results in HISE

Finally, we store the output file in our Collaboration Space for later retrieval and use. We need to provide the UUID for our Collaboration Space (aka `studySpaceId`), as well as a title for this step in our analysis process.

The hise function `uploadFiles()` also requires the FileIDs of the original input files for reference. We assemble these below from the RNA and ATAC file IDs stored in `file_meta`.

In [20]:
# UUID of the Collaboration Space; passed as `studySpaceId` to uploadFiles()
study_space_uuid <- "00a53fa5-18da-4333-84cb-3cc0b0761201"
# Title for this analysis step; passed as `title` to uploadFiles()
title <- "TEA-seq demo sample and file metadata"

In [21]:
# Generate an identifier with ids::adjective_animal(); it is passed as
# `destination` to uploadFiles(). Printed so it is recorded in the notebook.
search_id <- ids::adjective_animal()
search_id

In [22]:
# Input file IDs for the upload record: all RNA file IDs followed by all
# ATAC file IDs, one list element per ID.
in_list <- c(
  as.list(file_meta$rna_file.id),
  as.list(file_meta$atac_file.id)
)

In [23]:
# Show the input file IDs that will be attached to the upload
in_list

In [24]:
# Files to upload: the metadata CSV written to the output directory
out_list <- list("output/sample_meta.csv")

In [25]:
# Show the list of files that will be uploaded
out_list

In [26]:
# Upload the output file to the Collaboration Space, recording the input
# file IDs it was derived from. As the captured output of this cell shows,
# the call can prompt interactively (notebook selection and a y/n upload
# confirmation).
uploadFiles(
    files = out_list,
    studySpaceId = study_space_uuid,
    title = title,
    inputFileIds = in_list,
    destination = search_id
)

[1] "Cannot determine the current notebook."
[1] "1) /home/jupyter/certpro-workflow-demos/adult_vs_pediatric_teaseq/01-R_get_h5_metadata.ipynb"
[1] "2) /home/jupyter/certpro-workflow-demos/adult_vs_pediatric_teaseq/00-R_select_samples.ipynb"
[1] "3) /home/jupyter/examples/Visualization_apps/dash/save_visualization_app_example.ipynb"


Please select (1-3)  2


You are trying to upload the following files:  output/sample_meta.csv



(y/n) y


In [27]:
# Record the R version and package versions used for this run
sessionInfo()

R version 4.3.2 (2023-10-31)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS

Matrix products: default
BLAS/LAPACK: /opt/conda/lib/libopenblasp-r0.3.25.so;  LAPACK version 3.11.0

locale:
 [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
 [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
 [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
[10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   

time zone: Etc/UTC
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] purrr_1.0.2 dplyr_1.1.4 hise_2.16.0

loaded via a namespace (and not attached):
 [1] jsonlite_1.8.8   compiler_4.3.2   crayon_1.5.2     tidyselect_1.2.0
 [5] Rcpp_1.0.12      IRdisplay_1.1    stringr_1.5.1    bitops_1.0-7    
 [9] assertthat_0.2.1 uuid_1.2-0       fastmap_1.1.1    IRkernel_1.3.2  
[13] mime_