# Convert to Pseudobulk using Seurat

In this notebook, we use the normalization and aggregation functions in the Seurat package to convert our single-cell data to pseudobulk values for each cell type per sample.

For use in other analyses, we'll retain only the BR1 and BR2 cohorts, and exclude the pediatric cohort because we've included only a few samples from this younger cohort.

## Load packages

In [1]:
# Attach a package without printing its startup messages.
# All arguments are handed straight to library().
quiet_library <- function(...) {
    suppressPackageStartupMessages(
        library(...)
    )
}

quiet_library(dplyr)
quiet_library(hise)
quiet_library(H5weaver)
quiet_library(purrr)
quiet_library(furrr)
quiet_library(Seurat)

In [2]:
plan(multicore, workers = 12)

In [3]:
min_detection <- 0.1

In [4]:
# Create the "output" directory (relative to the working directory) on first
# run; later runs find it already present and leave it untouched.
if(!dir.exists("output")) {
    dir.create("output")
}

In [5]:
out_files <- c()

## Helper functions
This function assists in reading cell metadata directly from .h5ad files into R.

In [6]:
# Read the per-cell metadata stored under /obs in an .h5ad file.
#
# h5ad_file: path to the .h5ad file.
#
# Returns a data.frame with one column per /obs entry, named by the last
# component of the entry's HDF5 path. Entries stored as a codes/categories
# pair are expanded to their category values; negative codes become NA.
# "/obs/__categories", "/obs/_index" and any entry whose path contains
# "Unnamed" are skipped.
read_h5ad_cell_meta <- function(h5ad_file) 
{
    h5ad_contents <- H5weaver::h5ls(h5ad_file)
    obs_locs <- h5ad_contents$full_name[h5ad_contents$group == "/obs"]
    obs_locs <- obs_locs[!obs_locs %in% c("/obs/__categories", "/obs/_index")]
    obs_locs <- obs_locs[!grepl("Unnamed", obs_locs)]

    h5ad <- H5Fopen(h5ad_file)
    # Release the HDF5 handle even when one of the reads below fails
    on.exit(H5Fclose(h5ad), add = TRUE)

    obs_list <- lapply(
        obs_locs,
        function(loc) {
            obs <- h5read(h5ad, loc)

            # Identify categorical entries by their structure rather than by
            # length: a plain column in a file with exactly two cells also
            # has length 2.
            is_categorical <- is.list(obs) &&
                all(c("codes", "categories") %in% names(obs))

            if (is_categorical) {
                codes <- as.vector(obs$codes)
                # Codes are 0-based positions into categories
                idx <- codes + 1
                idx[codes < 0] <- NA
                as.vector(obs$categories)[idx]
            } else {
                as.vector(obs)
            }
        }
    )
    names(obs_list) <- sub(".+/", "", obs_locs)

    as.data.frame(obs_list)
}

This function converts from .h5ad expression values to pseudobulk

In [7]:
# Aggregate the expression matrix of an .h5ad file to pseudobulk matrices,
# one matrix per AIFI_L3 value.
#
# h5ad_file:     path to the .h5ad file.
# group_by:      character vector of cell metadata columns. Their values are
#                joined with "." to form each cell's pseudobulk group label;
#                spaces and underscores in the label are replaced by "-".
# min_detection: minimum fraction of a type's cells that must have a stored
#                matrix entry for a gene for the gene to be kept for that type.
# filters:       optional named list. Each name is parsed as an expression
#                evaluated on the cell metadata (normally a column name) and
#                each value is the set of values to keep. NULL or an empty
#                list keeps every cell.
#
# Returns a list named by AIFI_L3 value. Each element is the $RNA entry of
# AggregateExpression(..., return.seurat = FALSE, group.by = "pb_group").
h5ad_to_pseudobulk_seurat <- function(h5ad_file, group_by, min_detection = 0.1, filters = NULL) {
    
    meta <- read_h5ad_cell_meta(h5ad_file)
    
    if (length(filters) > 0 && is.null(names(filters))) {
        stop("`filters` must be a named list of column = values to keep", call. = FALSE)
    }
    
    # seq_along() iterates zero times for NULL or list(); 1:length() would not
    for (i in seq_along(filters)) {
        filter_group <- rlang::parse_expr(names(filters)[i])
        keep_groups <- filters[[i]]
        meta <- meta %>%
            dplyr::filter(!!filter_group %in% keep_groups)
    }
    
    # Join the grouping columns into one label per cell
    meta$pb_group <- do.call(paste, c(unname(as.list(meta[group_by])), sep = "."))
    meta$pb_group <- gsub("[ _]", "-", meta$pb_group)
    
    mat <- read_h5ad_dgCMatrix(h5ad_file, feature_names = "_index")
    
    # Drop genes whose name starts with RP, MT- or LINC, or contains "orf".
    # NOTE(review): "^RP" also matches names such as RPA1 or RPTOR, not only
    # RPL*/RPS* - confirm this is intended.
    genes <- rownames(mat)
    keep_genes <- genes[!grepl("^RP|^MT-|^LINC|orf",genes)]
    
    type_meta <- split(meta, meta$AIFI_L3)
    
    # Handle one type at a time so only one per-type matrix is alive at once
    map(
        type_meta,
        function(type_cells) {
            # drop = FALSE keeps a sparse matrix when a type has a single cell
            type_mat <- mat[keep_genes, type_cells$barcodes, drop = FALSE]
            
            # After transposing, columns are genes, so diff(@p) is the number
            # of stored entries per gene; dividing by the cell count gives the
            # detected fraction.
            detection <- diff(t(type_mat)@p) / ncol(type_mat)
            
            # drop = FALSE keeps a matrix when exactly one gene passes
            type_mat <- type_mat[detection >= min_detection, , drop = FALSE]
            
            rownames(type_cells) <- type_cells$barcodes
            
            so <- CreateSeuratObject(
                counts = type_mat,
                meta.data = type_cells
            )
            
            AggregateExpression(
                so,
                assays = "RNA",
                return.seurat = FALSE,
                group.by = "pb_group")$RNA
        }
    )
}

In [17]:
# Build the per-pseudobulk-group metadata for an .h5ad file, using the same
# filtering and group labelling as h5ad_to_pseudobulk_seurat().
#
# h5ad_file: path to the .h5ad file.
# group_by:  character vector of cell metadata columns. Their values are
#            joined with "." to form the group label; spaces and underscores
#            in the label are replaced by "-".
# filters:   optional named list. Each name is parsed as an expression
#            evaluated on the cell metadata (normally a column name) and each
#            value is the set of values to keep. NULL or an empty list keeps
#            every cell.
#
# Returns a list named by AIFI_L3 value. Each element holds the distinct
# combinations of the group label (column `barcodes`), cohort.cohortGuid and
# all columns starting with "subject", "sample" or "AIFI_", plus `n_cells`,
# the number of cells carrying that group label.
h5ad_to_pseudobulk_meta <- function(h5ad_file, group_by, filters = NULL) {
    meta <- read_h5ad_cell_meta(h5ad_file)
    
    if (length(filters) > 0 && is.null(names(filters))) {
        stop("`filters` must be a named list of column = values to keep", call. = FALSE)
    }
    
    # seq_along() iterates zero times for NULL or list(); 1:length() would not
    for (i in seq_along(filters)) {
        filter_group <- rlang::parse_expr(names(filters)[i])
        keep_groups <- filters[[i]]
        meta <- meta %>%
            dplyr::filter(!!filter_group %in% keep_groups)
    }
    
    # Join the grouping columns into one label per cell
    meta$pb_group <- do.call(paste, c(unname(as.list(meta[group_by])), sep = "."))
    meta$pb_group <- gsub("[ _]", "-", meta$pb_group)
    
    # The dplyr verbs are namespace-qualified because the `group_by` argument
    # of this function has the same name as dplyr::group_by().
    meta <- meta %>%
      dplyr::select(pb_group, cohort.cohortGuid,
                    starts_with("subject"),
                    starts_with("sample"),
                    starts_with("AIFI_")) %>%
      dplyr::group_by(pb_group) %>%
      dplyr::mutate(n_cells = dplyr::n()) %>%
      dplyr::ungroup() %>%
      dplyr::distinct() %>%
      dplyr::rename(barcodes = pb_group)

    split(meta, meta$AIFI_L3)
}

## Retrieve files to process in HISE

We'll retrieve our clean, non-normalized .h5ad datasets for each AIFI_L2 class from HISE:

In [9]:
h5ad_uuids <- list(
    "ASDC" = "93995785-045f-4068-84b6-2caffadd2046",
    "CD14_monocyte" = "889376f0-2f6d-4eb5-83b1-09a5dbe7bddb",
    "CD16_monocyte" = "fdb1a13e-9f00-458c-87b3-490b0cbe5d60",
    "CD56bright_NK_cell" = "80a1f0a7-b295-4529-b94a-28e5a5cf5d99",
    "CD56dim_NK_cell" = "c737f38d-489a-496d-9543-0df3cd28b73e",
    "CD8aa" = "8eb443ae-8416-4cec-a713-239f3c09a049",
    "cDC1" = "2030a86a-8496-49b9-9648-609963037bf5",
    "cDC2" = "2e083c20-6461-4288-a55c-aa1fc0b0dac9",
    "DN_T_cell" = "b4ba5cd7-af29-4b79-b383-b02d22d6a676",
    "Effector_B_cell" = "79182a91-5c2c-495c-9d9e-211c2c1e7ce0",
    "Erythrocyte" = "394197d7-85f0-46df-89cc-b837a7de107b",
    "gdT" = "b5c5cc8d-e19e-46d6-8313-2901c19911c9",
    "ILC" = "f61c717f-f176-4e19-b886-4418d4585f86",
    "Intermediate_monocyte" = "c6a7c833-27c7-4f5e-8c5b-500d50e6d52d",
    "MAIT" = "2472e2a9-851f-4bfb-a4a0-266f1d307d8e",
    "Memory_B_cell" = "f5deed80-15d6-4faf-9e83-bec55d337656",
    "Memory_CD4_T_cell" = "cc12267d-73e4-4b52-b1de-337d218b31c2",
    "Memory_CD8_T_cell" = "25fc1e17-0dd3-478a-a2c2-a6871b676b3d",
    "Naive_B_cell" = "c57030dd-4c66-4be1-9d06-cc8f505c24d6",
    "Naive_CD4_T_cell" = "bec3a130-218a-4095-9183-b5908872e109",
    "Naive_CD8_T_cell" = "0590000f-c9c6-4b3d-a613-1fa8bc65abbd",
    "pDC" = "21954e50-0e55-484e-aa54-36d892bb52b6",
    "Plasma_cell" = "896280a1-b04d-4f26-9549-80e8c16dc612",
    "Platelet" = "5304bc14-1539-4443-a2dc-99632db631ea",
    "Progenitor_cell" = "054798fb-1d63-4b24-8d0a-ad61f68ebcbc",
    "Proliferating_NK_cell" = "72ec0f6a-0994-4f5f-9c8f-e46b3846a83d",
    "Proliferating_T_cell" = "ac9b7079-dd80-425d-8923-945d900cc445",
    "Transitional_B_cell" = "3a2e0fe0-a199-47f2-bab3-4b2f5d905451",
    "Treg" = "c9c5f690-baaa-46fc-88c7-a723dbc95986"
)

In [10]:
res <- cacheFiles(h5ad_uuids)

In [11]:
# `pattern` is a regular expression: escape the dot and anchor at the end so
# only names ending in ".h5ad" are picked up.
h5ad_files <- list.files(paste0("cache/", h5ad_uuids), pattern = "\\.h5ad$", full.names = TRUE)
# Fail here rather than silently producing empty outputs further down
if (length(h5ad_files) == 0) {
    stop("No .h5ad files found under cache/ for the requested UUIDs", call. = FALSE)
}

## Convert to pseudobulk for each type

In [12]:
keep_cohorts <- c("BR1", "BR2")

In [13]:
group_by <- c("AIFI_L3", "subject.subjectGuid", "sample.visitName")

In [14]:
pb_mats <- future_map(
    h5ad_files,
    h5ad_to_pseudobulk_seurat,
    group_by = group_by,
    min_detection = min_detection,
    filters = list(cohort.cohortGuid = keep_cohorts)
)

In [15]:
pb_mats <- unlist(pb_mats, recursive = FALSE)

In [18]:
pb_meta <- future_map(
    h5ad_files,
    h5ad_to_pseudobulk_meta,
    group_by = group_by,
    filters = list(cohort.cohortGuid = keep_cohorts)
)

In [19]:
pb_meta <- unlist(pb_meta, recursive = FALSE)

## Save to .rds of metadata and matrices for later use in R

In [20]:
pb_data <- list(
    mats = pb_mats,
    meta = pb_meta
)

In [21]:
out_rds <- paste0("output/ref_pbmc_AIFI_L3_seurat_pseudobulk_list_", Sys.Date(), ".rds")
saveRDS(pb_data, out_rds)

In [22]:
out_files <- c(out_files, out_rds)

## Save to .h5 files for later use via H5weaver

In [23]:
# Pair each type's pseudobulk matrix with its metadata table and pass the
# pair through h5_list_convert_from_dgCMatrix() so it can be written by
# write_h5_list() below.
# NOTE(review): pb_mats and pb_meta are matched by position only, and the rows
# of `meta` are not reordered to follow the columns of `mat` - confirm both
# are aligned before relying on observations lining up with matrix columns.
pb_h5_lists <- future_map2(
    pb_mats, pb_meta,
    function(mat, meta) {
        h5_list <- list(
            matrix_dgCMatrix = mat,
            matrix = list(
                observations = as.list(meta)
            )
        )
        h5_list_convert_from_dgCMatrix(h5_list)
    }
)

In [24]:
# One output path per entry of pb_h5_lists, in the same order. Spaces in the
# type name are replaced by "-" and the current date is embedded in the name.
out_h5 <- lapply(
    names(pb_h5_lists),
    function(type_name) {
        safe_name <- gsub(" ", "-", type_name)
        paste0(
            "output/ref_pbmc_AIFI_L3_", safe_name,
            "_seurat_pseudobulk_", Sys.Date(), ".h5"
        )
    }
)

In [25]:
walk2(
    pb_h5_lists, out_h5,
    write_h5_list
)

Combine all of these .h5 files into a single .tar.gz archive for upload.

In [26]:
# Bundle the per-type .h5 files written above into one gzip-compressed archive.
# The glob is expanded by the shell, so it also picks up matching files left
# in output/ by earlier runs.
h5_tar <- paste0("output/ref_pbmc_AIFI_L3_seurat_pseudobulk_h5_", Sys.Date(), ".tar.gz")
system_call <- paste(
    "tar -czf", h5_tar, "output/ref_pbmc_AIFI_L3*.h5"
)
tar_status <- system(system_call)
# Stop here instead of queuing a missing or partial archive for upload
if (tar_status != 0) {
    stop("tar exited with status ", tar_status, " while running: ", system_call, call. = FALSE)
}

In [27]:
out_files <- c(out_files, h5_tar)

## Convert to pseudobulk within groups

### BR1 Cohort

In [28]:
subset_name <- "BR1"

In [30]:
group_by <- c("AIFI_L3", "subject.subjectGuid", "sample.visitName")

In [31]:
pb_mats <- future_map(
    h5ad_files,
    h5ad_to_pseudobulk_seurat,
    group_by = group_by,
    min_detection = min_detection,
    filters = list(cohort.cohortGuid = "BR1")
)

In [32]:
pb_mats <- unlist(pb_mats, recursive = FALSE)

In [33]:
pb_meta <- future_map(
    h5ad_files,
    h5ad_to_pseudobulk_meta,
    group_by = group_by,
    filters = list(cohort.cohortGuid = "BR1")
)

In [34]:
pb_meta <- unlist(pb_meta, recursive = FALSE)

#### Save to .rds of metadata and matrices for later use in R

In [35]:
pb_data <- list(
    mats = pb_mats,
    meta = pb_meta
)

In [36]:
out_rds <- paste0("output/ref_pbmc_", subset_name, "_AIFI_L3_seurat_pseudobulk_list_", Sys.Date(), ".rds")
saveRDS(pb_data, out_rds)

In [37]:
out_files <- c(out_files, out_rds)

#### Save to .h5 files for later use via H5weaver

In [38]:
pb_h5_lists <- future_map2(
    pb_mats, pb_meta,
    function(mat, meta) {
        h5_list <- list(
            matrix_dgCMatrix = mat,
            matrix = list(
                observations = as.list(meta)
            )
        )
        h5_list_convert_from_dgCMatrix(h5_list)
    }
)

In [39]:
# One output path per entry of pb_h5_lists, in the same order. The "_" after
# subset_name keeps these names consistent with the .rds and .tar.gz outputs
# (ref_pbmc_<subset>_AIFI_L3_...).
out_h5 <- map(
    names(pb_h5_lists), 
    function(ct) {
        ct <- gsub(" ", "-", ct)
        paste0("output/ref_pbmc_", subset_name, "_AIFI_L3_",
               ct,
               "_seurat_pseudobulk_",
               Sys.Date(),
               ".h5")
    }
)

In [40]:
walk2(
    pb_h5_lists, out_h5,
    write_h5_list
)

Combine all of these .h5 files into a single .tar.gz archive for upload.

In [41]:
# Bundle this subset's .h5 files into one gzip-compressed archive.
h5_tar <- paste0("output/ref_pbmc_",subset_name,"_AIFI_L3_seurat_pseudobulk_h5_", Sys.Date(), ".tar.gz")
# Build the glob with paste0(): paste() would put spaces around subset_name
# and hand tar three separate, wrong arguments.
h5_glob <- paste0("output/ref_pbmc_", subset_name, "*.h5")
system_call <- paste("tar -czf", h5_tar, h5_glob)
tar_status <- system(system_call)
# Stop here instead of queuing a missing or partial archive for upload
if (tar_status != 0) {
    stop("tar exited with status ", tar_status, " while running: ", system_call, call. = FALSE)
}

In [42]:
out_files <- c(out_files, h5_tar)

### BR2 Cohort

In [43]:
subset_name <- "BR2"

In [44]:
group_by <- c("AIFI_L3", "subject.subjectGuid", "sample.visitName")

In [45]:
pb_mats <- future_map(
    h5ad_files,
    h5ad_to_pseudobulk_seurat,
    group_by = group_by,
    min_detection = min_detection,
    filters = list(cohort.cohortGuid = "BR2")
)

In [46]:
pb_mats <- unlist(pb_mats, recursive = FALSE)

In [47]:
pb_meta <- future_map(
    h5ad_files,
    h5ad_to_pseudobulk_meta,
    group_by = group_by,
    filters = list(cohort.cohortGuid = "BR2")
)

In [48]:
pb_meta <- unlist(pb_meta, recursive = FALSE)

#### Save to .rds of metadata and matrices for later use in R

In [49]:
pb_data <- list(
    mats = pb_mats,
    meta = pb_meta
)

In [50]:
out_rds <- paste0("output/ref_pbmc_", subset_name, "_AIFI_L3_seurat_pseudobulk_list_", Sys.Date(), ".rds")
saveRDS(pb_data, out_rds)

In [51]:
out_files <- c(out_files, out_rds)

#### Save to .h5 files for later use via H5weaver

In [52]:
pb_h5_lists <- future_map2(
    pb_mats, pb_meta,
    function(mat, meta) {
        h5_list <- list(
            matrix_dgCMatrix = mat,
            matrix = list(
                observations = as.list(meta)
            )
        )
        h5_list_convert_from_dgCMatrix(h5_list)
    }
)

In [53]:
# One output path per entry of pb_h5_lists, in the same order. The "_" after
# subset_name keeps these names consistent with the .rds and .tar.gz outputs
# (ref_pbmc_<subset>_AIFI_L3_...).
out_h5 <- map(
    names(pb_h5_lists), 
    function(ct) {
        ct <- gsub(" ", "-", ct)
        paste0("output/ref_pbmc_", subset_name, "_AIFI_L3_",
               ct,
               "_seurat_pseudobulk_",
               Sys.Date(),
               ".h5")
    }
)

In [54]:
walk2(
    pb_h5_lists, out_h5,
    write_h5_list
)

Combine all of these .h5 files into a single .tar.gz archive for upload.

In [55]:
# Bundle this subset's .h5 files into one gzip-compressed archive.
h5_tar <- paste0("output/ref_pbmc_",subset_name,"_AIFI_L3_seurat_pseudobulk_h5_", Sys.Date(), ".tar.gz")
# Build the glob with paste0(): paste() would put spaces around subset_name
# and hand tar three separate, wrong arguments.
h5_glob <- paste0("output/ref_pbmc_", subset_name, "*.h5")
system_call <- paste("tar -czf", h5_tar, h5_glob)
tar_status <- system(system_call)
# Stop here instead of queuing a missing or partial archive for upload
if (tar_status != 0) {
    stop("tar exited with status ", tar_status, " while running: ", system_call, call. = FALSE)
}

In [56]:
out_files <- c(out_files, h5_tar)

### CMV Positive

In [57]:
subset_name <- "cmv-positive"

In [58]:
group_by <- c("AIFI_L3", "subject.subjectGuid", "sample.visitName")

In [59]:
pb_mats <- future_map(
    h5ad_files,
    h5ad_to_pseudobulk_seurat,
    group_by = group_by,
    min_detection = min_detection,
    filters = list(cohort.cohortGuid = keep_cohorts,
                   subject.cmv = "Positive")
)

In [60]:
pb_mats <- unlist(pb_mats, recursive = FALSE)

In [61]:
pb_meta <- future_map(
    h5ad_files,
    h5ad_to_pseudobulk_meta,
    group_by = group_by,
    filters = list(cohort.cohortGuid = keep_cohorts,
                   subject.cmv = "Positive")
)

In [62]:
pb_meta <- unlist(pb_meta, recursive = FALSE)

#### Save to .rds of metadata and matrices for later use in R

In [63]:
pb_data <- list(
    mats = pb_mats,
    meta = pb_meta
)

In [64]:
out_rds <- paste0("output/ref_pbmc_", subset_name, "_AIFI_L3_seurat_pseudobulk_list_", Sys.Date(), ".rds")
saveRDS(pb_data, out_rds)

In [65]:
out_files <- c(out_files, out_rds)

#### Save to .h5 files for later use via H5weaver

In [66]:
pb_h5_lists <- future_map2(
    pb_mats, pb_meta,
    function(mat, meta) {
        h5_list <- list(
            matrix_dgCMatrix = mat,
            matrix = list(
                observations = as.list(meta)
            )
        )
        h5_list_convert_from_dgCMatrix(h5_list)
    }
)

In [67]:
# One output path per entry of pb_h5_lists, in the same order. The "_" after
# subset_name keeps these names consistent with the .rds and .tar.gz outputs
# (ref_pbmc_<subset>_AIFI_L3_...).
out_h5 <- map(
    names(pb_h5_lists), 
    function(ct) {
        ct <- gsub(" ", "-", ct)
        paste0("output/ref_pbmc_", subset_name, "_AIFI_L3_",
               ct,
               "_seurat_pseudobulk_",
               Sys.Date(),
               ".h5")
    }
)

In [68]:
walk2(
    pb_h5_lists, out_h5,
    write_h5_list
)

Combine all of these .h5 files into a single .tar.gz archive for upload.

In [69]:
# Bundle this subset's .h5 files into one gzip-compressed archive.
h5_tar <- paste0("output/ref_pbmc_",subset_name,"_AIFI_L3_seurat_pseudobulk_h5_", Sys.Date(), ".tar.gz")
# Build the glob with paste0(): paste() would put spaces around subset_name
# and hand tar three separate, wrong arguments.
h5_glob <- paste0("output/ref_pbmc_", subset_name, "*.h5")
system_call <- paste("tar -czf", h5_tar, h5_glob)
tar_status <- system(system_call)
# Stop here instead of queuing a missing or partial archive for upload
if (tar_status != 0) {
    stop("tar exited with status ", tar_status, " while running: ", system_call, call. = FALSE)
}

In [70]:
out_files <- c(out_files, h5_tar)

### CMV Negative

In [71]:
subset_name <- "cmv-negative"

In [72]:
group_by <- c("AIFI_L3", "subject.subjectGuid", "sample.visitName")

In [73]:
pb_mats <- future_map(
    h5ad_files,
    h5ad_to_pseudobulk_seurat,
    group_by = group_by,
    min_detection = min_detection,
    filters = list(cohort.cohortGuid = keep_cohorts,
                   subject.cmv = "Negative")
)

In [74]:
pb_mats <- unlist(pb_mats, recursive = FALSE)

In [75]:
pb_meta <- future_map(
    h5ad_files,
    h5ad_to_pseudobulk_meta,
    group_by = group_by,
    filters = list(cohort.cohortGuid = keep_cohorts,
                   subject.cmv = "Negative")
)

In [76]:
pb_meta <- unlist(pb_meta, recursive = FALSE)

#### Save to .rds of metadata and matrices for later use in R

In [77]:
pb_data <- list(
    mats = pb_mats,
    meta = pb_meta
)

In [78]:
out_rds <- paste0("output/ref_pbmc_", subset_name, "_AIFI_L3_seurat_pseudobulk_list_", Sys.Date(), ".rds")
saveRDS(pb_data, out_rds)

In [79]:
out_files <- c(out_files, out_rds)

#### Save to .h5 files for later use via H5weaver

In [80]:
pb_h5_lists <- future_map2(
    pb_mats, pb_meta,
    function(mat, meta) {
        h5_list <- list(
            matrix_dgCMatrix = mat,
            matrix = list(
                observations = as.list(meta)
            )
        )
        h5_list_convert_from_dgCMatrix(h5_list)
    }
)

In [81]:
# One output path per entry of pb_h5_lists, in the same order. The "_" after
# subset_name keeps these names consistent with the .rds and .tar.gz outputs
# (ref_pbmc_<subset>_AIFI_L3_...).
out_h5 <- map(
    names(pb_h5_lists), 
    function(ct) {
        ct <- gsub(" ", "-", ct)
        paste0("output/ref_pbmc_", subset_name, "_AIFI_L3_",
               ct,
               "_seurat_pseudobulk_",
               Sys.Date(),
               ".h5")
    }
)

In [82]:
walk2(
    pb_h5_lists, out_h5,
    write_h5_list
)

Combine all of these .h5 files into a single .tar.gz archive for upload.

In [83]:
# Bundle this subset's .h5 files into one gzip-compressed archive.
h5_tar <- paste0("output/ref_pbmc_",subset_name,"_AIFI_L3_seurat_pseudobulk_h5_", Sys.Date(), ".tar.gz")
# Build the glob with paste0(): paste() would put spaces around subset_name
# and hand tar three separate, wrong arguments.
h5_glob <- paste0("output/ref_pbmc_", subset_name, "*.h5")
system_call <- paste("tar -czf", h5_tar, h5_glob)
tar_status <- system(system_call)
# Stop here instead of queuing a missing or partial archive for upload
if (tar_status != 0) {
    stop("tar exited with status ", tar_status, " while running: ", system_call, call. = FALSE)
}

In [84]:
out_files <- c(out_files, h5_tar)

### Female subjects

In [85]:
subset_name <- "female"

In [86]:
group_by <- c("AIFI_L3", "subject.subjectGuid", "sample.visitName")

In [87]:
pb_mats <- future_map(
    h5ad_files,
    h5ad_to_pseudobulk_seurat,
    group_by = group_by,
    min_detection = min_detection,
    filters = list(cohort.cohortGuid = keep_cohorts,
                   subject.biologicalSex = "Female")
)

In [88]:
pb_mats <- unlist(pb_mats, recursive = FALSE)

In [89]:
pb_meta <- future_map(
    h5ad_files,
    h5ad_to_pseudobulk_meta,
    group_by = group_by,
    filters = list(cohort.cohortGuid = keep_cohorts,
                   subject.biologicalSex = "Female")
)

In [90]:
pb_meta <- unlist(pb_meta, recursive = FALSE)

#### Save to .rds of metadata and matrices for later use in R

In [91]:
pb_data <- list(
    mats = pb_mats,
    meta = pb_meta
)

In [92]:
out_rds <- paste0("output/ref_pbmc_", subset_name, "_AIFI_L3_seurat_pseudobulk_list_", Sys.Date(), ".rds")
saveRDS(pb_data, out_rds)

In [93]:
out_files <- c(out_files, out_rds)

#### Save to .h5 files for later use via H5weaver

In [94]:
pb_h5_lists <- future_map2(
    pb_mats, pb_meta,
    function(mat, meta) {
        h5_list <- list(
            matrix_dgCMatrix = mat,
            matrix = list(
                observations = as.list(meta)
            )
        )
        h5_list_convert_from_dgCMatrix(h5_list)
    }
)

In [95]:
# One output path per entry of pb_h5_lists, in the same order. The "_" after
# subset_name keeps these names consistent with the .rds and .tar.gz outputs
# (ref_pbmc_<subset>_AIFI_L3_...).
out_h5 <- map(
    names(pb_h5_lists), 
    function(ct) {
        ct <- gsub(" ", "-", ct)
        paste0("output/ref_pbmc_", subset_name, "_AIFI_L3_",
               ct,
               "_seurat_pseudobulk_",
               Sys.Date(),
               ".h5")
    }
)

In [96]:
walk2(
    pb_h5_lists, out_h5,
    write_h5_list
)

Combine all of these .h5 files into a single .tar.gz archive for upload.

In [97]:
# Bundle this subset's .h5 files into one gzip-compressed archive.
h5_tar <- paste0("output/ref_pbmc_",subset_name,"_AIFI_L3_seurat_pseudobulk_h5_", Sys.Date(), ".tar.gz")
# Build the glob with paste0(): paste() would put spaces around subset_name
# and hand tar three separate, wrong arguments.
h5_glob <- paste0("output/ref_pbmc_", subset_name, "*.h5")
system_call <- paste("tar -czf", h5_tar, h5_glob)
tar_status <- system(system_call)
# Stop here instead of queuing a missing or partial archive for upload
if (tar_status != 0) {
    stop("tar exited with status ", tar_status, " while running: ", system_call, call. = FALSE)
}

In [98]:
out_files <- c(out_files, h5_tar)

### Male subjects

In [99]:
subset_name <- "male"

In [100]:
group_by <- c("AIFI_L3", "subject.subjectGuid", "sample.visitName")

In [101]:
pb_mats <- future_map(
    h5ad_files,
    h5ad_to_pseudobulk_seurat,
    group_by = group_by,
    min_detection = min_detection,
    filters = list(cohort.cohortGuid = keep_cohorts,
                   subject.biologicalSex = "Male")
)

In [102]:
pb_mats <- unlist(pb_mats, recursive = FALSE)

In [103]:
pb_meta <- future_map(
    h5ad_files,
    h5ad_to_pseudobulk_meta,
    group_by = group_by,
    filters = list(cohort.cohortGuid = keep_cohorts,
                   subject.biologicalSex = "Male")
)

In [104]:
pb_meta <- unlist(pb_meta, recursive = FALSE)

#### Save to .rds of metadata and matrices for later use in R

In [105]:
pb_data <- list(
    mats = pb_mats,
    meta = pb_meta
)

In [106]:
out_rds <- paste0("output/ref_pbmc_", subset_name, "_AIFI_L3_seurat_pseudobulk_list_", Sys.Date(), ".rds")
saveRDS(pb_data, out_rds)

In [107]:
out_files <- c(out_files, out_rds)

#### Save to .h5 files for later use via H5weaver

In [108]:
pb_h5_lists <- future_map2(
    pb_mats, pb_meta,
    function(mat, meta) {
        h5_list <- list(
            matrix_dgCMatrix = mat,
            matrix = list(
                observations = as.list(meta)
            )
        )
        h5_list_convert_from_dgCMatrix(h5_list)
    }
)

In [109]:
# One output path per entry of pb_h5_lists, in the same order. The "_" after
# subset_name keeps these names consistent with the .rds and .tar.gz outputs
# (ref_pbmc_<subset>_AIFI_L3_...).
out_h5 <- map(
    names(pb_h5_lists), 
    function(ct) {
        ct <- gsub(" ", "-", ct)
        paste0("output/ref_pbmc_", subset_name, "_AIFI_L3_",
               ct,
               "_seurat_pseudobulk_",
               Sys.Date(),
               ".h5")
    }
)

In [110]:
walk2(
    pb_h5_lists, out_h5,
    write_h5_list
)

Combine all of these .h5 files into a single .tar.gz archive for upload.

In [111]:
# Bundle this subset's .h5 files into one gzip-compressed archive. The "_"
# after subset_name matches the archive names used for the other subsets.
h5_tar <- paste0("output/ref_pbmc_",subset_name,"_AIFI_L3_seurat_pseudobulk_h5_", Sys.Date(), ".tar.gz")
# Build the glob with paste0(): paste() would put spaces around subset_name
# and hand tar three separate, wrong arguments.
h5_glob <- paste0("output/ref_pbmc_", subset_name, "*.h5")
system_call <- paste("tar -czf", h5_tar, h5_glob)
tar_status <- system(system_call)
# Stop here instead of queuing a missing or partial archive for upload
if (tar_status != 0) {
    stop("tar exited with status ", tar_status, " while running: ", system_call, call. = FALSE)
}

In [112]:
out_files <- c(out_files, h5_tar)

## Upload results to HISE

In [113]:
study_space_uuid <- "64097865-486d-43b3-8f94-74994e0a72e0"
title <- paste("PBMC Ref. Seurat Pseudobulk Sets", Sys.Date())

In [114]:
in_list <- h5ad_uuids

In [115]:
out_list <- as.list(out_files)
out_list

In [116]:
uploadFiles(
    files = out_list,
    studySpaceId = study_space_uuid,
    title = title,
    inputFileIds = in_list,
    store = "project",
    destination = "pseudobulk_sets",
    doPrompt = FALSE
)

[1] "Authorization token invalid or expired."
[1] "Retrying..."


In [117]:
sessionInfo()

R version 4.3.2 (2023-10-31)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS

Matrix products: default
BLAS/LAPACK: /opt/conda/lib/libopenblasp-r0.3.25.so;  LAPACK version 3.11.0

locale:
 [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
 [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
 [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
[10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   

time zone: Etc/UTC
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] Seurat_5.0.1       SeuratObject_5.0.1 sp_2.1-2           furrr_0.3.1       
 [5] future_1.33.1      purrr_1.0.2        H5weaver_1.2.0     rhdf5_2.46.1      
 [9] Matrix_1.6-4       data.table_1.15.0  hise_2.16.0        dplyr_1.1.4       

loaded via a namespace (and not attached):
  [1] RColorBrewer_1.1-3  