In [4]:
# load libraries
#' Attach a package while hiding its startup messages.
#'
#' @param ... Arguments handed unchanged to `library()`.
#' @return The value produced by the wrapped `library()` call.
quiet_library <- function(...) {
  suppressPackageStartupMessages(expr = library(...))
}
quiet_library(Seurat)
quiet_library(ggplot2)
quiet_library(Matrix)
quiet_library(H5weaver)
quiet_library(dplyr)
quiet_library(viridis)
quiet_library(harmony)
quiet_library(Nebulosa)

In [3]:
# Read in ADT information
#' Load the ADT matrix stored in an HDF5 file.
#'
#' Dumps the file with `h5dump()`, converts its 'ADT' target with
#' `h5_list_convert_to_dgCMatrix()`, and hands back the resulting
#' `ADT_dgCMatrix` element.
#'
#' @param path Path passed to `h5dump()`.
#' @return The `ADT_dgCMatrix` element of the converted list.
adt_info_func <- function(path) {
  converted <- h5_list_convert_to_dgCMatrix(h5dump(path), target = "ADT")
  converted$ADT_dgCMatrix
}

# Seurat RNA Pipeline
#' Run the SCTransform-based RNA workflow on an object.
#'
#' Sets the default assay to "RNA", then applies SCTransform, PCA, UMAP,
#' neighbour finding and clustering, in that order.
#'
#' @param x Object to process (presumably a Seurat object with an "RNA"
#'   assay; confirm against callers).
#' @param dims Dimensions passed to `RunUMAP()` and `FindNeighbors()`.
#'   Defaults to `1:30`, the value previously hard-coded.
#' @param resolution Resolution passed to `FindClusters()`. Defaults to
#'   `0.5`, the value previously hard-coded.
#' @return The processed object.
seurat_proc_func <- function(x, dims = 1:30, resolution = 0.5) {
  DefaultAssay(x) <- "RNA"
  # Alternative log-normalisation workflow, kept for reference:
  # x <- NormalizeData(x) %>% FindVariableFeatures() %>% ScaleData() %>% RunPCA()
  # Warnings raised during SCTransform are deliberately silenced here.
  x <- suppressWarnings(SCTransform(x, verbose = TRUE))
  x <- RunPCA(x, verbose = TRUE)
  x <- RunUMAP(x, dims = dims, verbose = TRUE)
  x <- FindNeighbors(x, dims = dims, verbose = TRUE)
  x <- FindClusters(x, resolution = resolution, verbose = TRUE)
  x
}

# Seurat RNA Label transfer
#' Transfer cell-type labels and ADT data from a reference onto a query.
#'
#' Finds transfer anchors using SCT normalisation against the reference's
#' "spca" reduction over dims 1:50, then calls `TransferData()` for the
#' `celltype.l1`, `celltype.l2` and `celltype.l3` fields plus the "ADT"
#' entry of the reference (stored under `predicted_ADT`).
#'
#' @param reference Reference object passed to `FindTransferAnchors()` and
#'   `TransferData()`.
#' @param query Query object to annotate.
#' @param ... Extra arguments forwarded to `FindTransferAnchors()`
#'   (for example `recompute.residuals = FALSE`). With none supplied the
#'   call is the same as before this parameter existed.
#' @return The query object returned by `TransferData()`.
label_transfer_func <- function(reference, query, ...) {
  anchors <- FindTransferAnchors(
    reference = reference,
    query = query,
    normalization.method = "SCT",
    reference.reduction = "spca",
    dims = 1:50,
    ...
  )
  query <- TransferData(
    anchorset = anchors,
    reference = reference,
    query = query,
    refdata = list(
      celltype.l1 = "celltype.l1",
      celltype.l2 = "celltype.l2",
      celltype.l3 = "celltype.l3",
      predicted_ADT = "ADT"
    )
  )
  query
}

#' Variant of `label_transfer_func()` that skips residual recomputation.
#'
#' Identical to `label_transfer_func()` except that
#' `recompute.residuals = FALSE` is passed to `FindTransferAnchors()`.
#'
#' @param reference Reference object passed to `FindTransferAnchors()` and
#'   `TransferData()`.
#' @param query Query object to annotate.
#' @return The query object returned by `TransferData()`.
label_transfer_func_alt <- function(reference, query) {
  label_transfer_func(reference, query, recompute.residuals = FALSE)
}

In [24]:
# Collect the fragment files in the working directory. `pattern` is a regular
# expression, not a shell glob: in the previous '*frag*' the trailing `*` made
# the "g" optional, so match the literal substring "frag" instead.
all_h5 <- list.files(path = ".", pattern = "frag", full.names = TRUE)
# Keep at most the first 16 files; head() does not pad with NA when fewer
# than 16 files are present (indexing with 1:16 did).
all_h5 <- head(all_h5, 16L)
# Drop the leading directory and everything from the last underscore onward,
# leaving the combined sample identifier. Paths that do not match the pattern
# are returned unchanged by sub().
extracted_pattern <- sub(".*/(.*?)_[^_]*$", "\\1", all_h5)

In [25]:
# Break each combined sample ID into its underscore-separated fields, turn
# every ID into a one-row data frame, and stack the rows.
split_list <- strsplit(extracted_pattern, split = "_", fixed = TRUE)
split_df <- do.call(
  rbind,
  lapply(split_list, function(fields) as.data.frame(t(fields)))
)
df <- as.data.frame(split_df, stringsAsFactors = FALSE)

In [26]:
# Name the ID fields; assumes every ID split into exactly three parts.
names(df) <- c("GEO_Accession", "Batch", "pbmc_sample_id")

In [27]:
# Keep the unsplit identifier alongside its individual fields.
df[["combined_sample_id"]] <- extracted_pattern

In [29]:
# Save the sample metadata table (row names are written, as by default).
write.csv(x = df, file = "meta_data_GEO.csv")