In [29]:
# load libraries
quiet_library <- function(...) {
    suppressPackageStartupMessages(library(...))
}
quiet_library(Seurat)
quiet_library(ggplot2)
quiet_library(Matrix)
quiet_library(H5weaver)
quiet_library(dplyr)
quiet_library(viridis)
quiet_library(harmony)
quiet_library(Nebulosa)
library(GEOquery)

In [3]:
# Read in ADT information
adt_info_func <- function(path){
  h5_list <- h5dump(path)
  h5_list <- h5_list_convert_to_dgCMatrix(h5_list, target = 'ADT')
  adt_mtx <- h5_list$ADT_dgCMatrix
  adt_mtx
}

# Seurat RNA Pipeline
seurat_proc_func <- function(x){
  DefaultAssay(x) <- 'RNA'
    # x <- NormalizeData(x) %>% FindVariableFeatures() %>% ScaleData() %>% RunPCA()
  x <- suppressWarnings(SCTransform(x, verbose = T))
  x <- RunPCA(x, verbose = T)
  x <- RunUMAP(x, dims = 1:30, verbose = T) %>% FindNeighbors(dims = 1:30, verbose = T) %>% FindClusters(resolution = 0.5, verbose = T)
  x
}

# Seurat RNA Label transfer
label_transfer_func <- function(reference, query){
  anchors <- FindTransferAnchors(
    reference = reference,
    query = query,
    normalization.method = "SCT",
    reference.reduction = "spca",
    dims = 1:50
  )
  query <- TransferData(anchorset = anchors, reference = reference, query = query,
                    refdata = list(
                      celltype.l1 = "celltype.l1",
                      celltype.l2 = "celltype.l2",
                      celltype.l3 = "celltype.l3",
                      predicted_ADT = 'ADT'))
  query
}

label_transfer_func_alt <- function(reference, query){
  anchors <- FindTransferAnchors(
    reference = reference,
    query = query,
    normalization.method = "SCT",
    reference.reduction = "spca",
    dims = 1:50, recompute.residuals = FALSE
  )
  query <- TransferData(anchorset = anchors, reference = reference, query = query,
                    refdata = list(
                      celltype.l1 = "celltype.l1",
                      celltype.l2 = "celltype.l2",
                      celltype.l3 = "celltype.l3",
                      predicted_ADT = 'ADT'))
  query
}

In [24]:
all_h5 <- list.files(path = '.', pattern = '*frag*', full.names = TRUE)
all_h5<-all_h5[1:16]
extracted_pattern <- sub(".*/(.*?)_[^_]*$", "\\1", all_h5)

In [25]:
split_list <- strsplit(extracted_pattern, "_", fixed = TRUE)
split_df <- do.call(rbind, sapply(split_list, function(x) as.data.frame(t(x)), simplify = FALSE))
df <- as.data.frame(split_df, stringsAsFactors = FALSE)

In [26]:
colnames(df)<-c('GEO_Accession','Batch','pbmc_sample_id')

In [27]:
df$combined_sample_id<-extracted_pattern

In [29]:
write.csv(df,'meta_data_GEO.csv')

In [24]:
df<-read.csv('meta_data_GEO.csv')

In [28]:
head(df)

Unnamed: 0_level_0,X,GEO_Accession,Batch,pbmc_sample_id,combined_sample_id
Unnamed: 0_level_1,<int>,<chr>,<chr>,<chr>,<chr>
1,1,GSM6611363,B065-P1,PB00593-04,GSM6611363_B065-P1_PB00593-04
2,2,GSM6611364,B069-P1,PB00323-02,GSM6611364_B069-P1_PB00323-02
3,3,GSM6611365,B076-P1,PB00368-04,GSM6611365_B076-P1_PB00368-04
4,4,GSM6611366,B076-P1,PB00353-03,GSM6611366_B076-P1_PB00353-03
5,5,GSM6611367,B076-P1,PB00334-03,GSM6611367_B076-P1_PB00334-03
6,6,GSM6611368,B076-P1,PB00377-03,GSM6611368_B076-P1_PB00377-03


In [2]:
eList <- getGEO("GSE214546")

Found 1 file(s)

GSE214546_series_matrix.txt.gz



In [27]:
df_meta<-as.data.frame(eList$GSE214546_series_matrix.txt.gz) %>% arrange(geo_accession)
colnames(df_meta)

In [31]:
df_meta<-df_meta[c('geo_accession','age.ch1','birth.year.ch1','cell.type.ch1','library.type.ch1','race.ch1','Sex.ch1','subject_id.ch1')]

In [32]:
colnames(df_meta)[1]<-'GEO_Accession'

In [34]:
df<-left_join(df,df_meta)

[1m[22mJoining with `by = join_by(GEO_Accession)`


In [35]:
write.csv(df,'meta_data_GEO.csv')