In [None]:
########################################################################
# Author    : A. Alsema
# Date      : July 2021
# Dataset   : Visium Spatial Transcriptomics for MS lesions
# Purpose   : match SPOTlight cell type proportion estimates to the Seurat metadata at spot level

# Required inputs:
# - path_to_decnv_matrices: this contains tables with the estimated cell type proportions per sample, named "<ST_ID>_celltype_prop.csv"
# - path_to_seurat_clusters: this contains "seurat_metadata.csv" with seurat metadata of all samples

# Output:
# - seurat_metadata_with_cellprop.csv: merged data inputs, matched by spot barcode
########################################################################

In [1]:
# Start from a clean workspace: drop every (non-hidden) object in the current environment.
rm(list = ls(all.names = FALSE))

In [15]:
library(stringr)
# Input/output directories. Both are concatenated with file names via paste0(),
# so the trailing slash is required.
path_to_decnv_matrices  <- "RData/SPOTlight/"
path_to_seurat_clusters <- "Routput/Seurat/"

In [32]:
# Sample IDs whose per-sample cell type proportion tables are combined below.
current_samples <- c("ST31", "ST32", "ST34",
                     "ST37", "ST38", "ST33",
                     "ST69", "ST70", "ST79",
                     "ST67", "ST68", "ST71",
                     "ST72", "ST73", "ST74")

# Read every per-sample table into a list and bind once at the end, instead of
# growing the combined data frame with rbind() on each iteration (which re-copies
# all previously accumulated rows every time).
deconv_list <- vector("list", length(current_samples))
for (i in seq_along(current_samples)) {
    current_sample <- current_samples[i]
    deconv_file <- paste0(path_to_decnv_matrices, current_sample, "_celltype_prop.csv")
    # Fail with an explicit message rather than a generic connection error.
    if (!file.exists(deconv_file)) {
        stop("Cell type proportion table not found for sample ", current_sample,
             ": ", deconv_file)
    }
    deconv_df <- read.csv(deconv_file, row.names = 1)
    # Join key: "<sample>_<first 18 characters of the row name>".
    # NOTE(review): the 18-character cut presumably keeps the spot barcode and
    # drops any suffix appended upstream -- confirm against the input tables.
    deconv_df$my_barcode <- paste(current_sample,
                                  str_sub(row.names(deconv_df), end = 18),
                                  sep = "_")
    rownames(deconv_df) <- deconv_df$my_barcode
    deconv_list[[i]] <- deconv_df
}

# rev() keeps the row order of the previous implementation, which prepended each
# newly read sample (last sample first).
deconv_df_combined <- if (length(deconv_list) > 0) {
    do.call(rbind, rev(deconv_list))
} else {
    data.frame()
}

str(deconv_df_combined)

In [31]:
# Load the Seurat metadata of all samples; the first CSV column supplies the row names.
cluster_info <- read.csv(
    file = paste0(path_to_seurat_clusters, "seurat_metadata.csv"),
    row.names = 1
)

# Join key: "<sample_ID>_<first 18 characters of the row name>".
cluster_info$barcode <- paste(
    cluster_info$sample_ID,
    str_sub(rownames(cluster_info), end = 18),
    sep = "_"
)

str(cluster_info)

'data.frame':	55519 obs. of  22 variables:
 $ orig.ident            : chr  "ST31_CWM" "ST31_CWM" "ST31_CWM" "ST31_CWM" ...
 $ nCount_Spatial        : int  977 1526 4381 2517 1727 2671 507 1445 1388 2513 ...
 $ nFeature_Spatial      : int  684 937 2010 1427 1073 1482 401 847 875 1403 ...
 $ sample_ID             : chr  "ST31" "ST31" "ST31" "ST31" ...
 $ manuscript_ID         : chr  "C1" "C1" "C1" "C1" ...
 $ slide                 : chr  "ST_1" "ST_1" "ST_1" "ST_1" ...
 $ sample_name           : chr  "CWM_2012_070" "CWM_2012_070" "CWM_2012_070" "CWM_2012_070" ...
 $ donor_ID              : chr  "2012-070" "2012-070" "2012-070" "2012-070" ...
 $ lesiontype            : chr  "CNT" "CNT" "CNT" "CNT" ...
 $ Age                   : int  79 79 79 79 79 79 79 79 79 79 ...
 $ Sex                   : chr  "M" "M" "M" "M" ...
 $ RIN                   : num  7.7 7.7 7.7 7.7 7.7 7.7 7.7 7.7 7.7 7.7 ...
 $ PMI_min               : int  345 345 345 345 345 345 345 345 345 345 ...
 $ brain_pH           

In [44]:
# Reorder the deconvolution rows so that row i corresponds to row i of cluster_info.
# match() returns NA for metadata barcodes without a deconvolution row, and indexing
# a data frame with NA yields an all-NA row, so unmatched spots are reported
# explicitly instead of passing silently into the merged table.
barcode_idx <- match(cluster_info$barcode, deconv_df_combined$my_barcode)
n_unmatched <- sum(is.na(barcode_idx))
if (n_unmatched > 0) {
    warning(n_unmatched, " of ", length(barcode_idx),
            " spots in the Seurat metadata have no deconvolution estimate; ",
            "their cell type proportions will be NA")
}
deconv_match <- deconv_df_combined[barcode_idx, ]

# Displayed sanity check: TRUE only if the aligned row names equal the metadata barcodes.
identical(rownames(deconv_match), cluster_info$barcode)

In [46]:
# Column-bind the barcode-aligned deconvolution rows to the Seurat metadata.
# cbind() pairs rows purely by position, so this relies on deconv_match having
# been built in cluster_info row order by the match() step.
deconv_cluster_info <- cbind(deconv_match, cluster_info)

In [48]:
# Save the merged per-spot table next to the Seurat metadata input.
write.csv(
    x = deconv_cluster_info,
    file = paste0(path_to_seurat_clusters, "seurat_metadata_with_cellprop.csv")
)