# How to analyze HTO data

In our database, we utilized HTO data generated from https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE251912. This documentation has instructions for demultiplexing the HTO data and mapping each hashtag to its treatment.

In [1]:
library(Matrix)
library(Seurat)
library(ggplot2)

Attaching SeuratObject



In [None]:
# Reference key for this dataset. Kept as `#` comments: R has no triple-quoted
# strings, so a ''' ... ''' block is a parse error if this cell is run.
#
# The catalogue numbers from Human Biolegend TotalSeq are A0251-A0257. Please find sequences for all of them below:
# A0251 = GTCAACTCTTTAGCG
# A0252 = TGATGGCCTATTGGG
# A0253 = TTCCGCCTCTCTTTG
# A0254 = AGTAAGTTCAGCGTA
# A0255 = AAGTATCGTTTCGCA
# A0256 = GGTTGCCAGATGTCA
# A0257 = TGTCTTTCCTGCCAG
#
# Islet 67:
# islet 67 GEX samples: SRR27326986, SRR27326987
# A0251 = 24Hr Untreated
# A0252 = 24Hr Untreated
# A0253 = 24Hr Untreated
# A0254 = 24Hr Untreated
# A0255 = 2Hr H2O2
# A0256 = 4Hr Cytokines
# A0257 = 24Hr Cytokines
#
# Islet 116 & Islet 117:
# A0251 = 24Hr Untreated
# A0252 = 2Hr Cytokines
# A0253 = 4Hr Cytokines
# A0254 = 16h Cytokines
# A0255 = 24Hr Cytokines
# A0256 = 2Hr H2O2
#
# Islet 150, Islet 162 & Islet 168:
# islet 150 GEX samples: SRR27326996, SRR27326997
# islet 162 GEX samples: SRR27326994, SRR27326995
# islet 168 GEX samples: SRR27326992, SRR27326993
# A0251 = 2Hr Thapsigargin
# A0252 = 4Hr Thapsigargin
# A0253 = 4Hr DMSO
# A0254 = 24Hr Untreated
# A0255 = 24h DMSO
# A0256 = 24h Thapsigargin

In [None]:
# Input locations: hto_dir is prepended to the GEO HTO count files,
# gex_dir to the per-run GEX (RNA) matrices and .rna.rds objects.
hto_dir <- "/nfs/turbo/umms-scjp-pank/2_IIDP/results/gencode_v39_private/GSE251912/data/GEO/"
gex_dir <- "/nfs/turbo/umms-scjp-pank/2_IIDP/results/gencode_v39_private/GSE251912/results/temp_rds/"

In [None]:
#' Read an HTO count matrix stored in Matrix Market format.
#'
#' @param barcode.path Path to a header-less delimited file; its first column
#'   supplies the column (cell barcode) names.
#' @param features.path Path to a header-less delimited file; its first column
#'   supplies the row (HTO feature) names.
#' @param matrix.path Path to the Matrix Market file read with readMM().
#' @return The matrix returned by readMM(), with features as row names and
#'   barcodes as column names.
load_HTO <- function(barcode.path, features.path, matrix.path) {
    hto_counts <- readMM(file = matrix.path)
    feature_tbl <- read.delim(features.path, header = FALSE, stringsAsFactors = FALSE)
    barcode_tbl <- read.delim(barcode.path, header = FALSE, stringsAsFactors = FALSE)

    # Rows are features, columns are barcodes
    dimnames(hto_counts) <- list(feature_tbl$V1, barcode_tbl$V1)

    hto_counts
}

In [None]:
#' Read a gene-expression count matrix stored in Matrix Market format.
#'
#' @param matrix_file Path to the Matrix Market file.
#' @param features_file Path to a header-less, tab-separated features file;
#'   columns 1 and 2 are combined into row names of the form "V1 (V2)".
#' @param barcodes_file Path to a header-less barcodes file; its first column
#'   supplies the column names.
#' @return A dgCMatrix with features as rows and barcodes as columns.
load_mm <- function(matrix_file, features_file, barcodes_file) {
    counts <- as(Matrix::readMM(matrix_file), 'dgCMatrix')

    # quote = "" and comment.char = "" keep read.table from treating an
    # apostrophe or '#' inside a feature name as a quote/comment, which would
    # silently merge or truncate rows and break the dimnames assignment below.
    features <- read.table(features_file, as.is = TRUE, sep = '\t', header = FALSE,
                           quote = "", comment.char = "")
    feature_labels <- paste0(features$V1, ' (', features$V2, ')')

    barcodes <- read.table(barcodes_file, as.is = TRUE, header = FALSE,
                           quote = "", comment.char = "")[, 1]

    dimnames(counts) <- list(feature_labels, barcodes)
    counts
}


# Build a normalized, clustered RNA Seurat object for GEX run SRR27326996
# from its barcodes/features/matrix files in gex_dir.
RNA_BARCODES <- paste0(gex_dir, "SRR27326996.barcodes.tsv")
RNA_FEATURES <- paste0(gex_dir, "SRR27326996.features.tsv")
RNA_MTX <- paste0(gex_dir, "SRR27326996.matrix.mtx")

# PCS (upper bound of the `dims = 1:PCS` range) and RESOLUTION (clustering
# resolution) are not defined anywhere in this notebook. Check up front so the
# cell fails immediately instead of after the 200-PC PCA has been computed.
if (!exists("PCS") || !exists("RESOLUTION")) {
    stop("Define PCS (number of principal components to use) and RESOLUTION ",
         "(clustering resolution) before running this cell.", call. = FALSE)
}

mm <- load_mm(RNA_MTX, RNA_FEATURES, RNA_BARCODES)
rna <- CreateSeuratObject(counts = mm, min.cells = 1, min.features = 1, assay = "RNA", project = 'RNA')
rna <- NormalizeData(rna, verbose = FALSE)
rna <- FindVariableFeatures(rna, selection.method = 'vst', nfeatures = 2000, verbose = FALSE)
rna <- ScaleData(rna, verbose = FALSE)
rna <- RunPCA(rna, npcs = 200, verbose = FALSE)
rna <- RunUMAP(rna, reduction = 'pca', dims = 1:PCS)
rna <- FindNeighbors(rna, dims = 1:PCS, k.param = 20)
rna <- FindClusters(rna, resolution = RESOLUTION, n.start = 100)

# Generate singlet files

## Islet 150

In [None]:
# islet 150 GEX samples: SRR27326996, SRR27326997
# Shell commands (not R): one HTODemux.R run per technical replicate, pairing
# each replicate's HTO matrix/features/barcodes with one RNA object.
# NOTE(review): input paths are relative; confirm the working directory (or the
# script) resolves both the GSM* HTO files and the *.rna.rds files.

# Technical replicate 1 HTO counts with RNA object SRR27326996
Rscript /nfs/turbo/umms-scjp-pank/2_IIDP/scripts/HTODemux.R --matrix GSM7989940_Islet_150_SAMN21845647_Technical_Replicate_1_umi_count_matrix.mtx.gz --features GSM7989940_Islet_150_SAMN21845647_Technical_Replicate_1_umi_count_genes.tsv.gz --barcodes GSM7989940_Islet_150_SAMN21845647_Technical_Replicate_1_umi_count_barcodes.tsv.gz --rna SRR27326996.rna.rds --prefix Islet150_SRR27326996_ --outdir /nfs/turbo/umms-scjp-pank/2_IIDP/results/gencode_v39_private/GSE251912/results/singlets/

# Technical replicate 2 HTO counts with RNA object SRR27326997
Rscript /nfs/turbo/umms-scjp-pank/2_IIDP/scripts/HTODemux.R --matrix GSM7989940_Islet_150_SAMN21845647_Technical_Replicate_2_umi_count_matrix.mtx.gz --features GSM7989940_Islet_150_SAMN21845647_Technical_Replicate_2_umi_count_genes.tsv.gz --barcodes GSM7989940_Islet_150_SAMN21845647_Technical_Replicate_2_umi_count_barcodes.tsv.gz --rna SRR27326997.rna.rds --prefix Islet150_SRR27326997_ --outdir /nfs/turbo/umms-scjp-pank/2_IIDP/results/gencode_v39_private/GSE251912/results/singlets/

### Islet 150 technical rep 1

In [None]:
# HTO input files for Islet 150, technical replicate 1
HTO_BC <- paste0(
    hto_dir,
    "GSM7989940_Islet_150_SAMN21845647_Technical_Replicate_1_umi_count_barcodes.tsv.gz"
)
HTO_FEATURE <- paste0(
    hto_dir,
    "GSM7989940_Islet_150_SAMN21845647_Technical_Replicate_1_umi_count_genes.tsv.gz"
)
HTO_MATRIX <- paste0(
    hto_dir,
    "GSM7989940_Islet_150_SAMN21845647_Technical_Replicate_1_umi_count_matrix.mtx.gz"
)

# HTO counts (features x barcodes) and the RNA object for the same run
mat.htos <- load_HTO(barcode.path = HTO_BC, features.path = HTO_FEATURE, matrix.path = HTO_MATRIX)
rna <- readRDS(file = paste0(gex_dir, "SRR27326996.rna.rds"))

# Barcodes present in both modalities, and how many there are
joint.bcs <- intersect(colnames(mat.htos), colnames(rna))
length(joint.bcs)

In [None]:
# Peek at the top-left 5 x 5 corner of the HTO count matrix (features x barcodes)
mat.htos[1:5, 1:5]

In [None]:
# Total counts per HTO before filtering
rowSums(mat.htos)
# Remove the "unmapped" row, then any HTO with no counts at all.
# drop = FALSE keeps the result a matrix even if only one row survives;
# without it the subset collapses to a plain vector and later
# rownames()/column indexing would fail.
mat.htos <- mat.htos[rownames(mat.htos) != "unmapped", , drop = FALSE]
mat.htos <- mat.htos[rowSums(mat.htos) > 0, , drop = FALSE]
# Total counts per HTO after filtering
rowSums(mat.htos)

In [None]:
# Subset RNA and HTO counts by joint cell barcodes
rna <- rna[, joint.bcs]
# drop = FALSE guarantees a features x barcodes matrix (with column names)
# is handed to as.matrix() regardless of how many barcodes are shared.
mat.htos <- as.matrix(mat.htos[, joint.bcs, drop = FALSE])

# Confirm that the HTO have the correct names
rownames(mat.htos)

In [None]:
# Setup Seurat object from the raw RNA counts of the barcode-matched object.
# GetAssayData() is used instead of reaching into rna@assays$RNA@counts so the
# cell does not depend on the internal slot layout of the assay class.
# (`slot =` is the Seurat v4 spelling; newer SeuratObject releases name this
# argument `layer` -- adjust if a deprecation warning appears.)
islet.hashtag <- CreateSeuratObject(counts = GetAssayData(rna, assay = "RNA", slot = "counts"))

# Standard RNA preprocessing: normalize, pick variable features, scale them
islet.hashtag <- NormalizeData(islet.hashtag)
islet.hashtag <- FindVariableFeatures(islet.hashtag, selection.method = "mean.var.plot")
islet.hashtag <- ScaleData(islet.hashtag, features = VariableFeatures(islet.hashtag))

# Add HTO data as a new assay independent from RNA
islet.hashtag[["HTO"]] <- CreateAssayObject(counts = mat.htos)
# Normalize HTO data, here we use centered log-ratio (CLR) transformation
islet.hashtag <- NormalizeData(islet.hashtag, assay = "HTO", normalization.method = "CLR")

islet.hashtag

In [None]:
# Demultiplex cells on the HTO assay; the call adds the HTO_* metadata columns
# (HTO_maxID, HTO_classification, HTO_classification.global, ...) used below.
islet.hashtag <- HTODemux(islet.hashtag, assay = "HTO", positive.quantile = 0.99)
# Cell counts per global class (e.g. the "Singlet" and "Negative" labels used later)
table(islet.hashtag$HTO_classification.global)
# Group cells based on the max HTO signal
# NOTE(review): the next cell resets the identities to
# "HTO_classification.global" before anything uses this grouping.
Idents(islet.hashtag) <- "HTO_maxID"

In [None]:
# Group cells by their global HTO class for plotting
Idents(islet.hashtag) <- "HTO_classification.global"
# Notebook plot size/resolution for the figure below
options(repr.plot.width = 9, repr.plot.height = 3, repr.plot.res = 300)
# RNA UMI counts per cell, split by the current identities, on a log scale
VlnPlot(islet.hashtag, features = "nCount_RNA", pt.size = 0.1, log = TRUE)

In [None]:
# First, we will remove negative cells from the object
# (identities are the global HTO classes set in the previous cell)
islet.hashtag.subset <- subset(islet.hashtag, idents = "Negative", invert = TRUE)

# Calculate a UMAP embedding of the HTO data
DefaultAssay(islet.hashtag.subset) <- "HTO"
# Scale and run PCA on every HTO feature (rownames() now refers to the HTO assay)
islet.hashtag.subset <- ScaleData(islet.hashtag.subset, features = rownames(islet.hashtag.subset),
    verbose = FALSE)
islet.hashtag.subset <- RunPCA(islet.hashtag.subset, features = rownames(islet.hashtag.subset), approx = FALSE)
# NOTE(review): dims = 1:6 assumes at least six HTO features survived filtering
# for this sample -- confirm before reusing on a sample with fewer hashtags.
islet.hashtag.subset <- RunUMAP(islet.hashtag.subset, dims = 1:6)

# Notebook plot size/resolution for the figure below
options(repr.plot.width = 8, repr.plot.height = 7, repr.plot.res = 300)
DimPlot(islet.hashtag.subset)

In [None]:
# NOTE(review): islet.singlet is created (and its PCA computed) in the
# "Extract the singlets" cell that follows in this notebook; run that cell first.
ElbowPlot(islet.singlet)

In [None]:
# Extract the singlets
# (relies on the identities being "HTO_classification.global" at this point)
islet.singlet <- subset(islet.hashtag, idents = "Singlet")

# Select variable features with the "mean.var.plot" method
islet.singlet <- FindVariableFeatures(islet.singlet, selection.method = "mean.var.plot")

# Scaling RNA data, we only scale the variable features here for efficiency
islet.singlet <- ScaleData(islet.singlet, features = VariableFeatures(islet.singlet))

# Run PCA
islet.singlet <- RunPCA(islet.singlet, features = VariableFeatures(islet.singlet))

# We use the first 10 PCs for neighbor finding, clustering and UMAP
# (inspect ElbowPlot(islet.singlet) to check this choice)
islet.singlet <- FindNeighbors(islet.singlet, reduction = "pca", dims = 1:10)
islet.singlet <- FindClusters(islet.singlet, resolution = 0.6, verbose = FALSE)
islet.singlet <- RunUMAP(islet.singlet, reduction = "pca", dims = 1:10)


In [None]:
# Notebook plot size/resolution for the figure below
options(repr.plot.width = 8, repr.plot.height = 5, repr.plot.res = 300)
# Projecting singlet identities on UMAP visualization
# (cells are colored by their per-hashtag HTO_classification)
DimPlot(islet.singlet, group.by = "HTO_classification")

In [None]:
# Display the singlet-only Seurat object
islet.singlet

In [None]:
# Display the per-cell metadata table of the singlet object
islet.singlet@meta.data

## Islet 162

In [None]:
# islet 162 GEX samples: SRR27326994, SRR27326995
# Shell commands (not R). Both runs use the same GSM7989941 HTO count files
# and differ only in the RNA object and the output prefix.

# HTO counts with RNA object SRR27326995
Rscript /nfs/turbo/umms-scjp-pank/2_IIDP/scripts/HTODemux.R --matrix GSM7989941_Islet_162_SAMN25980818_umi_count_matrix.mtx.gz --features GSM7989941_Islet_162_SAMN25980818_umi_count_genes.tsv.gz --barcodes GSM7989941_Islet_162_SAMN25980818_umi_count_barcodes.tsv.gz --rna SRR27326995.rna.rds --prefix Islet162_SRR27326995_ --outdir /nfs/turbo/umms-scjp-pank/2_IIDP/results/gencode_v39_private/GSE251912/results/singlets/

# HTO counts with RNA object SRR27326994
Rscript /nfs/turbo/umms-scjp-pank/2_IIDP/scripts/HTODemux.R --matrix GSM7989941_Islet_162_SAMN25980818_umi_count_matrix.mtx.gz --features GSM7989941_Islet_162_SAMN25980818_umi_count_genes.tsv.gz --barcodes GSM7989941_Islet_162_SAMN25980818_umi_count_barcodes.tsv.gz --rna SRR27326994.rna.rds --prefix Islet162_SRR27326994_ --outdir /nfs/turbo/umms-scjp-pank/2_IIDP/results/gencode_v39_private/GSE251912/results/singlets/

## Islet 168

In [None]:
# islet 168 GEX samples: SRR27326992, SRR27326993
# Shell commands (not R): one HTODemux.R run per technical replicate.

# Technical replicate 1 HTO counts with RNA object SRR27326992
# NOTE(review): these three input names have no ".gz" suffix, unlike the
# replicate 2 command below -- confirm the uncompressed files exist.
Rscript /nfs/turbo/umms-scjp-pank/2_IIDP/scripts/HTODemux.R --matrix GSM7989942_Islet_168_SAMN29494287_Technical_Replicate_1_umi_count_matrix.mtx --features GSM7989942_Islet_168_SAMN29494287_Technical_Replicate_1_umi_count_genes.tsv --barcodes GSM7989942_Islet_168_SAMN29494287_Technical_Replicate_1_umi_count_barcodes.tsv --rna SRR27326992.rna.rds --prefix Islet168_SRR27326992_ --outdir /nfs/turbo/umms-scjp-pank/2_IIDP/results/gencode_v39_private/GSE251912/results/singlets/

# Technical replicate 2 HTO counts with RNA object SRR27326993
Rscript /nfs/turbo/umms-scjp-pank/2_IIDP/scripts/HTODemux.R --matrix GSM7989942_Islet_168_SAMN29494287_Technical_Replicate_2_umi_count_matrix.mtx.gz --features GSM7989942_Islet_168_SAMN29494287_Technical_Replicate_2_umi_count_genes.tsv.gz --barcodes GSM7989942_Islet_168_SAMN29494287_Technical_Replicate_2_umi_count_barcodes.tsv.gz --rna SRR27326993.rna.rds --prefix Islet168_SRR27326993_ --outdir /nfs/turbo/umms-scjp-pank/2_IIDP/results/gencode_v39_private/GSE251912/results/singlets/

## Islet 67

In [None]:
# islet 67 GEX samples: SRR27326986, SRR27326987
# Shell commands (not R). Both runs use the same GSM7989943 HTO count files
# and differ only in the RNA object and the output prefix.

# HTO counts with RNA object SRR27326987
Rscript /nfs/turbo/umms-scjp-pank/2_IIDP/scripts/HTODemux.R --matrix GSM7989943_Islet_67_SAMN12496804_umi_count_matrix.mtx.gz --features GSM7989943_Islet_67_SAMN12496804_umi_count_genes.tsv.gz --barcodes GSM7989943_Islet_67_SAMN12496804_umi_count_barcodes.tsv.gz --rna SRR27326987.rna.rds --prefix Islet67_SRR27326987_ --outdir /nfs/turbo/umms-scjp-pank/2_IIDP/results/gencode_v39_private/GSE251912/results/singlets/

# HTO counts with RNA object SRR27326986
Rscript /nfs/turbo/umms-scjp-pank/2_IIDP/scripts/HTODemux.R --matrix GSM7989943_Islet_67_SAMN12496804_umi_count_matrix.mtx.gz --features GSM7989943_Islet_67_SAMN12496804_umi_count_genes.tsv.gz --barcodes GSM7989943_Islet_67_SAMN12496804_umi_count_barcodes.tsv.gz --rna SRR27326986.rna.rds --prefix Islet67_SRR27326986_ --outdir /nfs/turbo/umms-scjp-pank/2_IIDP/results/gencode_v39_private/GSE251912/results/singlets/

# Merge data

In [2]:
# List the per-run singlet objects. The pattern is a regular expression, so the
# dot is escaped and the match anchored to the end of the name; otherwise "."
# matches any character and the text may appear anywhere in a file name.
files <- list.files("/nfs/turbo/umms-scjp-pank/2_IIDP/results/gencode_v39_private/GSE251912/results/singlets/",
                    pattern = "_singlet\\.Rds$")
files

In [3]:
# Run accession of each file: the second "_"-separated token of its name
samples <- unlist(lapply(strsplit(files, "_"), function(parts) parts[2]))
samples

In [6]:
# For every singlet object, write its cell names (one per line, no header,
# unquoted) to "<SRR>_passQC_barcodes_demultiplexed.csv".
for (i in files) {
    srr <- strsplit(i, "_")[[1]][2]
    rna <- readRDS(paste0("/nfs/turbo/umms-scjp-pank/2_IIDP/results/gencode_v39_private/GSE251912/results/singlets/", i))
    write.table(
        Cells(rna),
        paste0("/nfs/turbo/umms-scjp-pank/2_IIDP/results/gencode_v39/emptyDrops/results/pctMTusingBelowEndCliff_pctMtless30_FDR0.005/cellbender_default/", srr, "_passQC_barcodes_demultiplexed.csv"),
        row.names = FALSE,
        col.names = FALSE,
        quote = FALSE
    )
}

In [5]:
# Load every singlet object into a list keyed by run accession (SRR) and record
# the accession in the metadata.
# The loop must cover all of `files` (not just files[1]): the merge step indexes
# `data` by every entry of `samples`.
data <- list()
for (i in files) {
    # Second "_"-separated token of the file name is the SRR accession
    srr <- strsplit(i, "_", fixed = TRUE)[[1]][2]
    rna <- readRDS(paste0("/nfs/turbo/umms-scjp-pank/2_IIDP/results/gencode_v39_private/GSE251912/results/singlets/", i))
    # Cell names are left untouched here; the merge call prefixes them
    # through its add.cell.ids argument.
    rna@meta.data$SRR <- srr
    data[[srr]] <- rna
}

In [10]:
#Merge Seurat objects for individual samples
# Fail early if an object is missing from `data` or there is nothing to merge.
stopifnot(length(samples) >= 2, all(samples %in% names(data)))
# samples[-1] instead of samples[2:length(samples)]: the latter counts
# backwards (2, 1) when only one sample is present.
merged_data <- merge(data[[samples[1]]], y = data[samples[-1]], project = 'HTO', add.cell.ids = samples)

In [11]:
# Display the per-cell metadata of the merged object (row names carry the SRR prefix)
merged_data@meta.data

Unnamed: 0_level_0,orig.ident,nCount_RNA,nFeature_RNA,nCount_HTO,nFeature_HTO,HTO_maxID,HTO_secondID,HTO_margin,HTO_classification,HTO_classification.global,hash.ID,RNA_snn_res.0.6,seurat_clusters,SRR
Unnamed: 0_level_1,<chr>,<dbl>,<int>,<dbl>,<int>,<chr>,<chr>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
SRR27326996_TGATCTTTCGCGTGAC,SeuratProject,10580,3767,192,6,0253-HHTO-03-TTCCGCCTCTCTTTG,0255-HHTO-05-AAGTATCGTTTCGCA,1.281194,0253-HHTO-03-TTCCGCCTCTCTTTG,Singlet,0253-HHTO-03-TTCCGCCTCTCTTTG,1,1,SRR27326996
SRR27326996_TCATTTGGTTGCTCGG,SeuratProject,10191,2167,312,6,0252-HHTO-02-TGATGGCCTATTGGG,0255-HHTO-05-AAGTATCGTTTCGCA,1.208670,0252-HHTO-02-TGATGGCCTATTGGG,Singlet,0252-HHTO-02-TGATGGCCTATTGGG,6,6,SRR27326996
SRR27326996_GCAGGCTCACCAATTG,SeuratProject,19283,5438,478,6,0253-HHTO-03-TTCCGCCTCTCTTTG,0254-HHTO-04-AGTAAGTTCAGCGTA,1.830183,0253-HHTO-03-TTCCGCCTCTCTTTG,Singlet,0253-HHTO-03-TTCCGCCTCTCTTTG,2,2,SRR27326996
SRR27326996_TCGGGACTCTTTGGAG,SeuratProject,16898,4595,609,6,0254-HHTO-04-AGTAAGTTCAGCGTA,0251-HHTO-01-GTCAACTCTTTAGCG,3.808796,0254-HHTO-04-AGTAAGTTCAGCGTA,Singlet,0254-HHTO-04-AGTAAGTTCAGCGTA,0,0,SRR27326996
SRR27326996_TCTCACGCATTGACTG,SeuratProject,14481,4166,439,6,0252-HHTO-02-TGATGGCCTATTGGG,0253-HHTO-03-TTCCGCCTCTCTTTG,1.797123,0252-HHTO-02-TGATGGCCTATTGGG,Singlet,0252-HHTO-02-TGATGGCCTATTGGG,0,0,SRR27326996
SRR27326996_CTCCGATTCGAACCTA,SeuratProject,21830,5644,347,6,0254-HHTO-04-AGTAAGTTCAGCGTA,0253-HHTO-03-TTCCGCCTCTCTTTG,3.155060,0254-HHTO-04-AGTAAGTTCAGCGTA,Singlet,0254-HHTO-04-AGTAAGTTCAGCGTA,0,0,SRR27326996
SRR27326996_CTTACCGTCCAGCACG,SeuratProject,15099,4769,333,6,0256-HHTO-06-GGTTGCCAGATGTCA,0255-HHTO-05-AAGTATCGTTTCGCA,1.729341,0256-HHTO-06-GGTTGCCAGATGTCA,Singlet,0256-HHTO-06-GGTTGCCAGATGTCA,1,1,SRR27326996
SRR27326996_GACCAATGTCGATTTG,SeuratProject,17890,4867,657,6,0254-HHTO-04-AGTAAGTTCAGCGTA,0253-HHTO-03-TTCCGCCTCTCTTTG,3.678370,0254-HHTO-04-AGTAAGTTCAGCGTA,Singlet,0254-HHTO-04-AGTAAGTTCAGCGTA,2,2,SRR27326996
SRR27326996_TCGACGGGTACTGTTG,SeuratProject,14210,4506,397,6,0251-HHTO-01-GTCAACTCTTTAGCG,0254-HHTO-04-AGTAAGTTCAGCGTA,2.564421,0251-HHTO-01-GTCAACTCTTTAGCG,Singlet,0251-HHTO-01-GTCAACTCTTTAGCG,10,10,SRR27326996
SRR27326996_AACAGGGTCTCGACGG,SeuratProject,18770,5316,871,6,0252-HHTO-02-TGATGGCCTATTGGG,0254-HHTO-04-AGTAAGTTCAGCGTA,2.406239,0252-HHTO-02-TGATGGCCTATTGGG,Singlet,0252-HHTO-02-TGATGGCCTATTGGG,0,0,SRR27326996


In [12]:
# Persist the merged singlet object
saveRDS(
    object = merged_data,
    file = "/nfs/turbo/umms-scjp-pank/2_IIDP/results/gencode_v39_private/GSE251912/results/singlets/merged_HTO.Rds"
)

In [7]:
# Reload the merged singlet object saved above
merged_data <- readRDS(
    file = "/nfs/turbo/umms-scjp-pank/2_IIDP/results/gencode_v39_private/GSE251912/results/singlets/merged_HTO.Rds"
)

In [31]:
# Lookup table: hashtag catalogue id (A0251-A0257) -> barcode sequence
hto_bc <- data.frame(
    id = paste0("A025", 1:7),
    HTO_seq = c(
        "GTCAACTCTTTAGCG",
        "TGATGGCCTATTGGG",
        "TTCCGCCTCTCTTTG",
        "AGTAAGTTCAGCGTA",
        "AAGTATCGTTTCGCA",
        "GGTTGCCAGATGTCA",
        "TGTCTTTCCTGCCAG"
    )
)
hto_bc

id,HTO_seq
<chr>,<chr>
A0251,GTCAACTCTTTAGCG
A0252,TGATGGCCTATTGGG
A0253,TTCCGCCTCTCTTTG
A0254,AGTAAGTTCAGCGTA
A0255,AAGTATCGTTTCGCA
A0256,GGTTGCCAGATGTCA
A0257,TGTCTTTCCTGCCAG


In [32]:
# Per-cell table of the HTO call and run accession
df <- merged_data@meta.data[, c("HTO_classification", "SRR")]
# Cell barcode: second "_"-separated token of the row name
df$barcode <- unlist(lapply(
    strsplit(rownames(df), '_', fixed = TRUE),
    function(parts) parts[2]
))
# Hashtag sequence: whatever follows the last "-" of the classification label
df$HTO_seq <- sub(".*\\-", "", df$HTO_classification)

tail(df)

Unnamed: 0_level_0,HTO_classification,SRR,barcode,HTO_seq
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>
SRR27326987_CATACTTCACTCCCTA,HHTO5-AAGTATCGTTTCGCA,SRR27326987,CATACTTCACTCCCTA,AAGTATCGTTTCGCA
SRR27326987_GGGACAATCAACGAGG,HHTO3-TTCCGCCTCTCTTTG,SRR27326987,GGGACAATCAACGAGG,TTCCGCCTCTCTTTG
SRR27326987_GGTTCTCTCCTTCAGC,HHTO5-AAGTATCGTTTCGCA,SRR27326987,GGTTCTCTCCTTCAGC,AAGTATCGTTTCGCA
SRR27326987_GAGGGATCACAAATGA,HHTO7-TGTCTTTCCTGCCAG,SRR27326987,GAGGGATCACAAATGA,TGTCTTTCCTGCCAG
SRR27326987_TACCTGCAGGGAGGCA,HHTO2-TGATGGCCTATTGGG,SRR27326987,TACCTGCAGGGAGGCA,TGATGGCCTATTGGG
SRR27326987_CTATCCGCACCACTGG,HHTO1-GTCAACTCTTTAGCG,SRR27326987,CTATCCGCACCACTGG,GTCAACTCTTTAGCG


In [35]:
# Islet 67 (GEX runs SRR27326986 and SRR27326987) hashtag -> treatment key:
#   A0251-A0254 = 24Hr Untreated
#   A0255       = 2Hr H2O2
#   A0256       = 4Hr Cytokines
#   A0257       = 24Hr Cytokines

# One 7-row table per run; the scalar SRR value is recycled down the rows
SRR27326986 <- data.frame(
    SRR = "SRR27326986",
    id = paste0("A025", 1:7),
    Treatment = rep(
        c("24Hr_Untreated", "2Hr_H2O2", "4Hr_Cytokines", "24Hr_Cytokines"),
        times = c(4, 1, 1, 1)
    )
)
SRR27326987 <- data.frame(
    SRR = "SRR27326987",
    id = paste0("A025", 1:7),
    Treatment = rep(
        c("24Hr_Untreated", "2Hr_H2O2", "4Hr_Cytokines", "24Hr_Cytokines"),
        times = c(4, 1, 1, 1)
    )
)
# Stack both runs
tmp <- rbind(SRR27326986, SRR27326987)

In [36]:
# Islet 150, 162 and 168 share one hashtag -> treatment key:
#   islet 150 GEX runs: SRR27326996, SRR27326997
#   islet 162 GEX runs: SRR27326994, SRR27326995
#   islet 168 GEX runs: SRR27326992, SRR27326993
#   A0251 = 2Hr Thapsigargin   A0252 = 4Hr Thapsigargin   A0253 = 4Hr DMSO
#   A0254 = 24Hr Untreated     A0255 = 24h DMSO           A0256 = 24h Thapsigargin

# Six runs x six hashtags: each run is repeated for its six rows, while the
# id/treatment vectors are cycled once per run.
tmp2 <- data.frame(
    SRR = rep(
        c("SRR27326996", "SRR27326997", "SRR27326994", "SRR27326995", "SRR27326992", "SRR27326993"),
        each = 6
    ),
    id = rep(paste0("A025", 1:6), times = 6),
    Treatment = rep(
        c("2Hr_Thapsigargin", "4Hr_Thapsigargin", "4Hr_DMSO",
          "24Hr_Untreated", "24h_DMSO", "24h_Thapsigargin"),
        times = 6
    )
)
tmp2

SRR,id,Treatment
<chr>,<chr>,<chr>
SRR27326996,A0251,2Hr_Thapsigargin
SRR27326996,A0252,4Hr_Thapsigargin
SRR27326996,A0253,4Hr_DMSO
SRR27326996,A0254,24Hr_Untreated
SRR27326996,A0255,24h_DMSO
SRR27326996,A0256,24h_Thapsigargin
SRR27326997,A0251,2Hr_Thapsigargin
SRR27326997,A0252,4Hr_Thapsigargin
SRR27326997,A0253,4Hr_DMSO
SRR27326997,A0254,24Hr_Untreated


In [37]:
# Stack the per-run hashtag -> treatment tables, then attach each hashtag's
# barcode sequence.
map <- rbind(tmp, tmp2)
# Namespace-qualified so this cell works even when library(dplyr) has not been
# attached yet (it is loaded further down in this notebook); the join key is
# spelled out instead of being guessed from shared column names.
map <- dplyr::inner_join(map, hto_bc, by = "id")
map

[1m[22mJoining with `by = join_by(id)`


SRR,id,Treatment,HTO_seq
<chr>,<chr>,<chr>,<chr>
SRR27326986,A0251,24Hr_Untreated,GTCAACTCTTTAGCG
SRR27326986,A0252,24Hr_Untreated,TGATGGCCTATTGGG
SRR27326986,A0253,24Hr_Untreated,TTCCGCCTCTCTTTG
SRR27326986,A0254,24Hr_Untreated,AGTAAGTTCAGCGTA
SRR27326986,A0255,2Hr_H2O2,AAGTATCGTTTCGCA
SRR27326986,A0256,4Hr_Cytokines,GGTTGCCAGATGTCA
SRR27326986,A0257,24Hr_Cytokines,TGTCTTTCCTGCCAG
SRR27326987,A0251,24Hr_Untreated,GTCAACTCTTTAGCG
SRR27326987,A0252,24Hr_Untreated,TGATGGCCTATTGGG
SRR27326987,A0253,24Hr_Untreated,TTCCGCCTCTCTTTG


In [21]:
library(dplyr)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [38]:
# Attach the treatment label to every cell by run accession and hashtag
# sequence. dim() is shown before and after so dropped (unmatched) cells are
# visible.
dim(df)
df <- dplyr::inner_join(
    df,
    map[, c("SRR", "Treatment", "HTO_seq")],
    by = c("SRR", "HTO_seq"),
    # Each cell must match at most one row of the key; error out rather than
    # silently duplicating cells if the key ever contains repeated pairs.
    relationship = "many-to-one"
)
dim(df)

[1m[22mJoining with `by = join_by(SRR, HTO_seq)`


In [39]:
# Display the per-cell table with the joined Treatment column
df

HTO_classification,SRR,barcode,HTO_seq,Treatment
<chr>,<chr>,<chr>,<chr>,<chr>
0253-HHTO-03-TTCCGCCTCTCTTTG,SRR27326996,TGATCTTTCGCGTGAC,TTCCGCCTCTCTTTG,4Hr_DMSO
0252-HHTO-02-TGATGGCCTATTGGG,SRR27326996,TCATTTGGTTGCTCGG,TGATGGCCTATTGGG,4Hr_Thapsigargin
0253-HHTO-03-TTCCGCCTCTCTTTG,SRR27326996,GCAGGCTCACCAATTG,TTCCGCCTCTCTTTG,4Hr_DMSO
0254-HHTO-04-AGTAAGTTCAGCGTA,SRR27326996,TCGGGACTCTTTGGAG,AGTAAGTTCAGCGTA,24Hr_Untreated
0252-HHTO-02-TGATGGCCTATTGGG,SRR27326996,TCTCACGCATTGACTG,TGATGGCCTATTGGG,4Hr_Thapsigargin
0254-HHTO-04-AGTAAGTTCAGCGTA,SRR27326996,CTCCGATTCGAACCTA,AGTAAGTTCAGCGTA,24Hr_Untreated
0256-HHTO-06-GGTTGCCAGATGTCA,SRR27326996,CTTACCGTCCAGCACG,GGTTGCCAGATGTCA,24h_Thapsigargin
0254-HHTO-04-AGTAAGTTCAGCGTA,SRR27326996,GACCAATGTCGATTTG,AGTAAGTTCAGCGTA,24Hr_Untreated
0251-HHTO-01-GTCAACTCTTTAGCG,SRR27326996,TCGACGGGTACTGTTG,GTCAACTCTTTAGCG,2Hr_Thapsigargin
0252-HHTO-02-TGATGGCCTATTGGG,SRR27326996,AACAGGGTCTCGACGG,TGATGGCCTATTGGG,4Hr_Thapsigargin


In [41]:
# Export the per-cell barcode/treatment table as tab-separated text
# (header row kept, no row names, no quoting)
write.table(
    x = df,
    file = "/nfs/turbo/umms-scjp-pank/2_IIDP/results/gencode_v39_private/GSE251912/results/singlets/HTO_barcode_maps.txt",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE
)