In [None]:
library(BUSpaRse)
library(here)
library(Matrix)
library(tidyverse)
library(Seurat)
library(ggpointdensity)
library(scico)
library(scales)
library(DropletUtils)
library(reticulate)
library(repr)
library(DoubletFinder)
library(future)
# Raise the size limit on globals exported to future workers (value is in bytes).
options(future.globals.maxSize = 100000 * 1024^2)

# Bulk protoplasting table; keep the gene IDs whose absolute logFC exceeds 4.
proto_genes <- read.csv("../data/bulk_data/protoplasting.csv")
is_strong_responder <- abs(proto_genes$logFC) > 4
proto_list <- as.character(proto_genes$genes[is_strong_responder])

# Slightly modified from BUSpaRse, just to avoid installing a few dependencies not used here
#
# Read one count matrix and its row/column labels from a directory.
#
# Arguments:
#   dir   Directory holding the files; must exist (normalizePath errors otherwise).
#   name  File prefix; the function reads <dir>/<name>.mtx,
#         <dir>/<name>.genes.txt and <dir>/<name>.barcodes.txt.
#
# Returns a dgCMatrix with genes in rows (rownames = lines of the genes file)
# and barcodes in columns (colnames = lines of the barcodes file).
read_count_output <- function(dir, name) {
  dir <- normalizePath(dir, mustWork = TRUE)
  prefix <- file.path(dir, name)

  # The matrix read has cells in rows, so transpose to genes x cells.
  m <- Matrix::readMM(paste0(prefix, ".mtx"))
  m <- Matrix::t(m)
  m <- as(m, "dgCMatrix")

  # Pass the paths directly to readLines(): it opens and closes the file
  # itself. Wrapping them in file() left connection objects behind that were
  # only reclaimed (with a warning) at garbage collection.
  genes <- readLines(paste0(prefix, ".genes.txt"))
  barcodes <- readLines(paste0(prefix, ".barcodes.txt"))

  colnames(m) <- barcodes
  rownames(m) <- genes
  m
}


In [2]:
# Prerequisite: the cell defining read_count_output() must already have been
# run; every matrix below is loaded through it.

# Make the directory reported by here() the working directory, so the relative
# "../data/..." paths below are resolved from there.
setwd(here())

# Each sample directory provides a "spliced" and an "unspliced" matrix.

# WT replicate 1 (sc_26)
wt1_spliced <- read_count_output("../data/sc_26", "spliced")
wt1_unspliced <- read_count_output("../data/sc_26", "unspliced")

# WT replicate 2 (sc_67)
wt2_spliced <- read_count_output("../data/sc_67", "spliced")
wt2_unspliced <- read_count_output("../data/sc_67", "unspliced")

# Mutant replicate 1 (sc_27)
mut1_spliced <- read_count_output("../data/sc_27", "spliced")
mut1_unspliced <- read_count_output("../data/sc_27", "unspliced")

# Mutant replicate 2 (sc_68)
mut2_spliced <- read_count_output("../data/sc_68", "spliced")
mut2_unspliced <- read_count_output("../data/sc_68", "unspliced")

In [3]:
# Pull out only Arabidopsis genes: keep the rows whose name contains the
# literal substring "AT". drop = FALSE keeps the result a matrix even when a
# single row (or none) matches.
# NOTE(review): the match is "AT" anywhere in the name, not only as a prefix;
# confirm that no non-Arabidopsis feature names in the reference contain "AT".
keep_arab_rows <- function(counts) {
  is_arab <- grepl("AT", rownames(counts), fixed = TRUE)
  counts[is_arab, , drop = FALSE]
}

# WT1
wt1_spliced_arab <- keep_arab_rows(wt1_spliced)
wt1_unspliced_arab <- keep_arab_rows(wt1_unspliced)

# WT2
wt2_spliced_arab <- keep_arab_rows(wt2_spliced)
wt2_unspliced_arab <- keep_arab_rows(wt2_unspliced)

# MUT1
mut1_spliced_arab <- keep_arab_rows(mut1_spliced)
mut1_unspliced_arab <- keep_arab_rows(mut1_unspliced)

# MUT2
mut2_spliced_arab <- keep_arab_rows(mut2_spliced)
mut2_unspliced_arab <- keep_arab_rows(mut2_unspliced)

In [4]:
# Build one combined (spliced + unspliced) count matrix per sample.
# Real cells are assumed to appear in both matrices, so only barcodes present
# in both are kept; the two matrices are then added element-wise over those
# barcodes. No emptyDrops filtering happens in this cell.
# NOTE(review): the addition assumes both matrices list the same genes in the
# same row order - confirm against the upstream quantification output.

# WT1
shared <- intersect(
  colnames(wt1_spliced_arab),
  colnames(wt1_unspliced_arab)
)
wt1_combined <- wt1_spliced_arab[, shared] + wt1_unspliced_arab[, shared]

# WT2
shared <- intersect(
  colnames(wt2_spliced_arab),
  colnames(wt2_unspliced_arab)
)
wt2_combined <- wt2_spliced_arab[, shared] + wt2_unspliced_arab[, shared]

# MUT1
shared <- intersect(
  colnames(mut1_spliced_arab),
  colnames(mut1_unspliced_arab)
)
mut1_combined <- mut1_spliced_arab[, shared] + mut1_unspliced_arab[, shared]

# MUT2
shared <- intersect(
  colnames(mut2_spliced_arab),
  colnames(mut2_unspliced_arab)
)
mut2_combined <- mut2_spliced_arab[, shared] + mut2_unspliced_arab[, shared]

# Planned pipeline: make combined spliced/unspliced count matrices for all
# samples (done above), then remove empty droplets with emptyDrops, then
# remove doublets, then continue with the downstream analysis.

In [6]:
# Write each combined matrix to disk in Matrix Market format, before any
# empty-droplet filtering. Each matrix is first converted to triplet
# ("dgTMatrix") form.
# NOTE(review): the output paths are absolute and machine-specific; the target
# directory must already exist.

# WT1
t <- as(wt1_combined, "dgTMatrix")
writeMM(
  t,
  "/home/robotmessenger810/sc_analysis/data/raw_count_matrices/combined_pre_empty_drops/wt_1_matrix.mtx"
)

# WT2
t <- as(wt2_combined, "dgTMatrix")
writeMM(
  t,
  "/home/robotmessenger810/sc_analysis/data/raw_count_matrices/combined_pre_empty_drops/wt_2_matrix.mtx"
)

# MUT1
t <- as(mut1_combined, "dgTMatrix")
writeMM(
  t,
  "/home/robotmessenger810/sc_analysis/data/raw_count_matrices/combined_pre_empty_drops/mut_1_matrix.mtx"
)

# MUT2
t <- as(mut2_combined, "dgTMatrix")
writeMM(
  t,
  "/home/robotmessenger810/sc_analysis/data/raw_count_matrices/combined_pre_empty_drops/mut_2_matrix.mtx"
)

NULL

NULL

NULL

NULL