In [None]:
library(here)
library(Matrix)
library(tidyverse)
library(Seurat)
library(ggpointdensity)
library(scales)
library(reticulate)
library(repr)
#library(DoubletFinder)
library(future)
library(RColorBrewer)
library(edgeR)
library(limma)
#library(PCAtools)
options(future.globals.maxSize = 100000 * 1024^2)
source(here("R_functions","edgeR_function.R"))

# Gene annotation table: the CSV has no header row; its two columns are named
# here as gene id and description.
annotations <- read.csv("R_functions/gene_descriptions.csv", header = FALSE)
colnames(annotations) <- c("gene_id", "description")
# Keep only the first 9 characters of each id.
# NOTE(review): presumably this strips a transcript suffix from the locus id --
# confirm against the CSV contents.
annotations$gene_id <- substr(annotations$gene_id, 1, 9)

# Protoplasting-responsive genes: |logFC| > 1 in the bulk protoplasting table.
proto_genes <- read.csv("../data/bulk_data/protoplasting.csv")
# which() drops rows whose logFC is NA; plain logical indexing would have
# inserted NA entries into the gene list for those rows.
proto_list <- as.character(proto_genes$genes[which(abs(proto_genes$logFC) > 1)])
# NOTE(review): machine-specific absolute path; this only resolves on the
# original analysis host.
bulk_data <- read.csv("/home/robotmessenger810/data/buckets/single_cell_bucket_3_4_21/IWT_RNA_seq/scRNA_flowers/outputs/bulk_edger_10_16_20.csv")


In [2]:
# Load the integrated WT + mutant Seurat object saved earlier.
seu_intd_wt_mut <- readRDS(file = "../data/intd_seu_objects/4_12_22_WT_mut.rds")

# Clustering resolution passed to FindClusters() below.
resolution <- 0.75
# Fix the RNG state before any of the dimensionality-reduction/clustering steps.
set.seed(42)
DefaultAssay(seu_intd_wt_mut) <- "integrated"
# Plot size used by the notebook front end (repr).
options(repr.plot.width = 12, repr.plot.height = 12)

# Standard visualisation / clustering workflow on the integrated assay.
# (A ScaleData() call on `all_intd_sct` used to sit here and was disabled:
#  all_intd_sct <- ScaleData(all_intd_sct, verbose = FALSE))
#
# RunPCA documentation note: with features = NULL the PCA runs on the assay's
# variable features. Requested features must be present in the scaled data;
# any that are unscaled or have zero variance are dropped and the PCA is run
# on the remaining ones.
seu_intd_wt_mut <- RunPCA(
  seu_intd_wt_mut,
  npcs = 100,
  verbose = FALSE,
  approx = FALSE
)

# The first 20 PCs are used for both the neighbour graph and the UMAP
# (previously run with 20 PCs as of 2/14/22).
seu_intd_wt_mut <- seu_intd_wt_mut %>%
  FindNeighbors(dims = 1:20, verbose = FALSE) %>%
  FindClusters(resolution = resolution, algorithm = 3, verbose = FALSE) %>%
  RunUMAP(reduction = "pca", dims = 1:20, verbose = FALSE)

“The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session”


In [46]:
# Pseudo-bulk the raw RNA counts of one cluster for each WT-background sample
# (two WT and two YFP libraries): per gene, sum the counts over all cells that
# belong to the sample and to the chosen cluster.
cluster <- "11"
# Switch assays through the accessor (as done for "integrated" earlier in this
# notebook) instead of writing the @active.assay slot directly.
DefaultAssay(seu_intd_wt_mut) <- "RNA"

# Fetch the count matrix once and align per-cell cluster / sample labels to its
# columns by cell name, rather than subsetting the whole object twice per sample.
rna_counts <- GetAssayData(seu_intd_wt_mut, assay = "RNA", slot = "counts")
cell_ids <- colnames(rna_counts)
cell_cluster <- as.character(Idents(seu_intd_wt_mut)[cell_ids])
cell_sample <- as.character(seu_intd_wt_mut$orig.ident[cell_ids])

# Output variable name -> orig.ident of the library it is built from.
wt_sample_ids <- c(
  wt_1_AZ = "sc_26_combined",
  wt_2_AZ = "sc_67",
  YFP_1_AZ = "sc_101",
  YFP_2_AZ = "sc_103"
)

wt_pseudobulk <- lapply(wt_sample_ids, function(sample_id) {
  cell_idx <- which(cell_cluster == cluster & cell_sample == sample_id)
  # Fail loudly instead of silently producing an all-zero pseudo-bulk column.
  if (length(cell_idx) == 0) {
    stop("No cells found for sample ", sample_id, " in cluster ", cluster,
         call. = FALSE)
  }
  # Matrix::rowSums works on the sparse matrix directly (no dense copy);
  # drop = FALSE keeps a matrix even when a single cell matches.
  Matrix::rowSums(rna_counts[, cell_idx, drop = FALSE])
})

wt_1_AZ <- wt_pseudobulk[["wt_1_AZ"]]
wt_2_AZ <- wt_pseudobulk[["wt_2_AZ"]]
YFP_1_AZ <- wt_pseudobulk[["YFP_1_AZ"]]
YFP_2_AZ <- wt_pseudobulk[["YFP_2_AZ"]]



In [47]:
# Pseudo-bulk the raw RNA counts of one cluster for each mutant-background
# sample (two mut and two KE libraries): per gene, sum the counts over all cells
# that belong to the sample and to the chosen cluster.
cluster <- "11"
# Switch assays through the accessor (as done for "integrated" earlier in this
# notebook) instead of writing the @active.assay slot directly.
DefaultAssay(seu_intd_wt_mut) <- "RNA"

# Fetch the count matrix once and align per-cell cluster / sample labels to its
# columns by cell name, rather than subsetting the whole object twice per sample.
rna_counts <- GetAssayData(seu_intd_wt_mut, assay = "RNA", slot = "counts")
cell_ids <- colnames(rna_counts)
cell_cluster <- as.character(Idents(seu_intd_wt_mut)[cell_ids])
cell_sample <- as.character(seu_intd_wt_mut$orig.ident[cell_ids])

# Output variable name -> orig.ident of the library it is built from.
mut_sample_ids <- c(
  mut_1_AZ = "sc_27_combined",
  mut_2_AZ = "sc_68",
  KE_1_AZ = "sc_102",
  KE_2_AZ = "sc_104"
)

mut_pseudobulk <- lapply(mut_sample_ids, function(sample_id) {
  cell_idx <- which(cell_cluster == cluster & cell_sample == sample_id)
  # Fail loudly instead of silently producing an all-zero pseudo-bulk column.
  if (length(cell_idx) == 0) {
    stop("No cells found for sample ", sample_id, " in cluster ", cluster,
         call. = FALSE)
  }
  # Matrix::rowSums works on the sparse matrix directly (no dense copy);
  # drop = FALSE keeps a matrix even when a single cell matches.
  Matrix::rowSums(rna_counts[, cell_idx, drop = FALSE])
})

mut_1_AZ <- mut_pseudobulk[["mut_1_AZ"]]
mut_2_AZ <- mut_pseudobulk[["mut_2_AZ"]]
KE_1_AZ <- mut_pseudobulk[["KE_1_AZ"]]
KE_2_AZ <- mut_pseudobulk[["KE_2_AZ"]]


In [48]:
# Restrict every pseudo-bulk vector to the genes present in ALL eight samples.
# Intersecting only two of the vectors would let a gene that is missing from any
# other sample be indexed as NA further down. Reduce() keeps the gene order of
# the first vector (wt_1_AZ).
gene_intersection <- Reduce(
  intersect,
  lapply(
    list(wt_1_AZ, wt_2_AZ, YFP_1_AZ, YFP_2_AZ,
         mut_1_AZ, mut_2_AZ, KE_1_AZ, KE_2_AZ),
    names
  )
)
wt_1_AZ <- wt_1_AZ[gene_intersection]
wt_2_AZ <- wt_2_AZ[gene_intersection]
YFP_1_AZ <- YFP_1_AZ[gene_intersection]
YFP_2_AZ <- YFP_2_AZ[gene_intersection]
mut_1_AZ <- mut_1_AZ[gene_intersection]
mut_2_AZ <- mut_2_AZ[gene_intersection]
KE_1_AZ <- KE_1_AZ[gene_intersection]
KE_2_AZ <- KE_2_AZ[gene_intersection]

In [49]:
# Assemble the genes x samples pseudo-bulk count table; the column names are
# assigned while binding and the rows are labelled with the shared gene ids.
pb_df <- as.data.frame(cbind(
  WT1 = wt_1_AZ,
  WT2 = wt_2_AZ,
  YFP1 = YFP_1_AZ,
  YFP2 = YFP_2_AZ,
  mut1 = mut_1_AZ,
  mut2 = mut_2_AZ,
  KE1 = KE_1_AZ,
  KE2 = KE_2_AZ
))
rownames(pb_df) <- gene_intersection

In [50]:
# Persist the pseudo-bulk count table (gene ids are written as the row-name column).
write.csv(x = pb_df, file = "../data/pseudo_bulk_data/AZ_pbs_4_19_22.csv")

In [51]:
# Reload the pseudo-bulk table, using its first column (the gene ids) as row
# names so that only the sample count columns remain.
pb_df <- read.csv("../data/pseudo_bulk_data/AZ_pbs_4_19_22.csv", row.names = 1)

In [52]:
# Experimental factors, one entry per pb_df column
# (WT1, WT2, YFP1, YFP2, mut1, mut2, KE1, KE2).
# Levels are spelled out in the same (alphabetical) order as.factor() would
# produce, so "mut" is the reference level and the coefficient is `phenotypewt`.
phenotype <- factor(rep(c("wt", "mut"), each = 4), levels = c("mut", "wt"))
batch <- factor(rep(c(0, 0, 1, 1), times = 2))
design <- model.matrix(~ phenotype + batch) # + insertion)

# Sanity check that the design matrix is not singular: det(X'X) must be non-zero.
print(paste("determinant of XT*X of design matrix is: ", det(crossprod(design))))

# Contrast of interest: the phenotype (wt vs. mut) coefficient.
my.contrasts <- makeContrasts(s1_v_s2 = phenotypewt, levels = design)

[1] "determinant of XT*X of design matrix is:  32"


“Renaming (Intercept) to Intercept”


In [53]:
# Display the contrast matrix and the design matrix for a visual check.
my.contrasts
design

Unnamed: 0,s1_v_s2
Intercept,0
phenotypewt,1
batch1,0


Unnamed: 0,(Intercept),phenotypewt,batch1
1,1,1,0
2,1,1,0
3,1,1,1
4,1,1,1
5,1,0,0
6,1,0,0
7,1,0,1
8,1,0,1


In [54]:
# Spot-check the pseudo-bulk counts of a single gene across all sample columns.
pb_df["AT3G07970",]

Unnamed: 0_level_0,WT1,WT2,YFP1,YFP2,mut1,mut2,KE1,KE2
Unnamed: 0_level_1,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>
AT3G07970,1475,1176,1096,114,18,22,66,35


In [55]:
#put experimental covariates in
bulk_edger_1 =  edgeR_2_sample(pb_df, "WT", "mut", c(1,2,3,4), c(5,6,7,8), annotations, design, my.contrasts)

In [56]:
# Split the edgeR results into genes passing FDR < 0.2 with |logFC| > 1.
# NOTE(review): with the `phenotypewt` contrast a positive logFC should mean
# higher in WT, but the sign convention ultimately depends on edgeR_2_sample()
# -- confirm.
# which() keeps only rows where the condition is TRUE; plain logical indexing
# would add an all-NA row for every gene whose FDR or logFC is NA.
WT_higher_1 <- bulk_edger_1[which(bulk_edger_1$FDR < 0.2 & bulk_edger_1$logFC > 1), ]
WT_lower_1 <- bulk_edger_1[which(bulk_edger_1$FDR < 0.2 & bulk_edger_1$logFC < -1), ]

In [65]:
# Rows x columns of the filtered table, i.e. how many genes passed the
# FDR / logFC > 1 filter.
dim(WT_higher_1)

In [None]:
# Persist the full edgeR result table (row names are written as the first column).
write.csv(x = bulk_edger_1, file = "../data/pseudo_bulk_data/AZ_edger_4_19_22_factors.csv")

In [3]:
# Reload the saved edgeR table and write a copy into the figures directory.
# NOTE(review): read.csv() keeps the row-name column produced by write.csv() as
# an ordinary first column, so the copy written below carries an additional
# index column -- confirm the figure code expects that layout.
bulk_edger_1 <- read.csv(file = "../data/pseudo_bulk_data/AZ_edger_4_19_22_factors.csv")
write.csv(x = bulk_edger_1, file = "../data/for_figures/AZ_edger_4_19_22_factors.csv")

In [27]:
# Rows x columns of the reloaded edgeR result table.
dim(bulk_edger_1)