In [1]:
#THIS SCRIPT PERFORMS AZ CLUSTER IDENTIFICATION

suppressMessages(library(Seurat))
library(ggplot2)

# Bulk RNA-seq table (an edgeR output, judging by the file name).
bulk_data <- read.csv(file = "../../data/buckets/single_cell_bucket_3_4_21/IWT_RNA_seq/scRNA_flowers/outputs/bulk_edger_10_16_20.csv")

# Gene descriptions: the file is read without a header row, so the two columns are named here.
annotations <- read.csv(file = "R_functions/gene_descriptions.csv", header = FALSE)
names(annotations) <- c("gene_id", "description")
# Keep only the first 9 characters of every id
# (presumably this trims a transcript suffix -- TODO confirm against the file).
annotations$gene_id <- substr(annotations$gene_id, start = 1, stop = 9)

# GO result tables for figure 3; bp / cc / mf are read from separate files.
bp <- read.csv(file = "../data/shiny_go_analysis/figure_3/bp.csv")
cc <- read.csv(file = "../data/shiny_go_analysis/figure_3/cc.csv")
mf <- read.csv(file = "../data/shiny_go_analysis/figure_3/mf.csv")

In [2]:
# Print the R version, platform and attached/loaded package versions so the
# environment this notebook was run in is recorded alongside the results.
sessionInfo()

R version 3.6.3 (2020-02-29)
Platform: x86_64-conda_cos6-linux-gnu (64-bit)
Running under: Ubuntu 20.04.2 LTS

Matrix products: default
BLAS/LAPACK: /home/robotmessenger810/anaconda3/envs/r_3/lib/libopenblasp-r0.3.9.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] ggplot2_3.3.5 Seurat_3.1.5 

loaded via a namespace (and not attached):
 [1] httr_1.4.2         tidyr_1.1.3        jsonlite_1.7.2     viridisLite_0.4.0 
 [5] splines_3.6.3      leiden_0.3.9       ggrepel_0.9.1      globals_0.12.5    
 [9] pillar_1.6.4       lattice_0.20-45    glue_1.6.0  

In [None]:
# Load the integrated WT + mutant Seurat object.
seu_intd_wt_mut <- readRDS("../data/intd_seu_objects/4_12_22_WT_mut.rds")

# Clustering resolution handed to FindClusters below.
resolution <- 0.75
set.seed(42)
options(repr.plot.width = 12, repr.plot.height = 12)

# PCA, graph construction, clustering and UMAP all run on the integrated assay.
DefaultAssay(seu_intd_wt_mut) <- "integrated"

# Exact (approx = FALSE) PCA with 100 components; only PCs 1-20 are used downstream.
seu_intd_wt_mut <- RunPCA(
    seu_intd_wt_mut,
    npcs = 100,
    approx = FALSE,
    verbose = FALSE
)
seu_intd_wt_mut <- FindNeighbors(
    seu_intd_wt_mut,
    dims = 1:20,
    verbose = FALSE
)
# algorithm = 3 selects SLM in Seurat's numbering of community-detection methods.
seu_intd_wt_mut <- FindClusters(
    seu_intd_wt_mut,
    resolution = resolution,
    algorithm = 3,
    verbose = FALSE
)
seu_intd_wt_mut <- RunUMAP(
    seu_intd_wt_mut,
    reduction = "pca",
    dims = 1:20,
    verbose = FALSE
)

In [None]:
# Wide canvas so the per-genotype UMAP panels sit side by side.
options(repr.plot.width = 20, repr.plot.height = 10)
# Clusters are labelled on the plot; one panel per value of the "geno" metadata column.
DimPlot(
    seu_intd_wt_mut,
    reduction = "umap",
    split.by = "geno",
    label = TRUE,
    pt.size = 2
)

In [None]:
# Keep only the cells whose "geno" metadata value is "WT"; the marker and
# pseudobulk cells below work on this WT-only object.
seu_intd_wt <- subset(x = seu_intd_wt_mut, subset = geno == "WT")

In [None]:
# Marker testing for every cluster of the WT object.
# logfc.threshold = 0 disables the fold-change pre-filter, so the result also
# serves as the gene universe written out for GO analysis further down.
# max.cells.per.ident = 1000 downsamples each identity class to at most 1000 cells.
cluster_AZ_all <- FindAllMarkers(
    seu_intd_wt,
    logfc.threshold = 0,
    max.cells.per.ident = 1000
)

In [None]:
# Save the full marker table (all clusters) as CSV for the figures folder.
write.csv(
    cluster_AZ_all,
    file = paste0("../data/for_figures/", "AZ_markers_WT_ALL_res_75_April_25_22", ".csv")
)

In [None]:
# Reload a previously saved marker table, replacing cluster_AZ_all.
# NOTE(review): this reads an .rds dated April_19 from ../data/markers/, whereas the
# marker table computed in this notebook is written as a .csv dated April_25 under
# ../data/for_figures/. Nothing visible here creates the .rds -- confirm it comes from
# the same clustering before relying on cluster numbers below.
cluster_AZ_all <- readRDS(
    file = paste0("../data/markers/", "AZ_markers_WT_ALL_res_75_April_19_22", ".rds")
)

In [None]:
# Preview the first marker rows of cluster 11 (the cluster whose genes are exported
# as AZ-specific in the GO cell of this notebook).
head(x = cluster_AZ_all[cluster_AZ_all[["cluster"]] == 11, ], n = 6L)

In [None]:
#FOR GO ANALYSIS
# Two files are written for gene-set testing:
#   1. every marker row belonging to cluster 11 (the AZ cluster's genes), and
#   2. the unique genes across all clusters, i.e. all genes expressed highly enough
#      to be tested, which form the background universe.
write.csv(
    cluster_AZ_all[cluster_AZ_all$cluster == 11, ],
    file = paste0("../data/for_figures/", "AZ_spec_genes_universe_WT_res_75_4_25_22", ".csv"),
    row.names = FALSE
)
write.csv(
    unique(cluster_AZ_all$gene),
    file = paste0("../data/for_figures/", "WT_universe_spec_genes_WT_res_75_4_25_22", ".csv"),
    row.names = FALSE
)

In [1]:
#QRT2 data
# Gene ids come from column X and become the row names; the first three columns
# are then dropped and what remains is labelled "counts".
kwak_ptpms <- read.csv(file = "../data/counts/kwak_ptpms.csv")
rownames(kwak_ptpms) <- kwak_ptpms$X
kwak_ptpms[, 1:3] <- NULL
colnames(kwak_ptpms) <- "counts"

#HAE_YFP sorted
# Column 1 supplies the row names, column 2 the values kept in the one-column frame.
YFP_KE <- read.csv(file = "../data/counts/HAE_sorted.csv")
YFP_av <- data.frame(YFP_KE[, 2])
rownames(YFP_av) <- YFP_KE[, 1]


In [None]:
# Switch to the RNA assay so the counts pulled for the pseudobulk profiles come from
# it rather than from the integrated assay used for clustering.
DefaultAssay(seu_intd_wt) <- "RNA"

In [None]:
#get pseudobulk for each cluster to compare with kwak data
# For every cluster level, sum the raw counts of its cells gene by gene.
# The result is a list with one named numeric vector (genes) per cluster, in the
# order of levels(seurat_clusters); elements are also named by cluster level so the
# cluster each profile belongs to is explicit.
cluster_levels = levels(seu_intd_wt@meta.data$seurat_clusters)

# Fetch the counts matrix once instead of once per cluster.
raw_counts = GetAssayData(seu_intd_wt, slot = "counts")

pbs = lapply(cluster_levels, function(cluster_id) {
    # WhichCells selects on the active identities; this assumes they are still the
    # seurat_clusters assigned by FindClusters -- TODO confirm if idents were changed.
    cluster_cells = WhichCells(seu_intd_wt, idents = cluster_id)
    # Matrix::rowSums sums a sparse matrix directly (no dense copy per cluster);
    # drop = FALSE keeps a one-cell cluster as a single-column matrix.
    Matrix::rowSums(raw_counts[, cluster_cells, drop = FALSE])
})
names(pbs) = cluster_levels

saveRDS(pbs, "../data/counts/cluster_pbs_4_13_22")

In [None]:
# Reload the cached per-cluster pseudobulk list so the cells below can run
# without recomputing it.
pbs <- readRDS(file = "../data/counts/cluster_pbs_4_13_22")

In [None]:
#convert pseudobulk to TPM
# Each cluster profile is turned into a one-column data frame, rescaled so the
# column sums to one million (no gene-length correction is applied here), and its
# rows are sorted by gene name.
for (count in seq_along(pbs)) {
    cluster_df = data.frame(pbs[[count]])
    per_million = cluster_df / sum(cluster_df) * 1000000
    # drop = FALSE keeps the single column as a data frame so row names survive.
    pbs[[count]] = per_million[order(rownames(per_million)), , drop = FALSE]
}

In [None]:
#QRT2
# Spearman correlation between every cluster's pseudobulk profile and the QRT2 (kwak)
# dataset. The per-cluster value is stored for each cell in meta.data$kwak_cor and
# drawn on the UMAP.
#set dataset
dataset = kwak_ptpms
cluster_levels = levels(seu_intd_wt@meta.data$seurat_clusters)

# pbs is used positionally: element i must be the profile of cluster_levels[i].
# Fail loudly if the cached list does not match the current clustering.
stopifnot(length(pbs) == length(cluster_levels))

cors_spearman = vapply(seq_along(cluster_levels), function(i) {
    # Only genes present in both tables can be compared.
    shared_genes = intersect(rownames(pbs[[i]]), rownames(dataset))
    # The log transform is monotone, so it does not change the ranking Spearman
    # uses; it is kept to reproduce the earlier numbers exactly.
    cor(log(pbs[[i]][shared_genes, 1] + .1),
        log(dataset[shared_genes, 1] + .1),
        method = "spearman")
}, numeric(1))

# Map each cell to its cluster's correlation by level *name*. Building the label as
# i - 1 would silently mislabel clusters whenever the levels are not exactly
# "0".."n-1" (e.g. after unused levels were dropped).
cell_clusters = as.character(seu_intd_wt@meta.data$seurat_clusters)
seu_intd_wt@meta.data$kwak_cor = cors_spearman[match(cell_clusters, cluster_levels)]

plot = FeaturePlot(seu_intd_wt, features = "kwak_cor", pt.size = 1.5, cols = c("gray", "red"))
print(plot)
ggsave(filename = "../data/for_figures/UMAPs/kwak_cor_wt_2_1_22.png", plot = plot, width = 10, height = 10)

In [None]:
#HAE
# Spearman correlation between every cluster's pseudobulk profile and the sorted
# HAE-YFP dataset. The per-cluster value is stored for each cell in
# meta.data$HAE_YFP and drawn on the UMAP.
#set dataset
dataset = YFP_av
cluster_levels = levels(seu_intd_wt@meta.data$seurat_clusters)

# pbs is used positionally: element i must be the profile of cluster_levels[i].
# Fail loudly if the cached list does not match the current clustering.
stopifnot(length(pbs) == length(cluster_levels))

cors_spearman = vapply(seq_along(cluster_levels), function(i) {
    # Only genes present in both tables can be compared.
    shared_genes = intersect(rownames(pbs[[i]]), rownames(dataset))
    # The log transform is monotone, so it does not change the ranking Spearman
    # uses; it is kept to reproduce the earlier numbers exactly.
    cor(log(pbs[[i]][shared_genes, 1] + .1),
        log(dataset[shared_genes, 1] + .1),
        method = "spearman")
}, numeric(1))

# Map each cell to its cluster's correlation by level *name*. Building the label as
# i - 1 would silently mislabel clusters whenever the levels are not exactly
# "0".."n-1" (e.g. after unused levels were dropped).
cell_clusters = as.character(seu_intd_wt@meta.data$seurat_clusters)
seu_intd_wt@meta.data$HAE_YFP = cors_spearman[match(cell_clusters, cluster_levels)]

plot = FeaturePlot(seu_intd_wt, features = "HAE_YFP", pt.size = 1.5, cols = c("white", "red"))
print(plot)
# Relative path, matching the other outputs of this notebook, instead of a
# hard-coded home directory.
# NOTE(review): assumes the notebook runs from a direct subfolder of the project root,
# as the other "../data/..." paths already do.
ggsave(filename = "../data/for_figures/UMAPs/HAE_YFP_cor_wt_2_1_22.png", plot = plot, width = 10, height = 10)