In [None]:
# Register the r422 conda environment's R library with .libPaths().
# NOTE(review): this is a machine-specific absolute path; adjust it when
# running the notebook on another host.
.libPaths("/home/mahat/.conda/envs/r422/lib/R/library")
# Print the library paths now in effect.
.libPaths()

In [None]:
# Attach every package used by this notebook with start-up messages
# silenced. The attach order is kept exactly as it was.
suppressMessages({
    library(tidyverse)
    library(ggExtra)
    library(matrixStats)
    library(plyranges)
    library(viridis)
    library(data.table)
    library(ggbio)
    library(ggridges)
    library(ggbeeswarm)
    library(ggpointdensity)
    library(Matrix)
})

# Project helper functions, loaded from the notebook's working directory.
source("./scGRO_functions.r")

In [None]:
# Notebook display options: plot size 6 x 4, rendered as SVG.
options(
    repr.plot.width = 6,
    repr.plot.height = 4,
    jupyter.plot_mimetypes = "image/svg+xml"
)

# Session-wide ggplot2 theme: theme_classic() with bold black axis titles and
# text, a centred bold plot title, no axis lines, and a grey panel border.
theme_set(
    theme_classic() +
        theme(
            axis.title.x = element_text(color = "black", size = 14, face = "bold"),
            axis.title.y = element_text(color = "black", size = 14, face = "bold"),
            axis.text = element_text(color = "black", size = 12, face = "bold"),
            plot.title = element_text(face = "bold", size = 14, hjust = 0.5),
            axis.line = element_blank(),
            # axis.ticks = element_blank()
            panel.border = element_rect(colour = "grey", fill = NA, linewidth = 1)
        )
)

In [None]:
# Build a plotmath-style label "y = a + b . x, r^2 = ..." from a linear fit.
# https://groups.google.com/forum/#!topic/ggplot2/1TgH-kG5XMA
#
# x, y: vectors passed to lm(y ~ x).
# Returns a character vector holding the deparsed expression, with the
# intercept and slope formatted to 2 significant digits and r^2 to 3.
lm_eqn <- function(x, y) {
    fit <- lm(y ~ x)
    coefs <- unname(coef(fit))

    # Formatted pieces substituted into the label template below.
    pieces <- list(
        a = format(coefs[1], digits = 2),
        b = format(coefs[2], digits = 2),
        r2 = format(summary(fit)$r.squared, digits = 3)
    )

    label <- substitute(
        italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2,
        pieces
    )
    as.character(as.expression(label))
}

In [None]:
# Feature annotation (BED). Earlier feature sets are kept for reference:
#features = read_bed("../data/groHMM_mES_BRsComb_LP-50_UTS10_features_customized_v2.bed");
# features = read_bed("../data/mES_BRsComb_dREGfiltered_features_customized_OSNenhancersPlusSEs_v1.bed");
features <- read_bed("../data/dREG_refinedFeatures_mES_mm10_OSNcustomEnhancers_SEs.bed")

# Index features by name so they can be looked up as features["GN-..."],
# and drop the score column.
names(features) <- features$name
features$score <- NULL
features

# # select genes longer than 5kp from each feature
# # truncate genes longer than 30kb to 30kb
# features = features %>%
#     filter( width(features) >= 3000 | substr(name, 0, 2) != "GN" ) %>%
#     anchor_center() %>%
#     mutate(width=width-1000);
# length(features)

# Features whose name starts with "GN" and that are wider than 30 kb are
# resized to 30 kb, keeping their start fixed.
longf <- which(width(features) > 30000 & startsWith(names(features), "GN"))
features[longf] <- resize(features[longf], width = 30000, fix = "start")
features

In [None]:
# Load the consolidated scGRO object and the filtered counts object
# (v2p8). The v2p9 inputs are kept below as commented-out alternatives.
scGRO <- readRDS("../data/scGROv2p8_consolidated.rds")
counts <- readRDS("../data/scGROv2p8_mapq3qc_filtered_counts.rds")
# scGRO  = readRDS("../jay_m/data_jay_m/scGROv2p9_filtered_consolidated.rds");
# counts = readRDS( "../jay_m/data_jay_m/scGROv2p9_mapq3qc_filtered_counts.rds" );
# hist(colSums(counts))
# head(scGRO)
# head(counts)

In [None]:
# Keep ranges passing all five QC flags, build a cell ID that is unique
# across experiments (Exp-Plate-Cell), shrink each range to width 1 at its
# end, keep only cells that are columns of `counts`, and select `cellID`.
scGRO <- scGRO %>%
    filter(umiQC & plateQC & cellQC & countQC & miRQC) %>%
    mutate(cellID = factor(paste(Exp, Plate, Cell, sep = "-"))) %>%
    # filter( !(Exp == "Exp236" & Plate %in% paste0("c0", 5:8)) ) %>%
    # filter( !(Exp == "Exp260b") ) %>%
    resize(width = 1, fix = "end") %>%
    filter(cellID %in% colnames(counts)) %>%
    # subsetByOverlaps(features) %>%
    select(cellID)

# Drop range names before printing.
names(scGRO) <- NULL
scGRO

In [None]:
# Control copy of scGRO with the cell labels randomly permuted across
# ranges (unused factor levels are dropped first).
# NOTE(review): no seed is set here, so the permutation differs between runs.
scrambled <- scGRO
scrambled$cellID <- sample(droplevels(scrambled$cellID))

In [None]:
# Load dREG peak calls (BED-style table without a header) and convert them
# to GRanges. Columns: chr, start, end, score, pval, center.
dREG <- read.table(
    "../data/PROseq_mES_BRsComb.dREG.peak.full.bed",
    header = FALSE,
    stringsAsFactors = FALSE
)
colnames(dREG) <- c("chr", "start", "end", "score", "pval", "center")

# BED starts are 0-based while GRanges starts are 1-based. Declare that
# explicitly so the peak coordinates follow the same convention as the
# features imported with read_bed(); a plain GRanges(dREG) would leave every
# start one base too small.
# NOTE(review): `center` is carried over unchanged as a metadata column;
# confirm which coordinate convention downstream code expects for it.
dREG <- makeGRangesFromDataFrame(
    dREG,
    keep.extra.columns = TRUE,
    starts.in.df.are.0based = TRUE
)
dREG

In [None]:
# chr_corr = fread(file="../data/scGROv2p8_mapq3qc_3xtimebins10kbp_1Kpermuted_chisq_empp.csv.gz");
# # chr_corr = fread(file="../data/scGROv2p8_mapq3qc_max10kbp_1Kpermuted_chisq_empp.csv.gz");
#     # filter(emp_p_pos < 0.1 | emp_p_neg < 0.1);
#     # filter(emp_p_pos < 0.01);
# nrow(chr_corr)
# head(chr_corr)

In [None]:
# chr_corrF = chr_corr %>%
#     filter( emp_p_pos <= 0.01 & pA >= 0.025 & pB >= 0.025 & pBoth >= expAB*5);
# dim(chr_corrF)
# chr_corrF[1:5, ]

### Plot the distance between genes in the same cells and their positions

In [None]:
# Larp7 vs. Chmp2a: look up both features, draw the pair plot and the
# position table (helpers come from scGRO_functions.r), saving each as PDF.
geneA <- features["GN-Larp7"]
geneA
geneB <- features["GN-Chmp2a"]
geneB

plot_position_pairs(scGRO, geneA, geneB, dREG)
ggsave(filename = "../plots/Gene-Gene_pairs_Larp7-Chmp2a.pdf", width = 4, height = 4, units = "in")

plot_position_table(scGRO, geneA, geneB)
ggsave(filename = "../plots/Gene-Gene_relativePosition_Larp7-Chmp2a.pdf", width = 4, height = 4, units = "in")

In [None]:
# Rbmxl2 vs. Hnrnpa1: pair plot and position table, each saved as PDF.
geneA <- features["GN-Rbmxl2"]
geneA
geneB <- features["GN-Hnrnpa1"]
geneB

plot_position_pairs(scGRO, geneA, geneB, dREG)
ggsave(filename = "../plots/Gene-Gene_pairs_Rbmxl2-Hnrnpa1.pdf", width = 4, height = 4, units = "in")

plot_position_table(scGRO, geneA, geneB)
ggsave(filename = "../plots/Gene-Gene_relativePosition_Rbmxl2-Hnrnpa1.pdf", width = 4, height = 4, units = "in")

In [None]:
# Smarcc1 vs. Prkdc: pair plot and position table, each saved as PDF.
# geneA = features["GN-Hnrnpa1"]
geneA <- features["GN-Smarcc1"]
geneA
geneB <- features["GN-Prkdc"]
geneB

plot_position_pairs(scGRO, geneA, geneB, dREG)
ggsave(filename = "../plots/Gene-Gene_pairs_Smarcc1-Prkdc.pdf", width = 4, height = 4, units = "in")
plot_position_table(scGRO, geneA, geneB)
ggsave(filename = "../plots/Gene-Gene_relativePosition_Smarcc1-Prkdc.pdf", width = 4, height = 4, units = "in")

In [None]:
# Smarcc1 vs. Senp2: pair plot and position table (nothing is saved).
# NOTE(review): the disabled ggsave() calls below still carry an "Npm1-"
# filename stub that does not match the genes plotted in this cell.
# geneA = features["GN-Hnrnpa1"]
geneA <- features["GN-Smarcc1"]
geneA
geneB <- features["GN-Senp2"]
geneB

plot_position_pairs(scGRO, geneA, geneB, dREG)
# ggsave(filename="../plots/Gene-Gene_pairs_Npm1-.pdf", width=4, height=4, units="in")
plot_position_table(scGRO, geneA, geneB)
# ggsave(filename="../plots/Gene-Gene_relativePosition_Npm1-.pdf", width=4, height=4, units="in")

In [None]:
# Sox2 gene vs. the "Sox2_105kbDn" feature. The pair plot is drawn twice,
# once with the second feature's strand set to "+" and once with "-".
gene <- features["GN-Sox2"]
enh <- features["Sox2_105kbDn"]

strand(enh) <- "+"
plot_position_pairs(scGRO, gene, enh, dREG)

strand(enh) <- "-"
plot_position_pairs(scGRO, gene, enh, dREG)

# The table is computed with `enh` still on the "-" strand.
plot_position_table(scGRO, gene, enh)

In [None]:
# Nras vs. Prdx6: pair plot and position table.
geneA <- features["GN-Nras"]
geneB <- features["GN-Prdx6"]
plot_position_pairs(scGRO, geneA, geneB, dREG)
plot_position_table(scGRO, geneA, geneB)

In [None]:
# Dusp12 vs. Rab11b: pair plot and position table.
geneA <- features["GN-Dusp12"]
geneB <- features["GN-Rab11b"]
plot_position_pairs(scGRO, geneA, geneB, dREG)
plot_position_table(scGRO, geneA, geneB)

In [None]:
# Surf6 vs. Bub3: pair plot and position table.
geneA <- features["GN-Surf6"]
geneB <- features["GN-Bub3"]
plot_position_pairs(scGRO, geneA, geneB, dREG)
plot_position_table(scGRO, geneA, geneB)

In [None]:
# Purb vs. Ndufa7: pair plot and position table.
geneA <- features["GN-Purb"]
geneB <- features["GN-Ndufa7"]
plot_position_pairs(scGRO, geneA, geneB, dREG)
plot_position_table(scGRO, geneA, geneB)

In [None]:
# Enc1 vs. Rpl7l1 (LHX2_TARGET_GENES according to MSigDB): pair plot and
# position table.
geneA <- features["GN-Enc1"]
geneB <- features["GN-Rpl7l1"]
plot_position_pairs(scGRO, geneA, geneB, dREG)
plot_position_table(scGRO, geneA, geneB)

In [None]:
# Ftsj3 vs. Oxa1l: pair plot and position table.
geneA <- features["GN-Ftsj3"]
geneB <- features["GN-Oxa1l"]
plot_position_pairs(scGRO, geneA, geneB, dREG)
plot_position_table(scGRO, geneA, geneB)

In [None]:
# Ahsa1 vs. Brd2: pair plot and position table.
geneA <- features["GN-Ahsa1"]
geneB <- features["GN-Brd2"]
plot_position_pairs(scGRO, geneA, geneB, dREG)
plot_position_table(scGRO, geneA, geneB)

In [None]:
# Pnn vs. Rnps1. Pnn interacts with Rnps1:
# https://journals.asm.org/doi/10.1128/MCB.23.20.7363-7376.2003#F4
# Row copied from a results table for this pair:
# GN-Pnn	GN-Rnps1	0.03350970	0.02544722	0.0008527286	0.004913076	0.01933373	0.009	0.991
geneA <- features["GN-Pnn"]
geneB <- features["GN-Rnps1"]

# Only the pair plot is written to disk.
plot_position_pairs(scGRO, geneA, geneB, dREG)
ggsave(filename = "../plots/Pnn_Rnps1_sameCell_readPairs.pdf", width = 4, height = 4, units = "in")
plot_position_table(scGRO, geneA, geneB)

In [None]:
# Srsf2 vs. Morf4l2: pair plot and position table.
geneA <- features["GN-Srsf2"]
geneB <- features["GN-Morf4l2"]
plot_position_pairs(scGRO, geneA, geneB, dREG)
plot_position_table(scGRO, geneA, geneB)

In [None]:
# Tcp1 vs. Rnps1: pair plot and position table. Source for the pairing:
# https://maayanlab.cloud/Harmonizome/gene_set/RNPS1/Pathway+Commons+Protein-Protein+Interactions
geneA <- features["GN-Tcp1"]
geneB <- features["GN-Rnps1"]
plot_position_pairs(scGRO, geneA, geneB, dREG)
plot_position_table(scGRO, geneA, geneB)

In [None]:
# Tcp1 vs. Mrpl18: pair plot and position table.
geneA <- features["GN-Tcp1"]
geneB <- features["GN-Mrpl18"]
plot_position_pairs(scGRO, geneA, geneB, dREG)
plot_position_table(scGRO, geneA, geneB)

In [None]:
# Eif2b5 vs. Rnps1: pair plot and position table.
geneA <- features["GN-Eif2b5"]
geneB <- features["GN-Rnps1"]
plot_position_pairs(scGRO, geneA, geneB, dREG)
plot_position_table(scGRO, geneA, geneB)

In [None]:
# Rpl30 vs. Rnps1: pair plot and position table.
geneA <- features["GN-Rpl30"]
geneB <- features["GN-Rnps1"]
plot_position_pairs(scGRO, geneA, geneB, dREG)
plot_position_table(scGRO, geneA, geneB)

In [None]:
# Btf3 vs. Rbm3: pair plot and position table.
geneA <- features["GN-Btf3"]
geneB <- features["GN-Rbm3"]
plot_position_pairs(scGRO, geneA, geneB, dREG)
plot_position_table(scGRO, geneA, geneB)

In [None]:
# Tcp1 vs. Rnps1 again: this cell repeats a pair already plotted earlier
# in the notebook.
geneA <- features["GN-Tcp1"]
geneB <- features["GN-Rnps1"]
plot_position_pairs(scGRO, geneA, geneB, dREG)
plot_position_table(scGRO, geneA, geneB)

In [None]:
# Template cell: complete both "GN-" names with gene symbols before running.
# NOTE(review): as written, both lookups use the bare name "GN-"; presumably
# no such feature exists, in which case this cell will not run as-is.
geneA <- features["GN-"]
geneB <- features["GN-"]
plot_position_pairs(scGRO, geneA, geneB, dREG)
plot_position_table(scGRO, geneA, geneB)