### Make bedgraph files:

In [None]:
# Use the package library of the r422 conda environment, then show the
# resulting library search path.
.libPaths(new = "/home/mahat/.conda/envs/r422/lib/R/library")
.libPaths()

In [None]:
suppressMessages({
    library(tidyverse)
    library(scales)
    library(foreach)
    library(doParallel)
    library(rtracklayer)
    library(plyranges)
    library(Matrix)
    library(ggcorrplot)
});

In [None]:
# Plot defaults for the notebook: classic ggplot2 theme, 5 x 3 figures,
# rendered as SVG.
theme_set(theme_classic())
options(
    jupyter.plot_mimetypes = "image/svg+xml",
    repr.plot.height = 3,
    repr.plot.width = 5
)

# Register the doParallel backend for foreach (argument: 8).
registerDoParallel(8)

## 1. Load scGRO-seq reads and custom feature annotations

In [None]:
# Load the groHMM-extended gene and enhancer annotations from BED.
# Earlier annotation file, kept for reference:
# features = read_bed("../data/groHMM_mES_BRsComb_LP-50_UTS10_features_customized_v2.bed");
features <- read_bed(
    "../data/groHMM_dREG_refinedFeatures_mES_mm10_OSNcustomEnhancers_SEs.bed"
)

# Use the BED name column as the range names, then drop the `name` and
# `score` metadata columns.
names(features) <- features$name
features$score <- NULL
features$name <- NULL

# truncate long features to 20kb to reduce bias
#longf = which( width(features) > 20000 );
#features[longf] = features[longf] %>%
#    resize( width = 20000, fix="start" );

In [None]:
# Read the desired scGRO dataset and the matching filtered counts object.
scGRO <- readRDS("../data/scGROv2p8_consolidated.rds")
counts <- readRDS("../data/scGROv2p8_mapq3qc_filtered_counts.rds")

# Report the size of each object.
length(scGRO)
dim(counts)

In [None]:
# Keep reads passing the mapq threshold and every QC flag, tag each read with
# experiment / plate / cell identifiers (experiment, plate and cell barcode
# joined with "-"), shrink each read to a single base at its end, and keep
# only reads whose cell appears among the columns of `counts`.
scGROfilt <- scGRO %>%
    filter(mapq >= 3 & umiQC & plateQC & cellQC & countQC & miRQC) %>%
    # filter( umiQC & miRQC ) %>%
    mutate(
        ExpID = factor(paste0(Exp)),
        PlateID = factor(paste(Exp, Plate, sep = "-")),
        cellID = factor(paste(Exp, Plate, Cell, sep = "-"))
    ) %>%
    # filter( seqnames != "chrM" ) %>%
    resize(width = 1, fix = "end") %>%
    # Only keep reads that belong to filtered cells
    filter(cellID %in% colnames(counts)) %>%
    select(ExpID, PlateID, cellID)
# names(scGRO) = NULL;
scGROfilt

In [None]:
# List the experiment and plate identifiers present in the filtered reads.
scGROfilt$ExpID %>% unique()
scGROfilt$PlateID %>% unique()

In [None]:
# bedGraph of all cells: one coverage track per strand.
id <- "ExpXXX_cXX"

# Strand symbol -> output file suffix ("_pl" = plus strand, "_mn" = minus).
strand_suffix <- c("+" = "_pl", "-" = "_mn")

for (strand_sign in names(strand_suffix)) {
    outf <- paste0(
        "../data/scGROv2p8_mapq3qc_filtered_BedGraphs/",
        id, strand_suffix[[strand_sign]], ".bedGraph"
    )
    # Store the coverage under its own name. `counts` holds the filtered
    # counts object whose column names the read-filtering step uses to select
    # cells; overwriting it here would break re-running that step.
    strand_cov <- scGROfilt %>%
        filter(strand == strand_sign) %>%
        coverage()
    export(strand_cov, format = "bedGraph", con = outf)
}

In [None]:
# bedGraph of all cells except plates Exp236 c05-c08: one coverage track per
# strand.
id <- "ExpXXX_BUT_236_c05-08"

# Plates left out of this aggregate (defined once, used for both strands).
skip_plates <- c("Exp236-c05", "Exp236-c06", "Exp236-c07", "Exp236-c08")

# Strand symbol -> output file suffix ("_pl" = plus strand, "_mn" = minus).
strand_suffix <- c("+" = "_pl", "-" = "_mn")

for (strand_sign in names(strand_suffix)) {
    outf <- paste0(
        "../data/scGROv2p8_mapq3qc_filtered_BedGraphs/",
        id, strand_suffix[[strand_sign]], ".bedGraph"
    )
    # Store the coverage under its own name. `counts` holds the filtered
    # counts object whose column names the read-filtering step uses to select
    # cells; overwriting it here would break re-running that step.
    strand_cov <- scGROfilt %>%
        filter(strand == strand_sign & !(PlateID %in% skip_plates)) %>%
        coverage()
    export(strand_cov, format = "bedGraph", con = outf)
}

In [None]:
# Exp level: write a plus- and a minus-strand bedGraph for every experiment.
# Drop unused factor levels first so the loop only visits experiments that
# still have reads.
scGROfilt$ExpID <- droplevels(scGROfilt$ExpID)

foreach(
    id = levels(scGROfilt$ExpID)
) %do% {
    for (strand_sign in c("+", "-")) {
        # "_pl" = plus strand, "_mn" = minus strand
        suffix <- if (strand_sign == "+") "_pl" else "_mn"
        outf <- paste0(
            "../data/scGROv2p8_mapq3qc_filtered_BedGraphs/",
            id, suffix, ".bedGraph"
        )
        # %do% evaluates this body in the calling environment, so assignments
        # persist after the loop. Use a dedicated name rather than `counts`,
        # which holds the filtered counts object used by the read-filtering
        # step.
        strand_cov <- scGROfilt %>%
            filter(strand == strand_sign & ExpID == id) %>%
            coverage()
        export(strand_cov, format = "bedGraph", con = outf)
    }
    # Nothing to collect; the files are the output.
    NULL
}

In [None]:
# Plate level: write a plus- and a minus-strand bedGraph for every plate.
# Drop unused factor levels first so the loop only visits plates that still
# have reads.
scGROfilt$PlateID <- droplevels(scGROfilt$PlateID)

foreach(
    id = levels(scGROfilt$PlateID)
) %do% {
    for (strand_sign in c("+", "-")) {
        # "_pl" = plus strand, "_mn" = minus strand
        suffix <- if (strand_sign == "+") "_pl" else "_mn"
        outf <- paste0(
            "../data/scGROv2p8_mapq3qc_filtered_BedGraphs/",
            id, suffix, ".bedGraph"
        )
        # %do% evaluates this body in the calling environment, so assignments
        # persist after the loop. Use a dedicated name rather than `counts`,
        # which holds the filtered counts object used by the read-filtering
        # step.
        strand_cov <- scGROfilt %>%
            filter(strand == strand_sign & PlateID == id) %>%
            coverage()
        export(strand_cov, format = "bedGraph", con = outf)
    }
    # Nothing to collect; the files are the output.
    NULL
}

In [None]:
# Restrict the filtered reads to experiment Exp264a and display the result.
scGROfiltExp264a <- filter(scGROfilt, ExpID == "Exp264a")
scGROfiltExp264a

# Experiment and plate identifiers present in the subset.
unique(scGROfiltExp264a$ExpID)
unique(scGROfiltExp264a$PlateID)

# Number of cellID levels before and after dropping the unused ones.
nlevels(scGROfiltExp264a$cellID)
scGROfiltExp264a$cellID <- droplevels(scGROfiltExp264a$cellID)
nlevels(scGROfiltExp264a$cellID)

In [None]:
# Generate bedGraphs of read coverage for individual cells in Exp264a:
# one plus-strand and one minus-strand file per cellID level.

foreach(
    id = levels(scGROfiltExp264a$cellID)
) %do% {
    for (strand_sign in c("+", "-")) {
        # "_pl" = plus strand, "_mn" = minus strand
        suffix <- if (strand_sign == "+") "_pl" else "_mn"
        outf <- paste0(
            "../data/scGROv2p8_filtered_Exp264a_singleCells_BedGraphs/",
            id, suffix, ".bedGraph"
        )
        # %do% evaluates this body in the calling environment, so assignments
        # persist after the loop. Use a dedicated name rather than `counts`,
        # which holds the filtered counts object used by the read-filtering
        # step.
        strand_cov <- scGROfiltExp264a %>%
            filter(strand == strand_sign & cellID == id) %>%
            coverage()
        export(strand_cov, format = "bedGraph", con = outf)
    }
    # Nothing to collect; the files are the output.
    NULL
}