In [1]:
suppressPackageStartupMessages(library(cicero))
suppressPackageStartupMessages(library(stringr))
suppressPackageStartupMessages(library(data.table))
suppressPackageStartupMessages(library(parallel))

In [2]:
wd = '/nfs/lab/elisha/islets/group/cicero/combined/annotations/' # Folder with input connections files, have to input only filtered connections
plotFolder = '/nfs/lab/elisha/islets/group/cicero/combined/annotations/' # Folder for writing plots to
setwd(wd)
gene_annotation <- read.table('/nfs/lab/jnewsome/References/gencode.v19.cicero_gene_annotation.txt', header=T, stringsAsFactors=FALSE)
gene_annotation <- gene_annotation[gene_annotation[['feature']]=='protein_coding',]

genesToPlotFile = 'new_genes.txt' # text file, list of genes to look at
referenceGenePeakRangeCellTypeFile = 'refCell.bed' # file with ranges for genes, 
                                        #ie farthest associate peak locations, 
                                        #in data set, and then the cell types 
                                        #that they are present in in data set

In [3]:
clusters  <- c('alpha', 'beta', 
               'delta', 'stellate')

In [4]:
# Read peak tables
vectorOfPeakTables <- vector(mode = "list", length = length(clusters))

for (i in 1: length(clusters)){
    conns  = read.table(file.path(wd, paste0(  clusters[i], '_sep.bed')), 
                        header=F, sep=" ")
    print(clusters[i])
    names(conns) <- c("Peak1", "Peak2", "coaccess", "Gene", "GeneAssociatedPeak")
    # chr15_99457855_99458295 chr15_99458297_99459439 0.464312378057795 IGF1R Peak2
    vectorOfPeakTables[[i]] <- conns
}

# Read gene referenceFile
refTable = read.table(file.path(wd, referenceGenePeakRangeCellTypeFile), header=T, sep="\t")

# read genes of interest file
#genesToPlotList <- read.table(genesToPlotFile)
genesToPlotList <- scan(genesToPlotFile, what="", sep="\n")


[1] "alpha"
[1] "beta"
[1] "delta"
[1] "stellate"


In [5]:
checkForGene <- function (conns, geneName){
    df <- conns[grep(geneName, conns$Gene), ]
   # print(head(df))
    print(paste0("nrow(df) = ", nrow(df)))
    if (nrow(df) > 0){
        return (TRUE)
    } else {
        return (FALSE)
    }
}

plotTheConnection <- function(plotFolderName, conns, chrom, start, end, coACutoff, connectionWidth, ymaxHeight, geneName, clusterName){
    pdfName <- paste0( plotFolder, 'NEW_cyt.', geneName, '.', chrom, '_', start, '_', end, '.', clusterName, 
                       '.ciceroPlot.pdf')
    
    pdf(pdfName) 
    plot_connections(conns, chrom, start, end, 
                gene_model = gene_annotation, 
                coaccess_cutoff = coACutoff, 
                connection_ymax=ymaxHeight,
                connection_width = connectionWidth, 
                collapseTranscripts = "longest" )
    
    
    # example: 
    #plot_connections(conns, "chr4", 25577267, 26503732, 
    #            gene_model = gene_annotation_sample, 
    #            coaccess_cutoff = .05, 
    #            connection_width = .5, 
    #            collapseTranscripts = "longest" )
    dev.off()
     
}


cherryPickConnectionPlot <- function(plotFolderName, chrom, start, end, geneName, 
                                     refTable, clusters, vectorOfPeakTables, coACutoff, 
                                     connectionWidth, ymaxHeight){
    match <- refTable[ which(refTable$Gene == geneName), ]
    print(match)
    cellTypes = match$CellTypes
    chrom <- toString(chrom)
    cellTypes <- toString(cellTypes)
    print(paste0("geneName = ", geneName, "  loc = ", chrom, "   ", start, "   ", end ))

    if (nrow(match) == 1){
        for (k in 1:length(clusters)){
            if (grepl(clusters[k], cellTypes, fixed=TRUE) ){
                print(paste0("cluster = ",clusters[k]))
           #print(grepl(clusters[i], cellTypes, fixed=TRUE))
               #hasPeaks <- checkForGene(vectorOfPeakTables[[k]], geneName)
               #print(paste0("cluster = ",clusters[k],"    hasPeaks = ", hasPeaks))
               #if(hasPeaks){
               conns <- vectorOfPeakTables[[k]]
               connsGene <- conns[ which(conns$Gene == geneName), ]
               print(connsGene[,c("Peak1", "Peak2", "Gene")])

               plotTheConnection(plotFolderName, connsGene, chrom, 
                        start, end, coACutoff, connectionWidth,
                        ymaxHeight, geneName, clusters[k])
               #}
           }
        }
    }   else   {
        print(paste0(geneName, "  has more than 1 entry in gene-cell type reference file. can't plot"))
    }
    
}





In [6]:
#print(genesToPlotList)


for (i in 1:length(genesToPlotList)){
    geneName = genesToPlotList[i]

    #geneMatch <- refTable[grep(geneName, refTable$Gene), ]
    geneMatch <- refTable[ which(refTable$Gene == geneName), ]
    chrom = geneMatch$Chromosome #These are column names so if you change colname in refbed file, update here too
    start = geneMatch$Start 
    end = geneMatch$Stop
    cellTypes = geneMatch$CellTypes
    chrom <- toString(chrom)
    cellTypes <- toString(cellTypes)
    
    print(paste0("geneName = ", geneName, "  loc = ", chrom, "   ", start, "   ", end))
    print(paste0("    len geneMatch = ", length(geneMatch), "   nrow(geneMatch) = ", nrow(geneMatch), "  ncol(geneMatch = )", ncol(geneMatch)))
    print(paste0("  cellTypes = ", cellTypes))
    if (nrow(geneMatch) == 1){
        for (k in 1:length(clusters)){
            if (grepl(clusters[k], cellTypes, fixed=TRUE) ){
                print(paste0("cluster = ",clusters[k]))
           #print(grepl(clusters[i], cellTypes, fixed=TRUE))
               #hasPeaks <- checkForGene(vectorOfPeakTables[[k]], geneName)
               #print(paste0("cluster = ",clusters[k],"    hasPeaks = ", hasPeaks))
               #if(hasPeaks){
               
               conns <- vectorOfPeakTables[[k]]
                print(conns$gene)
                print(paste0("  conns$gene = ", conns$gene))
                print(paste0("  geneName = ", geneName))
                if (which(conns$Gene == geneName)) {
                   connsGene <- conns[ which(conns$Gene == geneName), ]
                }
                else {
                    connsGene <- conns[ which(split(conns$Gene, ',') == geneName), ]
               #print(connsGene[,c("Peak1", "Peak2", "Gene")])
                }
               plotTheConnection(plotFolder, connsGene, chrom, 
                        start, end, 0.05, 
                        0.5, 1, geneName, clusters[k])
               #}
           }
        }
    }
    else   {
        print(paste0(geneName, "  has more than 1 entry in gene-cell type reference file. can't plot"))
    }
    
    print("")
    
    
    

}



[1] "geneName = PDX1  loc = chr13   27518056   28898701"
[1] "    len geneMatch = 5   nrow(geneMatch) = 1  ncol(geneMatch = )5"
[1] "  cellTypes = alpha,beta,delta,stellate"
[1] "cluster = alpha"
NULL


"the condition has length > 1 and only the first element will be used"


ERROR: Error in pdf(pdfName): cannot open file '/nfs/lab/elisha/islets/group/cicero/combined/annotations/NEW_cyt.PDX1.chr13_27518056_28898701.alpha.ciceroPlot.pdf'


## I didn't use this one below


In [13]:
# cherry pick
# Col1a2
cherryPickConnectionPlot(plotFolder, 'chr7', 
                         93000000, 95000000, 'COL1A2', 
                         refTable, clusters, 
                         vectorOfPeakTables, 0.05, 1, 1)




sftpb_chrom = 'chr2'
sftpb_start = '84000000'
sftpb_end = '86700000'
sftpb_Match <- refTable[ which(refTable$Gene == 'SFTPB'), ]
sftpb_cellTypes = sftpb_Match$CellTypes
sftpb_chrom <- toString(sftpb_chrom)
sftpb_cellTypes <- toString(sftpb_cellTypes)

print(paste0("geneName = ", 'SFTPB', "  loc = ", sftpb_chrom, "   ", sftpb_start, "   ", sftpb_end ))
#print(paste0("    len geneMatch = ", length(geneMatch), "   nrow(geneMatch) = ", nrow(geneMatch), "  ncol(geneMatch = )", ncol(geneMatch)))
#print(paste0("  cellTypes = ", cellTypes))



if (nrow(sftpb_Match) == 1){
    for (k in 1:length(clusters)){
        if (grepl(clusters[k], sftpb_cellTypes, fixed=TRUE) ){
            print(paste0("cluster = ",clusters[k]))
       #print(grepl(clusters[i], cellTypes, fixed=TRUE))
           #hasPeaks <- checkForGene(vectorOfPeakTables[[k]], geneName)
           #print(paste0("cluster = ",clusters[k],"    hasPeaks = ", hasPeaks))
           #if(hasPeaks){
           conns <- vectorOfPeakTables[[k]]
           connsGene <- conns[ which(conns$Gene == 'SFTPB'), ]
           print(connsGene[,c("Peak1", "Peak2", "Gene")])

           plotTheConnection(plotFolder, connsGene, sftpb_chrom, 
                    84000000, 86700000, 0.05, 
                    1, 1, 'SFTPB', clusters[k])
           #}
       }
    }
}

#SFTPB
cherryPickConnectionPlot <- function(plotFolder, 'chr2', 84000000, 86700000, 'SFTPB', refTable, clusters, vectorOfPeakTables, 0.05, 1, 1){






[1] "geneName = COL6A2  loc = chr7   93000000   95000000"
[1] "cluster = CD34_fibroblast"
                       Peak1                  Peak2   Gene
63    chr7_93895403_93896176 chr7_94023396_94025080 COL1A2
419   chr7_94004481_94005417 chr7_94023396_94025080 COL1A2
3245  chr7_93812178_93813232 chr7_94023396_94025080 COL1A2
5808  chr7_94053114_94053629 chr7_94843097_94843837 COL1A2
8305  chr7_93543837_93544460 chr7_94023396_94025080 COL1A2
12540 chr7_93906146_93906656 chr7_94023396_94025080 COL1A2
14449 chr7_94023396_94025080 chr7_94843097_94843837 COL1A2
16623 chr7_93926623_93927173 chr7_94023396_94025080 COL1A2
18756 chr7_93819473_93820503 chr7_94023396_94025080 COL1A2
20630 chr7_94023396_94025080 chr7_94326507_94327173 COL1A2
21429 chr7_93539515_93540290 chr7_94023396_94025080 COL1A2
21752 chr7_93953769_93954396 chr7_94023396_94025080 COL1A2
[1] "cluster = Matrix_fibroblast"
                       Peak1                  Peak2   Gene
27600 chr7_94023396_94025080 chr7_94600270_9460109