In [3]:
import networkx as nx
import plotly
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from IPython.display import IFrame
import json
from networkx.readwrite import json_graph
import re
init_notebook_mode(connected=True)
import plotly.graph_objs as go
# Input interaction file (whitespace-separated) and the chromosome to keep.
# NOTE(review): this is an absolute, user-specific path; the cell only runs where
# this file exists at exactly this location.
dna_ints = "/Users/dborgesr/Documents/chia_origami_ints/primed_.7_origami.bedpe"
chrom_filt = "chr16"
#initiate genome graph
dna_int_graph = nx.Graph(style="filled")
#Set genomewide attributes
dna_int_graph.graph["species"] = "Homo Sapiens"
dna_int_graph.graph["genome_version"] = "hg19"
dna_int_graph.graph["tissue_type"] = "hESC"
#Load DNA interactions
# Each kept line contributes one edge. Fields 0-2 and 3-5 name the two end points
# as "chrom:start-end"; field 6 is parsed as a float and stored as the edge weight.
with open(dna_ints) as dna_ints_iter:
    for dna_int in dna_ints_iter:
        arr = dna_int.split()
        if not arr:
            # blank line (e.g. a trailing newline at end of file)
            continue
        # keep only interactions with both ends on the selected chromosome
        if arr[0] != chrom_filt or arr[3] != chrom_filt:
            continue
        x = arr[0] + ":" + arr[1] + "-" + arr[2]
        y = arr[3] + ":" + arr[4] + "-" + arr[5]
        dna_int_graph.add_edge(x,y,label=1, capacity = 1, weight=float(arr[6]))
        # add_node() on an existing node only updates its attributes. It is used
        # instead of dna_int_graph.node[...] because the `node` attribute is not
        # available in newer NetworkX releases.
        dna_int_graph.add_node(x, color="rgb(174,183,180)")
        dna_int_graph.add_node(y, color="rgb(174,183,180)")
# Axis settings shared by every axis below: background, axis line, zero line,
# grid and tick labels are all switched off and the axis title is empty.
axis = {
    "showbackground": False,
    "showline": False,
    "zeroline": False,
    "showgrid": False,
    "showticklabels": False,
    "title": "",
}
# Base 600x600 layout with zero margins and no legend; it is passed to go.Figure
# by the plotting cells further down.
layout = go.Layout(
    width=600,
    height=600,
    xaxis=go.XAxis(axis),
    yaxis=go.YAxis(axis),
    showlegend=False,
    scene=go.Scene(
        xaxis=go.XAxis(axis),
        yaxis=go.YAxis(axis),
        zaxis=go.ZAxis(axis)
    ),
    margin=go.Margin(l=0, r=0, b=0, t=0),
    hovermode="closest"
)

#plot a specific range
dna_range = "chr16:53036275-55396614"
range_split = re.split(r"[-:]",dna_range)
# Coordinates must be compared as integers: compared as strings, "5400000" sorts
# after "53036275" even though it is the smaller number.
range_chrom = range_split[0]
range_start = int(range_split[1])
range_end = int(range_split[2])
#make edge trace and annotations
#get subgraph of nodes within range
sub_graph = nx.Graph(style="filled")
for node in dna_int_graph.nodes():
    # node names have the form "chrom:start-end"
    node_split = re.split(r"[-:]",node)
    if (node_split[0] == range_chrom
            and int(node_split[1]) > range_start
            and int(node_split[2]) < range_end):
        sub_graph.add_node(node, color="rgb(174,183,180)")
        # Copy every edge touching this node. The far end of an edge may lie
        # outside the range; such nodes are added implicitly and get no colour.
        sub_graph.add_edges_from(dna_int_graph.edges(node))

#plot a set of neighborhoods from output of global file
in_dir = "example_output/output_IN_graphs/primed/example/"
global_json = in_dir+"example_global_graph_annotated.json"
#filter nodes by color (target_prom,gene,enhancer,ctcf,cohesin)
keep_col_nodes = ["rgb(17,166,216)","rgb(13,59,224)","rgb(242,12,50)","rgb(6,188,3)","rgb(174,183,180)"]
all_nodes_graph = nx.Graph(style="filled")
#Load JSON graphs
# The global graph has one node per neighbourhood; each node's "json_name"
# attribute names the JSON file (inside in_dir) holding that neighbourhood.
with open(global_json,"r") as json_comm:
    json_in = json.load(json_comm)
G = json_graph.node_link_graph(json_in)
# count_col is the index of the neighbourhood currently being processed
count_col = 0
for node in G.nodes(data=True):
    with open(in_dir+node[1]["json_name"]) as json_comm_sub:
        # print() with a single argument behaves the same on Python 2 and Python 3
        print(node[1]["json_name"])
        json_sub = json.load(json_comm_sub)
    sub_G = json_graph.node_link_graph(json_sub)
    for sub_node in sub_G.nodes(data=True):
        sub_split = re.split(r"[-:]",sub_node[0])
        sub_col_node = str(sub_node[1]["color"])
        # keep only nodes with a whitelisted colour on the selected chromosome
        if (sub_col_node not in keep_col_nodes) or (sub_split[0] != chrom_filt):
            continue
        # Nodes whose colour is not one of the first two whitelisted colours are
        # recoloured for the neighbourhoods at index 3 and index 1; in every other
        # neighbourhood they keep the colour read from the file.
        if (sub_col_node != "rgb(17,166,216)"
                and sub_col_node != "rgb(13,59,224)"):
            if count_col == 3:
                sub_col_node = "rgb(242,12,50)"
            if count_col == 1:
                sub_col_node = "rgb(42,179,75)"
        all_nodes_graph.add_node(sub_node[0], color=sub_col_node)
        # pull in the edges recorded for this node in the range subgraph
        all_nodes_graph.add_edges_from(sub_graph.edges(sub_node[0]))
    count_col += 1
        

example_RPGRIP1L,FTO_lvl1_graph.json
example_CHD9,CHD9,CHD9,CHD9,Y_RNA_lvl1_graph.json
example_CHD9_lvl1_graph.json
example_FTO,JB149426,FTO,IRX3_lvl1_graph.json


In [4]:
# 2-D spring layout of the neighbourhood graph
all_position = nx.spring_layout(all_nodes_graph,dim=2)

#make node trace: every node is a grey marker of size 20, hover text is the node name
node_ids = list(all_nodes_graph.nodes())
n_nodes = len(node_ids)
traceN = go.Scatter(
    x=[all_position[node_id][0] for node_id in node_ids],
    y=[all_position[node_id][1] for node_id in node_ids],
    mode="markers",
    text=[node_id.replace("\n","<br>") for node_id in node_ids],
    marker=go.Marker(color=["grey"] * n_nodes,
                     size=[20] * n_nodes,
                     opacity=[1] * n_nodes),
    name="",
    hoverinfo="text")

#make edge trace: one (start, end, None) triple per edge; None breaks the line between edges
edge_x, edge_y = [], []
for src, dst in all_nodes_graph.edges():
    src_xy, dst_xy = all_position[src], all_position[dst]
    edge_x.extend([src_xy[0], dst_xy[0], None])
    edge_y.extend([src_xy[1], dst_xy[1], None])
traceE = go.Scatter(x=edge_x, y=edge_y, mode="lines", hoverinfo="none",
                    name="", line=dict(width=1))

# edges first so the markers are drawn on top of the lines
data = go.Data([traceE, traceN])
fig = go.Figure(data=data, layout=layout)
iplot(fig)

## Defining mappings between Cis-Regulatory Elements and the genes they may be regulating using DNA interactions 
Preliminary Examination Proposal  
Diego Borges-Rivera  
Email: dborgesr@mit.edu  
Advisors: Richard Young and Manolis Kellis  
Prelim Committee Members: David Bartel (chair), Aviv Regev, Gene-Wei Li  

# All Cohesin ChIA-PET interactions<br>recovered on Chromosome 16 of hESC

In [5]:
# Titled copy of the shared layout for this figure only. Writing the title into
# `layout` itself would make every later figure that reuses `layout` inherit it.
# Plotly renders "<br>" (not "\n") as a line break in titles.
layout_all = go.Layout(
    layout,
    title="All Cohesin-SMC1 ChiA-PET DNA interactions within <br>" + chrom_filt + " of " + dna_int_graph.graph["species"])

#make node trace: every node is a grey marker of size 20, hover text is the node name
position=nx.spring_layout(dna_int_graph,dim=2)
node_ids = list(dna_int_graph.nodes())
n_nodes = len(node_ids)
traceN = go.Scatter(
    x=[position[node_id][0] for node_id in node_ids],
    y=[position[node_id][1] for node_id in node_ids],
    mode="markers",
    text=[node_id.replace("\n","<br>") for node_id in node_ids],
    marker=go.Marker(color=["grey"] * n_nodes,
                     size=[20] * n_nodes,
                     opacity=[1] * n_nodes),
    name="",
    hoverinfo="text")

#make edge trace and annotations
# one (start, end, None) triple per edge; None breaks the line between edges
edge_x, edge_y = [], []
for src, dst in dna_int_graph.edges():
    edge_x.extend([position[src][0], position[dst][0], None])
    edge_y.extend([position[src][1], position[dst][1], None])
traceE = go.Scatter(x=edge_x, y=edge_y, mode="lines", hoverinfo="none",
                    name="", line=dict(width=1))

data = go.Data([traceE, traceN])
fig = go.Figure(data=data, layout=layout_all)
plotly.offline.iplot(fig)

## All Cohesin ChIA-PET interactions with at least one end within chr16:53036275-55396614

In [6]:
# 2-D spring layout of the range-restricted subgraph
sub_position=nx.spring_layout(sub_graph,dim=2)

#make node trace: every node is a grey marker of size 20, hover text is the node name
sub_node_ids = list(sub_graph.nodes())
n_sub_nodes = len(sub_node_ids)
traceN = go.Scatter(
    x=[sub_position[node_id][0] for node_id in sub_node_ids],
    y=[sub_position[node_id][1] for node_id in sub_node_ids],
    mode="markers",
    text=[node_id.replace("\n","<br>") for node_id in sub_node_ids],
    marker=go.Marker(color=["grey"] * n_sub_nodes,
                     size=[20] * n_sub_nodes,
                     opacity=[1] * n_sub_nodes),
    name="",
    hoverinfo="text")

#make edge trace: one (start, end, None) triple per edge; None breaks the line between edges
edge_x, edge_y = [], []
for src, dst in sub_graph.edges():
    src_xy, dst_xy = sub_position[src], sub_position[dst]
    edge_x.extend([src_xy[0], dst_xy[0], None])
    edge_y.extend([src_xy[1], dst_xy[1], None])
traceE = go.Scatter(x=edge_x, y=edge_y, mode="lines", hoverinfo="none",
                    name="", line=dict(width=1))

data = go.Data([traceE, traceN])
fig = go.Figure(data=data, layout=layout)
iplot(fig)

## 1: Starting “seed” promoters are picked by RNA-Seq

In [7]:
#make node trace
# Only nodes carrying one of these two colours keep their colour; every other
# node (including nodes with no colour attribute) is drawn grey.
# NOTE(review): presumably these are the seed promoter/gene colours -- confirm.
highlight_colors = ("rgb(17,166,216)", "rgb(13,59,224)")
node_records = list(all_nodes_graph.nodes(data=True))
n_nodes = len(node_records)
traceN = go.Scatter(
    x=[all_position[name][0] for name, attrs in node_records],
    y=[all_position[name][1] for name, attrs in node_records],
    mode="markers",
    text=[name.replace("\n","<br>") for name, attrs in node_records],
    marker=go.Marker(
        color=[attrs["color"] if attrs.get("color") in highlight_colors else "grey"
               for name, attrs in node_records],
        size=[20] * n_nodes,
        opacity=[1] * n_nodes),
    name="",
    hoverinfo="text")

#make edge trace: one (start, end, None) triple per edge; None breaks the line between edges
edge_x, edge_y = [], []
for src, dst in all_nodes_graph.edges():
    src_xy, dst_xy = all_position[src], all_position[dst]
    edge_x.extend([src_xy[0], dst_xy[0], None])
    edge_y.extend([src_xy[1], dst_xy[1], None])
traceE = go.Scatter(x=edge_x, y=edge_y, mode="lines", hoverinfo="none",
                    name="", line=dict(width=1))

data = go.Data([traceE, traceN])
fig = go.Figure(data=data, layout=layout)
iplot(fig)

## 2: Crawling out from the seed nodes, add a node to the Insulated Neighborhood (IN) if doing so increases modularity and the node is contained within a CTCF-CTCF loop

In [8]:
#make node trace
# Each node is drawn in its own "color" attribute; nodes without one are grey.
node_records = list(all_nodes_graph.nodes(data=True))
n_nodes = len(node_records)
traceN = go.Scatter(
    x=[all_position[name][0] for name, attrs in node_records],
    y=[all_position[name][1] for name, attrs in node_records],
    mode="markers",
    text=[name.replace("\n","<br>") for name, attrs in node_records],
    marker=go.Marker(
        color=[attrs.get("color", "grey") for name, attrs in node_records],
        size=[20] * n_nodes,
        opacity=[1] * n_nodes),
    name="",
    hoverinfo="text")

#make edge trace: one (start, end, None) triple per edge; None breaks the line between edges
edge_x, edge_y = [], []
for src, dst in all_nodes_graph.edges():
    src_xy, dst_xy = all_position[src], all_position[dst]
    edge_x.extend([src_xy[0], dst_xy[0], None])
    edge_y.extend([src_xy[1], dst_xy[1], None])
traceE = go.Scatter(x=edge_x, y=edge_y, mode="lines", hoverinfo="none",
                    name="", line=dict(width=1))

data = go.Data([traceE, traceN])
fig = go.Figure(data=data, layout=layout)
iplot(fig)

## 3: Reach modularity equilibrium, defining Transcription Start Site (TSS) to Cis-Regulatory Element (CRE) mappings

In [9]:
# Add one extra coloured node, connect it to an existing region, and recompute
# the layout so the new node gets a position.
added_node = "chr16:54719028-54719630"
all_nodes_graph.add_node(added_node, color="rgb(170,15,226)")
all_nodes_graph.add_edge(added_node, "chr16:54684958-54686247")
all_position = nx.spring_layout(all_nodes_graph,dim=2)

#make node trace
# Each node is drawn in its own "color" attribute; nodes without one are grey.
node_records = list(all_nodes_graph.nodes(data=True))
n_nodes = len(node_records)
traceN = go.Scatter(
    x=[all_position[name][0] for name, attrs in node_records],
    y=[all_position[name][1] for name, attrs in node_records],
    mode="markers",
    text=[name.replace("\n","<br>") for name, attrs in node_records],
    marker=go.Marker(
        color=[attrs.get("color", "grey") for name, attrs in node_records],
        size=[20] * n_nodes,
        opacity=[1] * n_nodes),
    name="",
    hoverinfo="text")

#make edge trace: one (start, end, None) triple per edge; None breaks the line between edges
edge_x, edge_y = [], []
for src, dst in all_nodes_graph.edges():
    src_xy, dst_xy = all_position[src], all_position[dst]
    edge_x.extend([src_xy[0], dst_xy[0], None])
    edge_y.extend([src_xy[1], dst_xy[1], None])
traceE = go.Scatter(x=edge_x, y=edge_y, mode="lines", hoverinfo="none",
                    name="", line=dict(width=1))

data = go.Data([traceE, traceN])
fig = go.Figure(data=data, layout=layout)
iplot(fig)

## 4: Predict the impact of a mutation on a CTCF motif within a chromatin-accessible region under a CRE using Convolutional Neural Networks (CNNs) trained on CTCF ChIP-Seq and DNase-Seq

In [10]:
# Add a single-position node, attach it to the chr16:54719028-54719630 region,
# and recompute the layout so the new node gets a position.
position_node = "16:54685419"
all_nodes_graph.add_node(position_node, color="rgb(255,0,233)")
all_nodes_graph.add_edge(position_node, "chr16:54719028-54719630")
all_position = nx.spring_layout(all_nodes_graph,dim=2)

#make node trace
# Each node is drawn in its own "color" attribute; nodes without one are grey.
node_records = list(all_nodes_graph.nodes(data=True))
n_nodes = len(node_records)
traceN = go.Scatter(
    x=[all_position[name][0] for name, attrs in node_records],
    y=[all_position[name][1] for name, attrs in node_records],
    mode="markers",
    text=[name.replace("\n","<br>") for name, attrs in node_records],
    marker=go.Marker(
        color=[attrs.get("color", "grey") for name, attrs in node_records],
        size=[20] * n_nodes,
        opacity=[1] * n_nodes),
    name="",
    hoverinfo="text")

#make edge trace: one (start, end, None) triple per edge; None breaks the line between edges
edge_x, edge_y = [], []
for src, dst in all_nodes_graph.edges():
    src_xy, dst_xy = all_position[src], all_position[dst]
    edge_x.extend([src_xy[0], dst_xy[0], None])
    edge_y.extend([src_xy[1], dst_xy[1], None])
traceE = go.Scatter(x=edge_x, y=edge_y, mode="lines", hoverinfo="none",
                    name="", line=dict(width=1))

data = go.Data([traceE, traceN])
fig = go.Figure(data=data, layout=layout)
iplot(fig)

## 5: Base Editor 2 (BE2)-induced transition mutation in hESC under a CTCF motif, leading to breakdown of the interaction

In [11]:
#Basset in-silico mutagenesis
# Embed the heat-map PNG stored in in_dir; the IFrame is the cell's last
# expression, so it is what the notebook displays.
heat_png_c = in_dir + "chr16_g.54719331C_T_C_c78_heat.png"
IFrame(heat_png_c, width=800, height=200)

In [12]:
#Basset in-silico mutagenesis
# Embed the heat-map PNG stored in in_dir; the IFrame is the cell's last
# expression, so it is what the notebook displays.
heat_png_t = in_dir + "chr16_g.54719331C_T_T_c78_heat.png"
IFrame(heat_png_t, width=800, height=200)

## 6: Measure impact using RNA-Seq and 4C while quantifying change in FPKM and modularity