In [105]:
import pandas as pd
import networkx as nx
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
pio.templates.default = 'plotly_white'

In [2]:
# Tab-separated edge table; later cells read its '#SOURCE' and '#TARGET' columns.
network = pd.read_csv(
    'data/cell_lines_corr-above0-5-0-323.net',
    delimiter='\t',
)

In [3]:
# Tab-separated node table; later cells read its 'Node_ID', 'Primary Disease'
# and 'DepMap_ID' columns.
nodes = pd.read_csv(
    'data/cell_lines_corr-above0-5-0-323.nodes',
    delimiter='\t',
)

In [4]:
# Node id -> primary disease. If a node id occurs more than once, the last row wins.
cancer_lookup = {
    node_id: disease
    for node_id, disease in zip(nodes['Node_ID'], nodes['Primary Disease'])
}

In [5]:
# Node id -> DepMap id. If a node id occurs more than once, the last row wins.
cell_line_lookup = {
    node_id: depmap_id
    for node_id, depmap_id in zip(nodes['Node_ID'], nodes['DepMap_ID'])
}

In [6]:
# Annotate both endpoints of every edge with the disease and the cell line of
# the node. Plain dict indexing is kept on purpose: an edge that references a
# node id missing from the node table raises KeyError instead of yielding NaN.
for suffix, lookup in (('_Cancer', cancer_lookup), ('_Cell_Line', cell_line_lookup)):
    for endpoint in ('#SOURCE', '#TARGET'):
        network[endpoint + suffix] = [lookup[node_id] for node_id in network[endpoint]]

In [56]:
def table(cancer, edge_table=None):
    """Summarise every edge that touches `cancer`, oriented so `cancer` is the source.

    Parameters
    ----------
    cancer : str
        Disease label to focus on, compared against the '#SOURCE_Cancer' and
        '#TARGET_Cancer' columns.
    edge_table : pandas.DataFrame, optional
        Annotated edge table to summarise. Defaults to the notebook-level
        `network` frame. It must carry the '#SOURCE'/'#TARGET' columns and
        their '_Cancer' / '_Cell_Line' companions.

    Returns
    -------
    dict
        'subset'  : the matching edges, with wrong-direction edges flipped so
                    that '#SOURCE_Cancer' is always `cancer`;
        'summary' : one row per target cancer with '#NODE_Count', '#NODE_Pct'
                    and (when a numeric '#WEIGHT' column exists)
                    '#WEIGHT_Average';
        'weights' : the per-target-cancer '#WEIGHT_Average' frame on its own.

    Notes
    -----
    Edges are selected through the '#SOURCE_Cancer' / '#TARGET_Cancer'
    annotations rather than by looking node ids up in the global `nodes`
    table. A later cell rebinds `nodes` to a plain dict, so relying on it made
    this function break depending on cell execution order.
    """
    if edge_table is None:
        edge_table = network

    source_is_focus = edge_table['#SOURCE_Cancer'] == cancer
    target_is_focus = edge_table['#TARGET_Cancer'] == cancer

    # Edges already pointing away from the selected cancer type.
    forward = edge_table[source_is_focus]
    # Edges that touch the selected cancer type only through their target.
    to_flip = edge_table[target_is_focus & ~source_is_focus]

    # Swap every source/target column pair, then restore the column order.
    swap = {
        '#SOURCE': '#TARGET',
        '#TARGET': '#SOURCE',
        '#SOURCE_Cancer': '#TARGET_Cancer',
        '#TARGET_Cancer': '#SOURCE_Cancer',
        '#SOURCE_Cell_Line': '#TARGET_Cell_Line',
        '#TARGET_Cell_Line': '#SOURCE_Cell_Line',
    }
    flipped = to_flip.rename(columns=swap)[list(edge_table.columns)]

    # DataFrame.append was removed in pandas 2.0; concat keeps the row labels.
    subset = pd.concat([forward, flipped])

    # Name the counts explicitly: pandas >= 2 calls the value_counts result
    # 'count' and names its index, so renaming by the source column name
    # silently did nothing there.
    summ = (
        subset['#TARGET_Cancer']
        .value_counts()
        .rename('#NODE_Count')
        .rename_axis(None)
        .to_frame()
    )
    summ['#NODE_Pct'] = summ['#NODE_Count'] / summ['#NODE_Count'].sum()

    # numeric_only=True keeps the string columns out of the mean; pandas >= 2
    # raises on them instead of dropping them.
    weights = (
        subset.groupby('#TARGET_Cancer')
        .mean(numeric_only=True)
        .filter(['#WEIGHT'])
        .rename(columns={'#WEIGHT': '#WEIGHT_Average'})
    )
    summ = summ.merge(weights, left_index=True, right_index=True)
    return {'subset': subset, 'summary': summ, 'weights': weights}

In [67]:
def chart(cancer):
    """Show a bar chart of the link counts between `cancer` and each cancer type.

    One bar is drawn per row of ``table(cancer)['summary']``; the bar whose
    label equals `cancer` itself is drawn in crimson, all others in grey.
    The figure is displayed and nothing is returned.
    """
    summary = table(cancer)['summary']
    labels = list(summary.index)

    # Position of the selected cancer's own bar, if it has one.
    highlight = labels.index(cancer) if cancer in labels else None
    bar_colors = [
        'crimson' if position == highlight else 'lightslategray'
        for position in range(len(labels))
    ]

    bars = go.Bar(x=summary.index, y=summary['#NODE_Count'], marker_color=bar_colors)
    fig = go.Figure(data=[bars], layout={'title': cancer})
    fig.show()

In [68]:
# `cancers` was never defined in this notebook, so a fresh-kernel run stopped
# here with a NameError. Build it from the node table, in order of first
# appearance.
# NOTE(review): reconstructed definition - confirm it matches the list that
# was originally used (order and contents).
cancers = list(nodes['Primary Disease'].dropna().unique())

for cancer in cancers:
    chart(cancer)

In [None]:
# Cancer-by-cancer matrix of link counts: one row per cancer in `cancers`
# (its summary transposed), one column per cancer it is linked to.
# The rows are collected first and concatenated once; DataFrame.append was
# removed in pandas 2.0 and growing a frame inside a loop is quadratic.
cancer_labels = list(cancers)
count_rows = [
    table(cancer)['summary']
    .filter(['#NODE_Count'])
    .rename(columns={'#NODE_Count': cancer})
    .T
    for cancer in cancer_labels
]
if count_rows:
    count_matrix = pd.concat(count_rows)
    # Columns follow `cancers`; any label outside that list is kept at the end.
    extra_labels = [label for label in count_matrix.columns if label not in cancer_labels]
    count_matrix = (
        count_matrix
        .reindex(columns=cancer_labels + extra_labels)
        .dropna(how='all')   # cancers with no links contribute an all-NaN row
        .fillna(0)           # no links between a pair -> 0
    )
else:
    count_matrix = pd.DataFrame(columns=cancer_labels)
count_matrix

In [65]:
# Cancer-by-cancer matrix of link shares: one row per cancer in `cancers`
# (its summary transposed), one column per cancer it is linked to.
# The rows are collected first and concatenated once; DataFrame.append was
# removed in pandas 2.0 and growing a frame inside a loop is quadratic.
cancer_labels = list(cancers)
pct_rows = [
    table(cancer)['summary']
    .filter(['#NODE_Pct'])
    .rename(columns={'#NODE_Pct': cancer})
    .T
    for cancer in cancer_labels
]
if pct_rows:
    pct_matrix = pd.concat(pct_rows)
    # Columns follow `cancers`; any label outside that list is kept at the end.
    extra_labels = [label for label in pct_matrix.columns if label not in cancer_labels]
    pct_matrix = (
        pct_matrix
        .reindex(columns=cancer_labels + extra_labels)
        .dropna(how='all')   # cancers with no links contribute an all-NaN row
        .fillna(0)           # no links between a pair -> 0
    )
else:
    pct_matrix = pd.DataFrame(columns=cancer_labels)
pct_matrix

Unnamed: 0,Lung Cancer,Breast Cancer,Pancreatic Cancer,Leukemia,Lymphoma,Neuroblastoma,Brain Cancer,Rhabdoid,Colon/Colorectal Cancer,Liver Cancer,Kidney Cancer,Ovarian Cancer,Esophageal Cancer,Head and Neck Cancer,Skin Cancer,Endometrial/Uterine Cancer,Bladder Cancer,Sarcoma,Bone Cancer,Eye Cancer
Lung Cancer,0.206897,0.017241,0.603448,0.0,0.0,0.034483,0.0,0.0,0.051724,0.0,0.0,0.068966,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017241
Breast Cancer,0.166667,0.333333,0.166667,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Pancreatic Cancer,0.384615,0.010989,0.406593,0.0,0.0,0.076923,0.0,0.0,0.032967,0.0,0.0,0.021978,0.010989,0.0,0.010989,0.010989,0.010989,0.021978,0.0,0.0
Leukemia,0.0,0.0,0.0,0.9375,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Lymphoma,0.0,0.0,0.0,0.25,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0
Neuroblastoma,0.105263,0.0,0.368421,0.0,0.0,0.421053,0.052632,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Brain Cancer,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.714286,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857
Rhabdoid,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.5,0.0,0.25,0.0,0.0,0.0,0.0,0.0
Colon/Colorectal Cancer,0.428571,0.142857,0.428571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Liver Cancer,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [94]:
# Stacked bars of link shares: one trace per cancer column of pct_matrix,
# plotted across the matrix rows. Within each trace, the bar sitting on the
# row of the same cancer is orange, the rest dark blue.
row_labels = list(pct_matrix.index)


def _trace_colors(cancer):
    """Return one colour per matrix row, highlighting the row named `cancer`."""
    highlight = row_labels.index(cancer) if cancer in row_labels else None
    return [
        'orange' if position == highlight else 'darkslateblue'
        for position in range(len(row_labels))
    ]


data = [
    go.Bar(
        name=cancer,
        x=pct_matrix.index,
        y=pct_matrix[cancer],
        marker_color=_trace_colors(cancer),
    )
    for cancer in cancers
]

fig = go.Figure(
    data=data,
    layout={'title': 'Percent of Nodes', 'showlegend': False, 'barmode': 'stack'},
)
fig.show()

In [95]:
# Stacked bars of link counts: one trace per cancer column of count_matrix,
# plotted across the matrix rows. Within each trace, the bar sitting on the
# row of the same cancer is orange, the rest dark blue.
# The colours are derived from count_matrix itself (this cell previously
# looked the rows up in pct_matrix), so they always line up with the plotted
# x values and the cell no longer needs pct_matrix to exist.
count_labels = list(count_matrix.index)

data = []
for cancer in cancers:
    colors = ['darkslateblue'] * len(count_labels)
    if cancer in count_labels:
        colors[count_labels.index(cancer)] = 'orange'
    data.append(
        go.Bar(
            name=cancer,
            x=count_matrix.index,
            y=count_matrix[cancer],
            marker_color=colors,
        )
    )

fig = go.Figure(
    data=data,
    layout={'title': 'Number of Nodes', 'showlegend': False, 'barmode': 'stack'},
)
fig.show()

In [102]:
# Long-form edge list: one row per (row cancer, column cancer) pair of
# count_matrix, with the count as the edge weight.
# The two index levels are named explicitly instead of renaming the
# auto-generated 'level_0' / 'level_1' columns, which are not produced when
# count_matrix's index or columns already carry a name.
stack = (
    count_matrix.stack()
    .rename_axis(['#SOURCE', '#TARGET'])
    .reset_index(name='#WEIGHT')
)
stack

Unnamed: 0,#SOURCE,#TARGET,#WEIGHT
0,Lung Cancer,Lung Cancer,12.0
1,Lung Cancer,Breast Cancer,1.0
2,Lung Cancer,Pancreatic Cancer,35.0
3,Lung Cancer,Leukemia,0.0
4,Lung Cancer,Lymphoma,0.0
...,...,...,...
395,Eye Cancer,Endometrial/Uterine Cancer,0.0
396,Eye Cancer,Bladder Cancer,1.0
397,Eye Cancer,Sarcoma,0.0
398,Eye Cancer,Bone Cancer,0.0


In [103]:
def calculate_positions(thresh_stack, seed=None):
    """Lay out a weighted edge list and return plot-ready edge and node data.

    Parameters
    ----------
    thresh_stack : pandas.DataFrame
        Edge list with '#SOURCE', '#TARGET' and '#WEIGHT' columns. Every row
        becomes an edge, whatever its weight.
    seed : int, optional
        Forwarded to ``nx.spring_layout``. The layout starts from random
        positions, so pass a fixed value to get the same picture on every run.
        The default (None) keeps the previous unseeded behaviour.

    Returns
    -------
    (edges, nodes) : tuple of dict
        edges : 'x' and 'y' coordinate lists holding, for each edge, its start
                point, its end point and a None separator.
        nodes : 'x', 'y', 'name', 'adjacencies' (number of neighbours) and
                'centralities' (degree centrality), all in graph node order.
    """
    # Create a networkx graph from the list of pairs
    G = nx.from_pandas_edgelist(thresh_stack, '#SOURCE', '#TARGET', ['#WEIGHT'])

    # Generate position data for each node
    pos = nx.spring_layout(G, weight='#WEIGHT', seed=seed)

    # x,y positions of each edge's start (x0,y0) and end (x1,y1) points.
    # The trailing None breaks the line after every edge; without it a single
    # line trace also joins the end of one edge to the start of the next.
    edge_x = []
    edge_y = []
    for start, end in G.edges():
        x0, y0 = pos[start]
        x1, y1 = pos[end]
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])
    edges = dict(x=edge_x, y=edge_y)

    # Degree centrality is computed once for the whole graph rather than once
    # per node.
    centrality = nx.degree_centrality(G)

    node_x = []
    node_y = []
    node_name = []
    node_adjacencies = []
    node_centralities = []
    for node, neighbours in G.adjacency():
        x, y = pos[node]
        node_name.append(node)
        node_x.append(x)
        node_y.append(y)
        node_adjacencies.append(len(neighbours))
        node_centralities.append(centrality[node])

    nodes = dict(
        x=node_x,
        y=node_y,
        name=node_name,
        adjacencies=node_adjacencies,
        centralities=node_centralities,
    )

    return edges, nodes

In [107]:
# Only pairs that share at least one link become graph edges. `stack` also
# holds a zero-count row for every unlinked pair; feeding those in connects
# every cancer to every other one, which makes the drawn edges and the
# centrality colouring meaningless.
# NOTE: this rebinds `nodes`, which until this cell held the node table read
# from the .nodes file.
[edges,nodes] = calculate_positions(stack[stack['#WEIGHT'] > 0])

In [112]:
def draw_graph(edges,nodes,title,**kwargs):
    """Draw the network with plotly and display it.

    Parameters
    ----------
    edges : dict
        'x' and 'y' coordinate lists for the edge line trace.
    nodes : dict
        'x', 'y' (marker positions), 'name' (labels drawn next to the markers)
        and 'centralities' (values used to colour the markers).
    title : str
        Figure title.
    **kwargs
        Accepted for call compatibility; currently not used.

    The figure is shown and nothing is returned.
    """
    # Draw edges
    edge_trace = go.Scatter(
        x=edges['x'], y=edges['y'],
        line=dict(width=0.5, color='#888'),
        mode='lines+markers',
        hoverinfo='text')

    # Draw nodes, labelled without hovering and coloured by degree centrality
    node_trace = go.Scatter(
        x=nodes['x'],
        y=nodes['y'],
        mode='markers+text',
        text=nodes['name'],
        hoverinfo='text',
        marker=dict(color=nodes['centralities']))

    # Draw figure. The title font is set through title.font; the old
    # `titlefont_size` shorthand is deprecated and newer plotly releases no
    # longer accept it.
    fig = go.Figure(
        data=[edge_trace, node_trace],
        layout=go.Layout(
            title=dict(text=title, font=dict(size=16)),
            height=600,
            showlegend=False,
            hovermode='closest',
            margin=dict(b=20, l=5, r=5, t=120),
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            template='plotly_white'))

    fig.update_traces(textposition='top center')
    # Show figure
    fig.show()

In [113]:
# Render the cancer-to-cancer graph from the positions computed above.
draw_graph(
    edges,
    nodes,
    title='Network Graph of Node Counts',
)