In [150]:
from __future__ import division
import math
import pandas as pd
import numpy as np
import matplotlib as plt
from pyveplot import *
import networkx as nx
import random
from IPython.display import SVG
%matplotlib inline

In [151]:
event_colors = {'DEL': 'red',
                'INV': 'yellow',
                'DUP': 'blue',
                'BND': 'pink',
                'complex': 'purple'}

In [152]:
apgi_1953 = pd.read_table("APGI1953_Tumor_lumpy.tsv")
apgi_1955 = pd.read_table("APGI1955_Tumor_lumpy.tsv")
apgi_2049 = pd.read_table("APGI2049_Tumor_lumpy.tsv")

panc_2156 = pd.read_table("2156.tsv", names=["chrom", "chrom_b", "sv"])

In [153]:
# Filter out ALTS (GL000226.1, GL000224.1 ...)
panc_2156 = panc_2156[~panc_2156["chrom"].str.contains("GL")]

### Only intra-chromosomal events happening on APGI datasets

In [154]:
# i.e:
apgi_1953[apgi_1953["chrom"] == "4"]

Unnamed: 0,sample,chrom,sv,counts
30,APGI1953_Tumor,4,complex,15
33,APGI1953_Tumor,4,DEL,8
51,APGI1953_Tumor,4,DUP,1


## Inter and intra chromosomal events in panc datasets

In [155]:
panc_2156[panc_2156['chrom'] != panc_2156['chrom_b']].head()

Unnamed: 0,chrom,chrom_b,sv
0,1,6,BND
1,1,3,BND
2,1,3,BND
4,1,3,BND
5,1,6,BND


In [156]:
panc_2156[panc_2156["chrom"] == "1"].head()

Unnamed: 0,chrom,chrom_b,sv
0,1,6,BND
1,1,3,BND
2,1,3,BND
3,1,1,DEL
4,1,3,BND


In [157]:
panc_2156

Unnamed: 0,chrom,chrom_b,sv
0,1,6,BND
1,1,3,BND
2,1,3,BND
3,1,1,DEL
4,1,3,BND
5,1,6,BND
6,1,15,BND
7,1,8,BND
8,1,8,BND
9,1,1,DEL


In [158]:
def groupby_chrom_sv(df):
    """ Original df and grouped by cols
    """
    # inter-chrom format
    if "chrom_b" in df.columns:
        by_chrom_sv = df.groupby(["chrom", "sv"]).count()["chrom_b"].reset_index(name="counts")
        by_chrom_sv = by_chrom_sv.groupby(["chrom", "sv", "counts"])
    else:
        by_chrom_sv = df.groupby(["chrom", "sv", "counts"])
    
    return df, by_chrom_sv

In [159]:
#panc_2156_cnts = panc_2156.groupby(["chrom", "sv"]).count()["chrom_b"].reset_index(name="counts")
#panc_2156_cnts.head()

#apgi_1953_grps = groupby_chrom_sv(apgi_1953)
#panc_2156_cnts.groupby(["chrom", "sv", "counts"]).groups, apgi_1953_grps.groups

## Plot a hiveplot given a pandas dataframe

In [168]:
def hiveplot(fname, dataframe):
    # a network
    g = nx.Graph()

    # our hiveplot object
    h = Hiveplot('{}.svg'.format(fname))

                  # start      end
    axis0 = Axis((200,200), (200,100), stroke="grey")
    axis1 = Axis((200,200), (300,300), stroke="blue", stroke_width=1.2)
    axis2 = Axis((200,200), (10,310), stroke="black", stroke_width=3)

    h.axes = [ axis0, axis1, axis2 ]
    
    for k, v in dataframe[1].groups.iteritems():
        g.add_node(k[0])        
        g.add_edge(k[0], k[2], event=k[1], count=v[0])
        
        #print k[0], k[2]

    for n in g.nodes():
        # Separate instances for the axis, otherwise loops in itself.
        node = Node(n)
        node2 = Node(n)
        node3 = Node(n)

        # XXX: Find a better (more uniform) function than ord? 
        # A small hash function would be prob better here.
        # Calculates the offset of the chromosomes in the axis.
        # XXX: Refactor this horrible code

        n = str(n)
        
        if len(n) == 1:
            offset_axis0 = ord(n) - 30
            offset_axis1 = ord(n) - 20
            offset_axis2 = ord(n) - 30
        else:
            chrom_offset = 0
            for char in n:
                chrom_offset = chrom_offset + ord(char)

            offset_axis0 = chrom_offset
            offset_axis1 = chrom_offset
            offset_axis2 = chrom_offset

        offset_axis0 = offset_axis0/120
        offset_axis1 = offset_axis1/120
        offset_axis2 = offset_axis2/120

        axis0.add_node(node, offset_axis0)
        axis1.add_node(node2, offset_axis1)
        axis2.add_node(node3, offset_axis2)

    for e in g.edges():
        edge_data = g.get_edge_data(*e)

        if e[0] != e[1] and random.choice([0,1]):
            print e[0], e[1]
            print("intra plotting now!")
            h.connect(axis0, e[1], 45, 
                      axis1, e[0], -45, 
                      stroke_width=edge_data['count']/10, stroke_opacity="0.4", 
                      stroke=event_colors[edge_data['event']])
        
        # inter-chromosomal axis
        else:
            #print("inter plotting now!")
            h.connect(axis1, e[1], 15, 
                      axis2, e[0], -15, 
                      stroke_width=edge_data['count']/10, stroke_opacity="0.4", 
                      stroke=event_colors[edge_data['event']])

    h.save()

In [169]:
#hiveplot("apgi_1953", groupby_chrom_sv(apgi_1953)[1])
#hiveplot("apgi_1955", groupby_chrom_sv(apgi_1955)[1])
hiveplot("apgi_2049", groupby_chrom_sv(apgi_2049))
hiveplot("panc_2156", groupby_chrom_sv(panc_2156))

1 11
intra plotting now!
1 12
intra plotting now!
1 20
intra plotting now!
1 21
intra plotting now!
1 17
intra plotting now!
1 16
intra plotting now!
1 15
intra plotting now!
1 6
intra plotting now!
2 11
intra plotting now!
2 10
intra plotting now!
2 13
intra plotting now!
2 18
intra plotting now!
2 1
intra plotting now!
2 3
intra plotting now!
3 19
intra plotting now!
3 1
intra plotting now!
3 5
intra plotting now!
3 14
intra plotting now!
3 7
intra plotting now!
4 19
intra plotting now!
4 4
intra plotting now!
5 12
intra plotting now!
5 21
intra plotting now!
9 Y
intra plotting now!
9 17
intra plotting now!
11 3
intra plotting now!
1 11
intra plotting now!
1 17
intra plotting now!
1 18
intra plotting now!
1 15
intra plotting now!
1 X
intra plotting now!
1 2
intra plotting now!
1 7
intra plotting now!
1 6
intra plotting now!
1 9
intra plotting now!
2 13
intra plotting now!
2 12
intra plotting now!
2 8
intra plotting now!
3 10
intra plotting now!
3 4
intra plotting now!
3 7
intra plott