In [None]:
from __future__ import division
import math
import pandas as pd
import numpy as np
import matplotlib as plt
from pyveplot import *
import networkx as nx
import random
from IPython.display import SVG
%matplotlib inline

In [None]:
# Stroke colour used for each structural-variant event type when drawing edges.
event_colors = dict(
    DEL='red',
    INV='yellow',
    DUP='blue',
    BND='pink',
    complex='purple',
)

In [None]:
# Chromosome labels are compared against strings ("4", "1") and passed to
# len()/ord() later in this notebook, so read them as str instead of letting
# pandas infer an integer dtype for files that only hold numeric chromosomes.
apgi_1953 = pd.read_table("APGI1953_Tumor_lumpy.tsv", dtype={"chrom": str})
apgi_1955 = pd.read_table("APGI1955_Tumor_lumpy.tsv", dtype={"chrom": str})
apgi_2049 = pd.read_table("APGI2049_Tumor_lumpy.tsv", dtype={"chrom": str})

# This file is read with explicit column names: both breakpoint chromosomes
# and the event type.
panc_2156 = pd.read_table("2156.tsv", names=["chrom", "chrom_b", "sv"],
                          dtype={"chrom": str, "chrom_b": str})

### The APGI datasets contain only intra-chromosomal events

In [None]:
# e.g. every row of the APGI 1953 sample located on chromosome 4
apgi_1953.loc[apgi_1953["chrom"].eq("4")]

## Inter- and intra-chromosomal events in the panc datasets

In [None]:
# Rows whose two chromosome columns differ, i.e. inter-chromosomal events.
panc_2156.loc[panc_2156['chrom'].ne(panc_2156['chrom_b'])].head()

In [None]:
# First rows of the panc sample whose first chromosome column is "1".
panc_2156.loc[panc_2156["chrom"].eq("1")].head()

In [None]:
# Number of rows per (chromosome, event type) in the panc sample, counted on
# the non-null values of ``chrom_b``.
panc_2156_cnts = (
    panc_2156
    .groupby(["chrom", "sv"])["chrom_b"]
    .count()
    .reset_index(name="counts")
)

# Group the APGI sample directly on (chromosome, event type).
# ``groupby_chrom_sv`` is only defined in a later cell, so calling it here
# raises NameError when the notebook is run top to bottom on a fresh kernel.
apgi_1953_grps = apgi_1953.groupby(["chrom", "sv"])

# Show the group keys of both formats side by side: the panc keys are
# (chrom, sv, counts), the APGI keys are (chrom, sv).
panc_2156_cnts.groupby(["chrom", "sv", "counts"]).groups, apgi_1953_grps.groups

In [None]:
def groupby_chrom_sv(df):
    """Group structural-variant rows by chromosome and event type.

    Two input layouts are supported:

    * inter-chrom format (the frame has a ``chrom_b`` column): rows are first
      counted per ``(chrom, sv)`` and the resulting table is grouped on
      ``["chrom", "sv", "counts"]``, so every group key is
      ``(chrom, sv, counts)``.
    * intra-chrom format (no ``chrom_b`` column): the frame is grouped on
      ``["chrom", "sv"]`` as-is, so every group key is ``(chrom, sv)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``chrom`` and ``sv``.

    Returns
    -------
    pandas GroupBy object
        Always a GroupBy, so callers can rely on its ``.groups`` mapping.

    Raises
    ------
    KeyError
        If ``chrom`` or ``sv`` is missing from ``df``.
    """
    if "chrom_b" in df.columns:
        # inter-chrom format: count() only counts non-null ``chrom_b`` values.
        counts = (
            df.groupby(["chrom", "sv"])["chrom_b"]
            .count()
            .reset_index(name="counts")
        )
        # Re-group so the result exposes ``.groups`` like the intra-chrom case;
        # the count travels as the third element of each group key.
        return counts.groupby(["chrom", "sv", "counts"])

    # intra-chrom format.
    # The earlier ``by_chrom_sv[0]`` probe and debug print were removed:
    # integer indexing raised KeyError for both layouts.
    return df.groupby(["chrom", "sv"])

## Plot a hiveplot given a pandas dataframe

In [None]:
def hiveplot(fname, dataframe):
    """Draw a two-axis hive plot of grouped events and save it as an SVG file.

    Every distinct chromosome found in the group keys becomes a node on both
    axes, and every group becomes one curve joining the two copies of its
    chromosome. The curve colour is looked up in the module-level
    ``event_colors`` mapping and its width is the group's count divided by 10.

    Parameters
    ----------
    fname : str
        Output name without extension; the plot is written to ``<fname>.svg``.
    dataframe : pandas GroupBy object
        Anything exposing a ``.groups`` mapping whose keys are tuples starting
        with ``(chrom, sv)``. Chromosome labels must be strings, because
        ``len`` and ``ord`` are applied to them. When a key has a third
        element (as produced by grouping on ``["chrom", "sv", "counts"]``),
        that element is used as the count.

    Raises
    ------
    KeyError
        If an event type has no entry in ``event_colors``.
    """
    # A MultiGraph is required here: a plain Graph stores a single edge per
    # node pair, so the self-loops of one chromosome would overwrite each
    # other and only the last event type of each chromosome would be drawn.
    g = nx.MultiGraph()

    # our hiveplot object
    h = Hiveplot('{}.svg'.format(fname))

    #             start      end
    axis0 = Axis((200, 200), (200, 100), stroke="black")
    axis1 = Axis((200, 200), (100, 300), stroke="black", stroke_width=1.2)

    h.axes = [axis0, axis1]

    # .items() is available on both Python 2 and 3 (iteritems is Python 2 only).
    for k, v in dataframe.groups.items():
        if len(k) > 2:
            count = k[2]
        else:
            # NOTE(review): v holds the row labels of the group, so v[0] is
            # the first row label, kept as in the original code. Confirm the
            # input frame is indexed by the event count.
            count = v[0]

        g.add_node(k[0])
        # XXX: Same indexes since it is intra-chromosomal, needs to be generalized
        g.add_edge(k[0], k[0], event=k[1], count=count)

    for n in g.nodes():
        # Separate instances for the axis, otherwise loops in itself.
        node = Node(n)
        node2 = Node(n)

        # XXX: Find a better (more uniform) function than ord?
        # A small hash function would be prob better here.
        # Calculates the offset of the chromosomes in the axis.
        if len(n) == 1:
            offset_axis0 = ord(n) - 30
            offset_axis1 = ord(n) - 20
        else:
            chrom_offset = sum(ord(char) for char in n)
            offset_axis0 = chrom_offset
            offset_axis1 = chrom_offset

        # Scale the character codes down before handing them to the axes.
        offset_axis0 = offset_axis0/120
        offset_axis1 = offset_axis1/120

        axis0.add_node(node, offset_axis0)
        axis1.add_node(node2, offset_axis1)

    # data=True yields every parallel edge together with its own attributes.
    for source, target, edge_data in g.edges(data=True):
        h.connect(axis1, target, 45,
                  axis0, source, -45,
                  stroke_width=edge_data['count']/10, stroke_opacity="0.4",
                  stroke=event_colors[edge_data['event']])

    h.save()

In [None]:
# Disabled runs for the three APGI samples:
#hiveplot("apgi_1953", groupby_chrom_sv(apgi_1953))
#hiveplot("apgi_1955", groupby_chrom_sv(apgi_1955))
#hiveplot("apgi_2049", groupby_chrom_sv(apgi_2049))

# Active run: group the panc sample and write panc_2156.svg.
hiveplot(fname="panc_2156", dataframe=groupby_chrom_sv(panc_2156))