In [16]:
%matplotlib notebook
%reload_ext autoreload
%autoreload 2

# project-local modules (imported by name, so they must be on sys.path)
import PCAPlotter
import PCA_runner

import os
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
np.version.version

# bokeh stuff
from bokeh.io import push_notebook, show, output_notebook # for plotting inline
from bokeh.plotting import figure, output_file, Figure
# from bokeh.plotting import Figure, show, output_file # for saving to file
from ipywidgets import interact
from bokeh.models import HoverTool
from bokeh.models import ColumnDataSource

output_notebook()

# Input files: `counts` is read below as a delimited table with its first
# column as the index; `goi` is a text file read line by line as gene ids.
counts = '/home/bay001/projects/codebase/data/matrix.txt'
goi = '/home/bay001/projects/codebase/data/test_neuronal_markers.txt'
# Gene whose expression colors the plot initially (used in the continuous branch).
starting_gene = 'ENSG00000187634'

# Alternative iris example inputs. NOTE(review): in the visible cells `cond` is
# only defined by the commented-out line below, yet the discrete branch reads it.
# counts = '/home/bay001/projects/codebase/clustering/examples/iris.txt'
# cond = pd.read_table('/home/bay001/projects/codebase/clustering/examples/iris.names',index_col=0)

# NOTE(review): "discreet" presumably means "discrete" (categorical coloring).
discreet = False # False: color by expression (continuous color map); True: color by a column of `cond`
is_log = True # log transform the dataset
is_subset = True # perform PCA on a subset of the genes in counts

# Count matrix indexed by its first column; rows are looked up by gene id later.
df = pd.read_table(counts, index_col=0)

def color_by_expression(df, gene_id, cmap='Blues'):
    """
    Usually for continuous color mappings (SINGLE CELL RNA-SEQ)

    Builds a per-sample color table from the expression of one gene.

    The row ``df.loc[gene_id]`` is transformed with log2(x + 1), min-max
    scaled to [0, 1] and pushed through the matplotlib colormap ``cmap``.
    Note that the log transform is always applied here, so a frame that has
    already been log-transformed by the caller is transformed a second time.

    Parameters
    ----------
    df : pandas.DataFrame
        Expression table; ``gene_id`` must be a label of its index.
    gene_id : label
        Row of ``df`` whose values drive the coloring.
    cmap : str
        Name passed to ``plt.get_cmap``.

    Returns
    -------
    pandas.DataFrame
        One row per sample with columns ``color`` (``#rrggbb`` string) and
        ``condition`` (always the literal string ``'expression'``).
    """
    colormap = plt.get_cmap(cmap)
    expr = np.log2(df.loc[gene_id] + 1)

    lowest = expr.min()
    span = expr.max() - lowest
    if span > 0:
        # Proper min-max scaling: the minimum has to be subtracted, otherwise
        # the values only land in [0, 1] when the minimum happens to be 0.
        scaled = (expr - lowest) / span
    else:
        # Constant (or empty) expression: avoid 0/0 and x/0, use the low end
        # of the colormap for every sample.
        scaled = expr * 0.0

    # Series.items() replaces iteritems(), which was removed in pandas 2.0.
    colors = {
        sample: {'color': float_to_hex(colormap(level)), 'condition': 'expression'}
        for sample, level in scaled.items()
    }
    return pd.DataFrame(colors).T

def color_by_condition(conditions, col_string):
    """
    Usually for discrete color mappings (RNA-SEQ)

    Builds a per-sample color table with one color per distinct condition.

    Parameters
    ----------
    conditions : pandas.DataFrame
        Table indexed by sample; column ``col_string`` holds the condition
        label of each sample.
    col_string : label
        Name of the column of ``conditions`` to color by.

    Returns
    -------
    pandas.DataFrame
        One row per sample with columns ``color`` (``#rrggbb`` string taken
        from seaborn's "hls" palette) and ``condition`` (the sample's label).
    """
    labels = conditions[col_string]
    # Order of first appearance instead of set iteration order, so the
    # condition -> color assignment is the same on every run.
    distinct = list(labels.unique())
    palette = sns.color_palette("hls", len(distinct))
    colormap = {
        condition: {'color': float_to_hex(rgb), 'condition': condition}
        for condition, rgb in zip(distinct, palette)
    }
    colormapped = labels.apply(lambda x: colormap[x])
    return pd.DataFrame(dict(colormapped)).T

def float_to_hex(c):
    """
    returns the #hex representation of the RGB

    ``c`` is an indexable of at least three floats (red, green, blue), each
    nominally in [0, 1]; any further entries (e.g. alpha) are ignored.
    Each channel is multiplied by 255, truncated to an integer and clamped
    to 0..255 so the result is always a well-formed ``#rrggbb`` string.
    """
    # '%x' rejects floats on Python 3, so convert every channel explicitly.
    channels = tuple(
        min(255, max(0, int(value * 255)))
        for value in (c[0], c[1], c[2])
    )
    return '#%02x%02x%02x' % channels


def update_expression(f):
    """
    Widget callback: recolor the plot by the expression of gene `f`.

    Builds the color table with color_by_expression from the module-level
    `df` and the gene id `f`, hands it to `r.set_color`, then calls
    push_notebook().
    """
    expression_colors = color_by_expression(df, f)
    r.set_color(expression_colors)
    push_notebook()

def update_condition(f):
    """
    Widget callback: recolor the plot by the condition column `f`.

    Builds the color table with color_by_condition from the module-level
    `cond` and the column name `f`, hands it to `r.set_color`, then calls
    push_notebook().
    """
    condition_colors = color_by_condition(cond, f)
    r.set_color(condition_colors)
    push_notebook()


""" Define the tools needed """
hover = HoverTool(
        tooltips=[
            ("index", "@idx")
        ]
    )

# Instantiate bokeh figure
px = figure(tools=[hover])

if is_log:
    df = np.log2(df+1)

df_sub = PCA_runner.subset(df,goi) if is_subset else df

# Build the sample::color:condition dataframe needed for the legend, and pick
# the dropdown entries plus the callback that recolors the plot.
if discreet:
    # NOTE(review): `cond` is not defined in the visible cells (its loader is
    # commented out), so this branch needs it to be loaded first.
    color_df = color_by_condition(cond, 'names')
    markers = list(cond.columns)
    recolor = update_condition
else:
    # NOTE(review): color_by_expression applies log2(x + 1) itself, so with
    # is_log=True the coloring is based on doubly transformed values — confirm
    # that this is intended.
    color_df = color_by_expression(df, starting_gene)
    # Read the genes of interest with a context manager so the file is closed.
    with open(goi, 'r') as goi_file:
        genes_of_interest = set(line.strip() for line in goi_file)
    markers = sorted(genes_of_interest.intersection(set(df.index)))
    recolor = update_expression

# The plot object must exist before the widget is created: interact() invokes
# the callback once right away, and the callbacks use the module-level `r`.
r = PCAPlotter.pcaplot(df_sub, colors=color_df, bokeh=True, ax=px, cmap='Purples')

# Trailing semicolon keeps the notebook from echoing interact()'s return value.
# The immediate callback recolors the plot by the first entry of `markers`.
interact(recolor, f=markers);




In [17]:
# Render the figure inline. notebook_handle=True asks bokeh for a notebook
# comms handle; it is not captured here, so the push_notebook() calls in the
# update_* callbacks presumably fall back to the most recently shown plot —
# confirm against the bokeh documentation.
show(px, notebook_handle=True)