In [33]:
from ipywidgets import interact, interact_manual, fixed
import numpy as np
import pandas as pd
from bokeh.io import push_notebook, show, output_notebook
from bokeh.plotting import figure, show
from bokeh.layouts import row
from bokeh.models import Label,ColumnDataSource, LabelSet
from scipy.stats import rankdata
from sklearn.preprocessing import StandardScaler
#import matplotlib.pyplot as plt

# Configure Bokeh to render inline in notebook cell output, so the later
# show(..., notebook_handle=True) call draws the charts in this notebook.
output_notebook()

In [34]:
# Initial widget values: how many people fall in each (resident, fan) cell,
# plus the two StandardScaler switches.
state = dict(
    notresident_notfan=3,
    notresident_fan=4,
    resident_notfan=5,
    resident_fan=3,
    meanscaled=True,
    stdscaled=True,
)

In [53]:
# Presentation settings shared by the charts and the sliders.
config = dict(
    city='LA',                                   # x-axis label prefix
    team='Lakers',                               # y-axis label prefix
    colors=['red', 'purple', 'blue', 'green'],   # one color per (resident, fan) cell
    sliderscale=(0, 15),                         # (min, max) passed to the sliders
    chartssize=300,                              # width and height of every chart
)

In [90]:
def _eigvecs_by_importance(cov):
    """Return the eigenvectors of `cov` as columns, largest eigenvalue first.

    np.linalg.eig returns eigenvectors column-wise (column i belongs to
    eigenvalue i), so the reordering must permute columns, not rows.
    """
    eig_val, eig_vec = np.linalg.eig(cov)
    return eig_vec[:, np.argsort(eig_val)[::-1]]


def _count_labels(points, xcol, ycol, x_offsets, y_offsets):
    """Count points per (xcol, ycol) location and compute label anchors.

    `points` must hold exactly three columns: xcol, ycol and one more column
    whose per-group count becomes the label text ('txt'). Each offsets pair is
    (offset at the axis maximum, offset elsewhere): Bokeh doesn't grow the
    axis limits to accommodate labels, so labels at the maximum are pulled
    inward to stay within the "square" of points.
    """
    counts = points.groupby([xcol, ycol]).agg('count').reset_index()
    for col, target, (at_max, elsewhere) in (
            (xcol, 'xlabel', x_offsets), (ycol, 'ylabel', y_offsets)):
        counts[target] = np.where(
            counts[col] == counts[col].max(),
            counts[col] + at_max, counts[col] + elsewhere)
    counts.columns = [xcol, ycol, 'txt', 'xlabel', 'ylabel']
    return counts


def _with_columns(source, **columns):
    """Replace `columns` in a Bokeh data source with a single assignment.

    Assigning a whole new dict keeps the replaced columns the same length at
    every step, instead of passing through mismatched intermediate states
    when the number of points changes.
    """
    merged = dict(source.data)
    merged.update(columns)
    source.data = merged


def rebuild(state, handle):
    """Recompute the PCA demo from `state` and push it to the live charts.

    Parameters
    ----------
    state : dict
        Counts under 'notresident_notfan', 'notresident_fan',
        'resident_notfan' and 'resident_fan', plus the booleans 'meanscaled'
        and 'stdscaled' that are forwarded to StandardScaler.
    handle
        Notebook handle forwarded to push_notebook.

    Relies on module-level names created elsewhere in the notebook: `config`
    and the renderers/label sets `binscatter`, `pcascatter`, `labels`,
    `pcalabels`, `ev1` and `ev2`.

    With fewer than two points in total the covariance matrix is undefined,
    so the charts are left untouched and nothing is pushed.
    """
    # State to table: one (resident, fan, color) row per person.
    cells = (
        ('notresident_notfan', 0., 0.),
        ('notresident_fan', 0., 1.),
        ('resident_notfan', 1., 0.),
        ('resident_fan', 1., 1.),
    )
    table = []
    for (key, resident, fan), color in zip(cells, config['colors']):
        table.extend([(resident, fan, color)] * state[key])
    if len(table) < 2:
        return

    df = pd.DataFrame(table, columns=['resident', 'fan', 'color'])
    X = StandardScaler(with_mean=state['meanscaled'],
                       with_std=state['stdscaled']).fit_transform(
                           df[['resident', 'fan']])

    # Create the components in order of importance.
    eig_vec = _eigvecs_by_importance(np.cov(X.T))
    projected = X.dot(eig_vec)
    comp1 = projected[:, 0]
    comp2 = projected[:, 1]

    # Label creation: binary data.
    sets = _count_labels(df, 'resident', 'fan', (-.07, .02), (-.1, .02))

    # Label creation: PCA data (rounded so coincident points group together).
    pca_points = pd.DataFrame({'comp1': np.round(comp1, 2),
                               'comp2': np.round(comp2, 2),
                               'color': df.color})
    pcaset = _count_labels(pca_points, 'comp1', 'comp2',
                           (-.1, .02), (-.3, .05))

    # Chart assignment.
    colors = df['color'].tolist()
    _with_columns(binscatter.data_source,
                  x=df['resident'].to_numpy(), y=df['fan'].to_numpy(),
                  fill_color=colors, line_color=colors)
    _with_columns(pcascatter.data_source,
                  x=comp1, y=comp2, fill_color=colors, line_color=colors)
    labels.source.data = ColumnDataSource.from_df(sets)
    pcalabels.source.data = ColumnDataSource.from_df(pcaset)
    _with_columns(ev1.data_source,
                  x=[0, eig_vec[0, 0]], y=[0, eig_vec[1, 0]])
    _with_columns(ev2.data_source,
                  x=[0, eig_vec[0, 1]], y=[0, eig_vec[1, 1]])

    push_notebook(handle=handle)

In [91]:
def update(notresident_notfan=state['notresident_notfan'],
           notresident_fan=state['notresident_fan'],
           resident_notfan=state['resident_notfan'],
           resident_fan=state['resident_fan'],
           meanscaled=True, stdscaled=True, handle=None):
    """Widget callback: bundle the control values and redraw the charts.

    The four count arguments default to the initial values in the
    module-level `state` dict; `meanscaled` and `stdscaled` toggle the
    scaler options used by `rebuild`.

    `handle` defaults to None rather than to the module-level `handle`,
    because that name is only created when the charts are shown in a later
    cell, so referencing it here would fail on a fresh kernel. push_notebook
    treats None as "the handle returned by the last show() call".
    """
    # Named new_state so the module-level `state` is not shadowed.
    new_state = {
        'notresident_notfan': notresident_notfan,
        'notresident_fan': notresident_fan,
        'resident_notfan': resident_notfan,
        'resident_fan': resident_fan,
        'meanscaled': meanscaled,
        'stdscaled': stdscaled,
    }
    rebuild(new_state, handle)
    

In [99]:
# Build the initial charts from `state`/`config`, show them, and wire up the
# sliders. The renderers and label sets created here (binscatter, pcascatter,
# labels, pcalabels, ev1, ev2) are the objects that rebuild() later updates.

# State to table: one (resident, fan, color) row per person.
table = []
table += [(0., 0., config['colors'][0])] * state['notresident_notfan']
table += [(0., 1., config['colors'][1])] * state['notresident_fan']
table += [(1., 0., config['colors'][2])] * state['resident_notfan']
table += [(1., 1., config['colors'][3])] * state['resident_fan']
df = pd.DataFrame(table, columns=['resident', 'fan', 'color'])
X = StandardScaler(with_mean=state['meanscaled'],
                   with_std=state['stdscaled']).fit_transform(
                       df[['resident', 'fan']])
cm = np.cov(X.T)
eig_val, eig_vec = np.linalg.eig(cm)

# Create the components in order of importance. np.linalg.eig returns the
# eigenvectors as columns (column i pairs with eig_val[i]), so sort by
# descending eigenvalue and permute columns, not rows.
ev_order = np.argsort(eig_val)[::-1]
eig_val = eig_val[ev_order]
eig_vec = eig_vec[:, ev_order]
comp1 = eig_vec[:, 0].T.dot(X.T)
comp2 = eig_vec[:, 1].T.dot(X.T)

# Label creation: binary data. Bokeh doesn't grow the axis limits to
# accommodate labels, so labels at the axis maximum are pulled inward.
sets = df.groupby(['resident', 'fan']).agg('count')
sets = sets.reset_index()
sets['xlabel'] = np.where(
    sets['resident'] == max(sets['resident']),
    sets['resident'] - .07, sets['resident'] + .02)
sets['ylabel'] = np.where(
    sets['fan'] == max(sets['fan']),
    sets['fan'] - .1, sets['fan'] + .02)
sets.columns = ['resident', 'fan', 'txt', 'xlabel', 'ylabel']

# Label creation: PCA data (rounded so coincident points group together).
pca_points = pd.DataFrame({'comp1': np.round(comp1, 2),
                           'comp2': np.round(comp2, 2),
                           'color': df.color})
pcaset = pca_points.groupby(['comp1', 'comp2']).agg('count')
pcaset = pcaset.reset_index()
# Add cols that dictate where the chart labels should be placed
pcaset['xlabel'] = np.where(
    pcaset['comp1'] == max(pcaset['comp1']),
    pcaset['comp1'] - .1, pcaset['comp1'] + .02)
pcaset['ylabel'] = np.where(
    pcaset['comp2'] == max(pcaset['comp2']),
    pcaset['comp2'] - .3, pcaset['comp2'] + .05)
pcaset.columns = ['comp1', 'comp2', 'txt', 'xlabel', 'ylabel']

# NOTE(review): plot_width/plot_height are the pre-3.0 Bokeh spelling of
# width/height — confirm the Bokeh version this notebook is pinned to.

# First chart: the raw binary categories.
fig1 = figure(title="Raw Categories", plot_width=config['chartssize'],
              plot_height=config['chartssize'], match_aspect=True)
fig1.xaxis.axis_label = config['city'] + ' Resident'
fig1.yaxis.axis_label = config['team'] + ' Fan'
fig1.toolbar.logo = None
fig1.toolbar_location = None
binscatter = fig1.circle(df['resident'], df["fan"], color=df['color'],
                         fill_alpha=0.2, size=10)
# Dynamically placed count labels.
labels = LabelSet(x='xlabel', y='ylabel', text='txt',
                  source=ColumnDataSource(sets))
fig1.add_layout(labels)

# Second chart: the two eigenvectors drawn from the origin.
fig2 = figure(title='Eigenvectors', plot_width=config['chartssize'],
              plot_height=config['chartssize'], match_aspect=True,
              x_range=(-1, 1), y_range=(-1, 1))
fig2.toolbar.logo = None
fig2.toolbar_location = None
ev1 = fig2.line([0, eig_vec[:, 0][0]], [0, eig_vec[:, 0][1]], line_width=2)
ev2 = fig2.line([0, eig_vec[:, 1][0]], [0, eig_vec[:, 1][1]], line_width=2)

# Third chart: the points projected onto the two components.
fig3 = figure(title='Category Decomposition', plot_width=config['chartssize'],
              plot_height=config['chartssize'], match_aspect=True,
              x_range=(-3, 3), y_range=(-3, 3))
fig3.xaxis.axis_label = 'Component 1'
fig3.yaxis.axis_label = 'Component 2'
fig3.toolbar.logo = None
fig3.toolbar_location = None
pcascatter = fig3.circle(comp1, comp2, color=df['color'],
                         fill_alpha=0.2, size=10)
# Dynamically placed count labels.
pcalabels = LabelSet(x='xlabel', y='ylabel', text='txt',
                     source=ColumnDataSource(pcaset))
fig3.add_layout(pcalabels)

# Put the results in a row; keep the handle so updates can be pushed to it.
handle = show(row(fig1, fig2, fig3), notebook_handle=True)

# NOTE(review): the first slider uses (0, 30) while the other three use
# config['sliderscale'] — confirm the wider range is intentional.
interact(update, notresident_notfan=(0,30),
         notresident_fan=config['sliderscale'],
         resident_notfan=config['sliderscale'],
         resident_fan=config['sliderscale'],
         meanscaled=True, stdscaled=True, handle=fixed(handle))

interactive(children=(IntSlider(value=3, description='notresident_notfan', max=30), IntSlider(value=4, descrip…

<function __main__.update(notresident_notfan=3, notresident_fan=4, resident_notfan=5, resident_fan=3, meanscaled=True, stdscaled=True, handle=<bokeh.io.notebook.CommsHandle object at 0x0113EF90>)>