In [1]:
from ipywidgets import interact, interact_manual, fixed, interactive_output, Layout
import ipywidgets as widgets
import numpy as np
import pandas as pd
from bokeh.io import push_notebook, show, output_notebook
from bokeh.plotting import figure, show
from bokeh.layouts import row
from bokeh.models import Label,ColumnDataSource, LabelSet, Legend, Slope
from scipy.stats import rankdata
from sklearn.preprocessing import StandardScaler, MinMaxScaler
# Direct Bokeh output to the notebook so the later show(..., notebook_handle=True)
# and push_notebook() calls render and update the charts inline.
output_notebook()

In [2]:
# Initial values for the interactive controls: how many people fall into each
# (resident, fan) category, plus the three display/scaling toggles.
state = dict(
    notresident_notfan=3,
    notresident_fan=4,
    resident_notfan=5,
    resident_fan=3,
    showeig=True,
    meanscaled=True,
    stdscaled=True,
)

In [3]:
# Static presentation settings: axis-label names, one color per category
# (indexed 0-3 in the order the categories are appended to the table),
# the (min, max) range of the count sliders, and the chart size in pixels.
config = dict(
    city='LA',
    team='Lakers',
    colors=['#FF0000', '#800080', '#0000FF', '#00FF00'],
    sliderscale=(0, 15),
    chartssize=300,
)

In [4]:
def _slope_through_center(direction, center=0.5, steep=1e6):
    """Return ``(gradient, y_intercept)`` of the line along ``direction``
    that passes through ``(center, center)``.

    The gradient is taken from the direction vector alone (not from a segment
    scaled by the eigenvalue), so a zero eigenvalue no longer produces 0/0.
    A vertical direction cannot be expressed as a finite gradient, so it is
    approximated by ``steep``; all gradients are capped to ``+/-steep`` so the
    values handed to the Slope annotation are always finite.
    """
    dx = float(direction[0])
    dy = float(direction[1])
    if abs(dx) < 1e-12:
        gradient = steep
    else:
        gradient = max(-steep, min(steep, dy / dx))
    return gradient, center - gradient * center


def rebuild(state, handle):
    """Recompute the decomposition for ``state`` and push it to the live charts.

    Parameters
    ----------
    state : dict
        Must contain the four category counts ('notresident_notfan',
        'notresident_fan', 'resident_notfan', 'resident_fan') and the booleans
        'showeig', 'meanscaled' and 'stdscaled'.
    handle :
        Notebook handle forwarded to ``push_notebook``.

    Relies on module-level objects created by the chart-construction cell:
    ``config``, ``binscatter``, ``pcascatter``, ``labels``, ``pcalabels``,
    ``ev1``, ``ev2``, ``ev1ax``, ``ev2ax`` and ``fig3``.

    Returns None. If fewer than two points are requested the charts are left
    untouched, because the covariance matrix is undefined in that case.
    """
    # state to table: one (resident, fan, color) row per person per category
    colors = config['colors']
    table = (
        [(0., 0., colors[0])] * state['notresident_notfan']
        + [(0., 1., colors[1])] * state['notresident_fan']
        + [(1., 0., colors[2])] * state['resident_notfan']
        + [(1., 1., colors[3])] * state['resident_fan']
    )
    if len(table) < 2:
        # With 0 rows the scaler has nothing to fit and with 1 row np.cov is
        # NaN, so the eigen-decomposition below would raise. Skip the update.
        return

    df = pd.DataFrame(table, columns=['resident', 'fan', 'color'])
    X = StandardScaler(with_mean=state['meanscaled'],
                       with_std=state['stdscaled']).fit_transform(
                           df[['resident', 'fan']])
    cm = np.cov(X.T)
    eig_val, eig_vec = np.linalg.eig(cm)

    # Create the components in order of importance (largest eigenvalue first).
    # np.linalg.eig returns eigenvectors as COLUMNS, so the columns (not the
    # rows) must be permuted together with the eigenvalues.
    order = np.argsort(eig_val)[::-1]
    eig_val = eig_val[order]
    eig_vec = eig_vec[:, order]
    comp1 = eig_vec[:, 0].dot(X.T)
    comp2 = eig_vec[:, 1].dot(X.T)

    # Label creation for the binary data: one count label per occupied corner.
    sets = df.groupby(['resident', 'fan']).agg('count').reset_index()
    # Add cols that dictate where the chart labels should be placed in
    # relation to the point. Bokeh doesn't increase the axis limits to
    # accommodate labels, so this keeps the labels within the "square" of points.
    sets['xlabel'] = sets['resident'] - .03
    sets['ylabel'] = np.where(
        sets['fan'] == sets['fan'].max(),
        sets['fan'] - .2, sets['fan'] + .07)
    sets.columns = ['resident', 'fan', 'txt', 'xlabel', 'ylabel']

    # Label creation for the PCA data: points are grouped by their rounded
    # component coordinates and labelled with the group size.
    rounded = pd.DataFrame({'comp1': np.round(comp1, 2),
                            'comp2': np.round(comp2, 2),
                            'color': df.color})
    pcaset = rounded.groupby(['comp1', 'comp2']).agg('count').reset_index()
    # NOTE(review): the initial render in the chart-construction cell uses
    # different offsets for these labels (-.1/+.02 and -.3/+.05); confirm
    # which placement is intended.
    pcaset['xlabel'] = pcaset['comp1'] - .1
    pcaset['ylabel'] = pcaset['comp2'] + .1
    pcaset.columns = ['comp1', 'comp2', 'txt', 'xlabel', 'ylabel']

    # Chart assignment
    binscatter.data_source.data['x'] = df['resident']
    binscatter.data_source.data['y'] = df['fan']
    binscatter.data_source.data['fill_color'] = df['color']
    binscatter.data_source.data['line_color'] = df['color']
    pcascatter.data_source.data['x'] = comp1
    pcascatter.data_source.data['y'] = comp2
    pcascatter.data_source.data['fill_color'] = df['color']
    pcascatter.data_source.data['line_color'] = df['color']
    # Replace the label sources wholesale: the number of label rows changes
    # whenever a category becomes empty, and updating only some columns would
    # leave the remaining ones with a stale length.
    labels.source.data = {name: sets[name].values for name in sets.columns}
    pcalabels.source.data = {name: pcaset[name].values
                             for name in pcaset.columns}

    # Update eigenvectors: each is drawn from the centre of the unit square
    # (0.5, 0.5), with its length scaled by the matching eigenvalue.
    first_axis = eig_vec[:, 0]
    second_axis = eig_vec[:, 1]
    eig1x = [0.5, 0.5 + first_axis[0] * eig_val[0]]
    eig1y = [0.5, 0.5 + first_axis[1] * eig_val[0]]
    eig2x = [0.5, 0.5 + second_axis[0] * eig_val[1]]
    eig2y = [0.5, 0.5 + second_axis[1] * eig_val[1]]
    # Slope and intercept of the dotted lines extending each eigenvector.
    eig1slope, eig1b = _slope_through_center(first_axis)
    eig2slope, eig2b = _slope_through_center(second_axis)

    ev1.data_source.data['x'] = eig1x
    ev1.data_source.data['y'] = eig1y
    ev2.data_source.data['x'] = eig2x
    ev2.data_source.data['y'] = eig2y

    ev1ax.gradient = eig1slope
    ev1ax.y_intercept = eig1b
    ev2ax.gradient = eig2slope
    ev2ax.y_intercept = eig2b

    for renderer in (ev1, ev1ax, ev2, ev2ax):
        renderer.visible = state['showeig']

    # Color the component axes to match the eigenvector lines when shown.
    if state['showeig']:
        fig3.xaxis.axis_line_color = '#FF00FF'
        fig3.yaxis.axis_line_color = '#008080'
        fig3.xaxis.axis_line_width = 3
        fig3.yaxis.axis_line_width = 3
    else:
        fig3.xaxis.axis_line_color = '#000000'
        fig3.yaxis.axis_line_color = '#000000'
        fig3.xaxis.axis_line_width = 1
        fig3.yaxis.axis_line_width = 1

    push_notebook(handle=handle)

In [5]:
# Build one (resident, fan, color) row per person in each of the four categories.
table = (
    [(0., 0., config['colors'][0])] * state['notresident_notfan']
    + [(0., 1., config['colors'][1])] * state['notresident_fan']
    + [(1., 0., config['colors'][2])] * state['resident_notfan']
    + [(1., 1., config['colors'][3])] * state['resident_fan']
)

df = pd.DataFrame(table, columns=['resident', 'fan', 'color'])
X = StandardScaler(with_mean=state['meanscaled'],
                   with_std=state['stdscaled']).fit_transform(
                       df[['resident', 'fan']])
cm = np.cov(X.T)
eig_val, eig_vec = np.linalg.eig(cm)

# Create the components in order of importance (largest eigenvalue first).
# np.linalg.eig returns eigenvectors as COLUMNS, so the columns (not the rows)
# are permuted together with the eigenvalues.
ev_order = np.argsort(eig_val)[::-1]
eig_val = eig_val[ev_order]
eig_vec = eig_vec[:, ev_order]
comp1 = eig_vec[:, 0].dot(X.T)
comp2 = eig_vec[:, 1].dot(X.T)


# Label creation for the binary data: one count label per occupied corner.
sets = df.groupby(['resident', 'fan']).agg('count')
sets = sets.reset_index()
sets['xlabel'] = sets['resident'] - .03
sets['ylabel'] = np.where(
    sets['fan'] == max(sets['fan']),
    sets['fan'] - .2, sets['fan'] + .07)
sets.columns = ['resident', 'fan', 'txt', 'xlabel', 'ylabel']
# Label creation: PCA data, grouped by rounded component coordinates.
_ = pd.DataFrame({'comp1': np.round(comp1, 2), 'comp2': np.round(comp2, 2), 'color': df.color})
pcaset = _.groupby(['comp1', 'comp2']).agg('count')
pcaset = pcaset.reset_index()
# Add cols that dictate where the chart labels should be placed
pcaset['xlabel'] = np.where(
    pcaset['comp1'] == max(pcaset['comp1']),
    pcaset['comp1'] - .1, pcaset['comp1'] + .02)
pcaset['ylabel'] = np.where(
    pcaset['comp2'] == max(pcaset['comp2']),
    pcaset['comp2'] - .3, pcaset['comp2'] + .05)
pcaset.columns = ['comp1', 'comp2', 'txt', 'xlabel', 'ylabel']

# First Chart: the raw binary categories.
fig1 = figure(title="Raw Categories", plot_width=config['chartssize'],
              plot_height=config['chartssize'], match_aspect=True,
              x_range=(-0.5, 1.5), y_range=(-0.5, 1.5))
fig1.xaxis.axis_label = config['city'] + ' Resident'
fig1.yaxis.axis_label = config['team'] + ' Fan'
fig1.toolbar.logo = None
fig1.toolbar_location = None
binscatter = fig1.circle(df['resident'], df["fan"], color=df['color'],
                         fill_alpha=0.2, size=10)

# Plot the first eigenvector with an 'origin' of .5, so it sits in the middle
# of the square of data; its length is scaled by the eigenvalue.
eig1x = [0.5, 0.5 + eig_vec[:, 0][0] * eig_val[0]]
eig1y = [0.5, 0.5 + eig_vec[:, 0][1] * eig_val[0]]
# Slope and intercept of the dotted line extending the eigenvector. The slope
# is taken from the direction alone so a zero eigenvalue does not give 0/0,
# and it is capped at +/-1e6 because a vertical direction has no finite gradient.
eig1dx, eig1dy = eig_vec[:, 0]
eig1slope = (float(np.clip(eig1dy / eig1dx, -1e6, 1e6))
             if abs(eig1dx) > 1e-12 else 1e6)
eig1b = 0.5 - eig1slope * 0.5

ev1ax = Slope(gradient=eig1slope, y_intercept=eig1b,
              line_dash='dotted', line_color='grey')
fig1.add_layout(ev1ax)
ev1 = fig1.line(eig1x, eig1y,
                line_width=2, color='#FF00FF')

# Plot the second eigenvector orthogonal to the first, intersecting it at .5
# to keep it centered.
eig2x = [0.5, 0.5 + eig_vec[:, 1][0] * eig_val[1]]
eig2y = [0.5, 0.5 + eig_vec[:, 1][1] * eig_val[1]]
eig2dx, eig2dy = eig_vec[:, 1]
eig2slope = (float(np.clip(eig2dy / eig2dx, -1e6, 1e6))
             if abs(eig2dx) > 1e-12 else 1e6)
eig2b = 0.5 - eig2slope * 0.5

ev2ax = Slope(gradient=eig2slope, y_intercept=eig2b,
              line_dash='dotted', line_color='grey')
fig1.add_layout(ev2ax)

ev2 = fig1.line(eig2x, eig2y,
                line_width=2, color='#008080')

# Dynamically place labels
labels = LabelSet(x='xlabel', y='ylabel', text='txt', source=ColumnDataSource(sets))
fig1.add_layout(labels)

# Second Chart: currently has no glyphs (its eigenvector lines and legend were
# disabled); it is still created because it is placed in the layout row.
fig2 = figure(title='', plot_width=config['chartssize'],
              plot_height=config['chartssize'], match_aspect=True,
              x_range=(-2, 2), y_range=(-2, 2))
fig2.toolbar.logo = None
fig2.toolbar_location = None


# Third Chart: the same points projected onto the two components.
fig3 = figure(title='Category Decomposition', plot_width=config['chartssize'],
              plot_height=config['chartssize'], match_aspect=True,
              x_range=(-3.5, 3.5), y_range=(-3.5, 3.5))
fig3.xaxis.axis_label = 'Component 1'
fig3.yaxis.axis_label = 'Component 2'
fig3.toolbar.logo = None
fig3.toolbar_location = None
pcascatter = fig3.circle(comp1, comp2, color=df['color'],
                         fill_alpha=0.2, size=10)
fig3.xaxis.axis_line_width = 3
fig3.yaxis.axis_line_width = 3

# Dynamically place labels
pcalabels = LabelSet(x='xlabel', y='ylabel', text='txt', source=ColumnDataSource(pcaset))

fig3.add_layout(pcalabels)
# The figures are put in a row and shown in the next cell.

In [14]:
# Show the three figures side by side and keep the notebook handle so that
# push_notebook() can update the rendered charts in place.
chart_row = row(fig1, fig3, fig2)
handle = show(chart_row, notebook_handle=True)
    
def update(notresident_notfan=state['notresident_notfan'],
           notresident_fan=state['notresident_fan'],
           resident_notfan=state['resident_notfan'],
           resident_fan=state['resident_fan'],
           showeig=state['showeig'],
           meanscaled=state['meanscaled'], 
           stdscaled=state['stdscaled'], handle=handle):
    """Widget callback: pack the current control values and redraw the charts.

    Each keyword mirrors one key of the module-level ``state`` dict (whose
    values are used as defaults, captured when this function is defined).
    The values are bundled into a fresh dict and handed to ``rebuild``
    together with ``handle``; the module-level ``state`` is not modified.
    """
    snapshot = dict(
        notresident_notfan=notresident_notfan,
        notresident_fan=notresident_fan,
        resident_notfan=resident_notfan,
        resident_fan=resident_fan,
        showeig=showeig,
        meanscaled=meanscaled,
        stdscaled=stdscaled,
    )
    rebuild(snapshot, handle)
    

def _count_slider(state_key, description):
    """Build the IntSlider for one category count, seeded from ``state``."""
    low, high = config['sliderscale']
    return widgets.IntSlider(value=state[state_key],
                             min=low,
                             max=high,
                             description=description,
                             style={'description_width': 'initial'},
                             step=1, layout=Layout(width='50%'))


# One slider per (resident, fan) category.
nn = _count_slider('notresident_notfan', 'Non-resident, Non-fans')
nf = _count_slider('notresident_fan', 'Non-resident Fans')
rn = _count_slider('resident_notfan', 'Resident, Non-fans')
rf = _count_slider('resident_fan', 'Resident Fans')

# Toggles start from ``state`` (like the sliders) so the controls always match
# the initially rendered charts, instead of a hard-coded True.
eb = widgets.ToggleButton(value=state['showeig'], icon='check',
                          description='Show Eigenvectors (Secondary Axis Overlay)',
                          layout=Layout(width='33.3%'))
ms = widgets.ToggleButton(value=state['meanscaled'], icon='check',
                          description='Scale Mean to Zero Before Decomposition',
                          layout=Layout(width='33.3%'))
ss = widgets.ToggleButton(value=state['stdscaled'], icon='check',
                          description='Scale Std. Dev. to 1 Before Decomposition',
                          style={'description_width': 'initial'},
                          layout=Layout(width='33.3%'))
toprow = widgets.HBox([nn, rn])
midrow = widgets.HBox([nf, rf])
botrow = widgets.HBox([eb, ms, ss])

# Wire every control to update(); the notebook handle is passed through
# unchanged via fixed(). The returned Output widget is kept so it can be
# displayed when debugging errors raised inside update().
controls_out = interactive_output(update, {'notresident_notfan': nn,
                                           'notresident_fan': nf,
                                           'resident_notfan': rn,
                                           'resident_fan': rf,
                                           'showeig': eb,
                                           'meanscaled': ms,
                                           'stdscaled': ss,
                                           'handle': fixed(handle)})

display(toprow, midrow, botrow)

HBox(children=(IntSlider(value=3, description='Non-resident, Non-fans', layout=Layout(width='50%'), max=15, st…

HBox(children=(IntSlider(value=4, description='Non-resident Fans', layout=Layout(width='50%'), max=15, style=S…

HBox(children=(ToggleButton(value=True, description='Show Eigenvectors (Secondary Axis Overlay)', icon='check'…

In [7]:
from bokeh.io import save

In [8]:
#save([fig1,fig3],'chart.html')

In [9]:
# Inspect the module-level list of (resident, fan, color) rows that was built
# for the initial charts (rebuild() works on its own local table).
table

[(0.0, 0.0, '#FF0000'),
 (0.0, 0.0, '#FF0000'),
 (0.0, 0.0, '#FF0000'),
 (0.0, 1.0, '#800080'),
 (0.0, 1.0, '#800080'),
 (0.0, 1.0, '#800080'),
 (0.0, 1.0, '#800080'),
 (1.0, 0.0, '#0000FF'),
 (1.0, 0.0, '#0000FF'),
 (1.0, 0.0, '#0000FF'),
 (1.0, 0.0, '#0000FF'),
 (1.0, 0.0, '#0000FF'),
 (1.0, 1.0, '#00FF00'),
 (1.0, 1.0, '#00FF00'),
 (1.0, 1.0, '#00FF00')]