In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from bokeh.io import output_notebook, push_notebook, show
from bokeh.layouts import row
from bokeh.models import ColumnDataSource, Label, LabelSet
from bokeh.plotting import figure, show
from ipywidgets import fixed, interact, interact_manual
from scipy.stats import rankdata
from sklearn.preprocessing import StandardScaler

# Configure Bokeh to render its plots inline in this notebook.
output_notebook()

In [93]:
# Toy binary data set, one row per person:
# (la-resident, fan-of-lakers, plotting colour for that combination).
fake = pd.DataFrame(
    [
        (1., 1., 'green'),
        (0., 0., 'red'),
        (1., 1., 'green'),
        (1., 1., 'green'),
        (0., 0., 'red'),
        (0., 0., 'red'),
        (0., 1., 'purple'),
        (1., 0., 'blue'),
        (1., 0., 'blue'),
        (1., 0., 'blue'),
        (1., 0., 'blue'),
        (1., 0., 'blue'),
        (0., 1., 'purple'),
        (0., 1., 'purple'),
        (0., 1., 'purple'),
    ],
    columns=['la-resident', 'fan-of-lakers', 'color'],
)
fake

Unnamed: 0,la-resident,fan-of-lakers,color
0,1.0,1.0,green
1,0.0,0.0,red
2,1.0,1.0,green
3,1.0,1.0,green
4,0.0,0.0,red
5,0.0,0.0,red
6,0.0,1.0,purple
7,1.0,0.0,blue
8,1.0,0.0,blue
9,1.0,0.0,blue


In [94]:
# Count the rows sitting on each (la-resident, fan-of-lakers) point and work
# out where the count label for that point should be drawn.
# Bokeh doesn't grow the axis limits to accommodate labels, so labels on the
# largest x / y value are pulled inward and all others pushed slightly
# outward from the point -- this keeps every label within the "square" of
# points.
sets = (
    fake.groupby(['la-resident', 'fan-of-lakers'])
    .agg('count')
    .reset_index()
    .assign(
        xlabel=lambda d: np.where(
            d['la-resident'] == d['la-resident'].max(),
            d['la-resident'] - .07,
            d['la-resident'] + .02),
        ylabel=lambda d: np.where(
            d['fan-of-lakers'] == d['fan-of-lakers'].max(),
            d['fan-of-lakers'] - .1,
            d['fan-of-lakers'] + .02),
    )
    # the remaining counted column ('color') holds the label text
    .rename(columns={'color': 'txt'})
)
sets

Unnamed: 0,la-resident,fan-of-lakers,txt,xlabel,ylabel
0,0.0,0.0,3,0.02,0.02
1,0.0,1.0,4,0.02,0.9
2,1.0,0.0,5,0.93,0.02
3,1.0,1.0,3,0.93,0.9


In [95]:
# Project the standardized data onto its principal components and build the
# table of distinct projected points (with per-point counts) used to label
# the PCA chart.
meanScaling = True
stdScaling = True

X = StandardScaler(with_mean=meanScaling,
                   with_std=stdScaling).fit_transform(fake[['la-resident', 'fan-of-lakers']])
eig_val_cov, eig_vec_cov = np.linalg.eig(np.cov(X.T))
# Order the components by importance (largest eigenvalue first).
# np.linalg.eig returns the eigenvectors as the COLUMNS of its second result,
# in no particular order, so the columns (not the rows) are permuted, using
# the descending sort order of the eigenvalues.
ev_order = np.argsort(eig_val_cov)[::-1]
eig_val_cov = eig_val_cov[ev_order]
eig_vec_cov = eig_vec_cov[:, ev_order]
comp1 = X.dot(eig_vec_cov[:, 0])
comp2 = X.dot(eig_vec_cov[:, 1])

# Round so that rows projecting onto the same point fall into one group.
# (A real name is used here instead of `_`, which IPython reserves for the
# previous cell output.)
components = pd.DataFrame({0: np.round(comp1, 2), 1: np.round(comp2, 2), 'color': fake.color})
pcaset = components.groupby([0, 1]).agg('count')
pcaset = pcaset.reset_index()
# Add cols that dictate where the chart labels should be placed: labels on
# the largest value of an axis are nudged inward, all others outward.
pcaset['xNudge'] = np.where(
    pcaset[0] == max(pcaset[0]),
    -.2, .07)
pcaset['yNudge'] = np.where(
    pcaset[1] == max(pcaset[1]),
    -.25, .02)
# the counted 'color' column becomes the label text
pcaset.columns = [0, 1, 'txt', 'xNudge', 'yNudge']
display(pcaset)

Unnamed: 0,0,1,txt,xNudge,yNudge
0,-1.51,0.0,4,0.07,0.02
1,-0.09,-1.42,3,0.07,0.02
2,-0.09,1.42,3,0.07,-0.25
3,1.32,0.0,5,-0.2,0.02


In [96]:
# Initiate 3 charts in a row: initial data, eigenvectors, PCA data.
# The renderers (b, ev1, ev2, c), the LabelSet (labels) and the notebook
# handle (z) are kept in module-level names so a callback can update them.
s = 300  # width and height of each chart
# NOTE(review): plot_width/plot_height are the pre-3.0 Bokeh spellings
# (width/height in Bokeh 3) -- adjust if the installed Bokeh version changes.

# First chart: the raw binary data
p = figure(title = "Binary Dataset", plot_width=s, plot_height=s, match_aspect=True)
p.xaxis.axis_label = 'LA Resident'
p.yaxis.axis_label = 'Lakers Fan'
b = p.circle(fake['la-resident'], fake["fan-of-lakers"], color=fake['color'],
             fill_alpha=0.2, size=10)
# Count labels, positioned from the precomputed xlabel/ylabel columns of `sets`
labels = LabelSet(x='xlabel', y='ylabel', text='txt', source=ColumnDataSource(sets))
p.add_layout(labels)

# Second chart: one line from the origin along each eigenvector (column of eig_vec_cov)
ev = figure(title = 'Eigenvectors, Eigenvalues', plot_width=s,
            plot_height=s, match_aspect=True)
ev1 = ev.line([0, eig_vec_cov[0, 0]], [0, eig_vec_cov[1, 0]], line_width=2)
ev2 = ev.line([0, eig_vec_cov[0, 1]], [0, eig_vec_cov[1, 1]], line_width=2)

# Third chart: the data projected onto the two components
pca = figure(title = 'PCA of Binary Dataset', plot_width=s,
             plot_height=s, match_aspect=True)
pca.xaxis.axis_label = 'Component 1'
pca.yaxis.axis_label = 'Component 2'
c = pca.circle(comp1, comp2, color=fake['color'],
               fill_alpha=0.2, size=10)
# Place one count label next to each distinct projected point. Columns are
# addressed by name: the row index of pcaset mixes integer and string labels,
# so integer keys such as 3 or 4 are not valid positional lookups.
for pc1, pc2, n_points, dx, dy in zip(pcaset[0], pcaset[1], pcaset['txt'],
                                      pcaset['xNudge'], pcaset['yNudge']):
    pca.add_layout(Label(x=pc1 + dx, y=pc2 + dy, text=str(int(n_points))))

# put the results in a row; keep the handle for push_notebook
z = show(row(p, ev, pca), notebook_handle=True)

In [112]:
# Debug check: show the 'xlabel' column currently held by the LabelSet's data source.
print(labels.source.data['xlabel'])

[0.02 0.02 0.93 0.93]


In [103]:
# Debug aid: list the attribute names available on the LabelSet object.
dir(labels)

['__cached_all__overridden_defaults__',
 '__cached_all__properties__',
 '__cached_all__properties_with_refs__',
 '__class__',
 '__container_props__',
 '__dataspecs__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__overridden_defaults__',
 '__properties__',
 '__properties_with_refs__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__view_model__',
 '__weakref__',
 '_attach_document',
 '_callbacks',
 '_clear_extensions',
 '_clone',
 '_detach_document',
 '_document',
 '_event_callbacks',
 '_id',
 '_overridden_defaults',
 '_property_values',
 '_repr_html_',
 '_temp_document',
 '_to_json_like',
 '_trigger_event',
 '_unstable_default_values',
 '_unstable_themed_values',
 '_update_event_callbacks',
 'angle',
 'angle_units',
 'apply_theme',
 'b

In [97]:
# Manual-trigger widget for the PCA demo. `update` must be the
# (action, pt, meanScaling, stdScaling) callback, so run the cell defining it first.
# Each `pt` option is [la-resident, fan-of-lakers, color]; the colours follow
# the mapping used in `fake`: (0,0) red, (0,1) purple, (1,0) blue, (1,1) green.
interact_manual(update, action=['add', 'del', 'change scaling only'],
                pt=[[0, 0, 'red'], [0, 1, 'purple'], [1, 0, 'blue'], [1, 1, 'green']],
                meanScaling=True, stdScaling=True)

interactive(children=(Dropdown(description='action', options=('add', 'del', 'change scaling only'), value='add…

<function __main__.update(action, pt, meanScaling, stdScaling)>

In [92]:
def _replace_scatter_data(renderer, xs, ys, colors):
    """Swap all columns of a scatter renderer's data source in one assignment."""
    source = renderer.data_source
    # Keep every per-point colour column already present on the source in sync
    # (NOTE(review): some Bokeh versions may add more than fill/line colour).
    data = {name: colors for name in source.data if name.endswith('_color')}
    data.update({'x': xs, 'y': ys, 'fill_color': colors, 'line_color': colors})
    # Assigning the whole dict at once keeps all columns the same length;
    # column-by-column assignment leaves them mismatched when the row count changes.
    source.data = data


def update(action, pt, meanScaling, stdScaling):
    """Add or remove one point in ``fake`` and refresh the displayed charts.

    Parameters
    ----------
    action : str
        'add' appends ``pt`` as a new row; 'del' removes the first row whose
        coordinates equal ``pt``; any other value (e.g. 'change scaling only')
        leaves the data unchanged and only recomputes the PCA.
    pt : sequence
        ``[la-resident, fan-of-lakers, color]``.
    meanScaling, stdScaling : bool
        Passed to ``StandardScaler`` as ``with_mean`` / ``with_std``.

    Side effects
    ------------
    Rebinds the global ``fake`` and pushes new data to the module-level
    renderers ``b``, ``c``, ``ev1``, ``ev2``, the ``labels`` LabelSet and the
    notebook handle ``z``. The count labels on the PCA chart are static
    ``Label`` annotations and are NOT refreshed here.
    """
    global fake
    if action == 'add':
        # DataFrame.append was removed in pandas 2.0; concat is the replacement.
        new_row = pd.DataFrame([{'la-resident': pt[0],
                                 'fan-of-lakers': pt[1],
                                 'color': pt[2]}])
        fake = pd.concat([fake, new_row], ignore_index=True)
    elif action == 'del':
        matches = fake.index[(fake['la-resident'] == pt[0])
                             & (fake['fan-of-lakers'] == pt[1])]
        if len(matches) > 0:
            # drop() returns the new frame; combining it with inplace=True
            # would rebind `fake` to None.
            fake = fake.drop(matches[0]).reset_index(drop=True)

    # Point counts and label positions for the first chart. Bokeh doesn't grow
    # the axis limits to accommodate labels, so labels on the largest x / y
    # value are nudged inward to stay within the "square" of points.
    counts = (fake.groupby(['la-resident', 'fan-of-lakers']).agg('count')
              .reset_index()
              .rename(columns={'color': 'txt'}))
    counts['xlabel'] = counts['la-resident'] + np.where(
        counts['la-resident'] == counts['la-resident'].max(), -.07, .02)
    counts['ylabel'] = counts['fan-of-lakers'] + np.where(
        counts['fan-of-lakers'] == counts['fan-of-lakers'].max(), -.1, .02)

    # PCA of the (optionally centred / scaled) data.
    X = StandardScaler(with_mean=meanScaling,
                       with_std=stdScaling).fit_transform(fake[['la-resident', 'fan-of-lakers']])
    eig_val_cov, eig_vec_cov = np.linalg.eig(np.cov(X.T))
    # Components in order of importance: eigenvectors are the COLUMNS of
    # eig_vec_cov, so permute columns by descending eigenvalue.
    ev_order = np.argsort(eig_val_cov)[::-1]
    eig_vec_cov = eig_vec_cov[:, ev_order]
    comp1 = X.dot(eig_vec_cov[:, 0])
    comp2 = X.dot(eig_vec_cov[:, 1])

    # Push the new data to the existing renderers.
    colors = fake['color'].tolist()
    _replace_scatter_data(b, fake['la-resident'].tolist(),
                          fake['fan-of-lakers'].tolist(), colors)
    _replace_scatter_data(c, comp1, comp2, colors)
    ev1.data_source.data = {'x': [0, float(eig_vec_cov[0, 0])],
                            'y': [0, float(eig_vec_cov[1, 0])]}
    ev2.data_source.data = {'x': [0, float(eig_vec_cov[0, 1])],
                            'y': [0, float(eig_vec_cov[1, 1])]}
    # Rebuild every column of the label source from the fresh counts table.
    labels.source.data = ColumnDataSource.from_df(counts)

    push_notebook(handle=z)

In [73]:
# Scratch cell: draw the points on `p` again (rebinding `b` to the new
# renderer) and inspect the line_color column of its data source.
b = p.circle(
    fake['la-resident'],
    fake["fan-of-lakers"],
    color=fake['color'],
    fill_alpha=0.2,
    size=10,
)
b.data_source.data['line_color']

0      green
1        red
2      green
3      green
4        red
5        red
6     purple
7       blue
8       blue
9       blue
10      blue
11      blue
12    purple
13    purple
14    purple
15       red
Name: color, dtype: object

In [33]:
# 2000 evenly spaced samples over [0, 2*pi] and their sine.
x = np.linspace(start=0, stop=2 * np.pi, num=2000)
y = np.sin(x)

In [42]:
# Figure for the sine/cosine demo; `r` is the line renderer built from x and y.
p = figure(
    title="simple line example",
    plot_height=300,
    plot_width=600,
    y_range=(-5,5),
    background_fill_color='#efefef',
)
r = p.line(x, y, color="#8888cc", line_width=1.5, alpha=0.8)
type(p)

bokeh.plotting.figure.Figure

In [50]:
def update(f, w=1, A=1, phi=0, l=2000):
    """Redraw the demo line as ``A * f(w * x + phi)`` on ``l`` sample points.

    Parameters
    ----------
    f : str
        Waveform name, "sin" or "cos".
    w, A, phi : number
        Multiplier on x, multiplier on the result, and additive offset inside
        the function argument.
    l : int
        Number of samples taken over [0, 2*pi].

    Raises
    ------
    ValueError
        If ``f`` is not "sin" or "cos".

    Writes to the data source of the module-level renderer ``r`` and calls
    ``push_notebook()`` without an explicit handle.
    """
    waveforms = {"sin": np.sin, "cos": np.cos}
    try:
        func = waveforms[f]
    except KeyError:
        # Without this guard an unknown name would leave `func` unbound.
        raise ValueError("f must be 'sin' or 'cos', got %r" % (f,)) from None
    x = np.linspace(0, 2*np.pi, l)
    # Replace both columns in one assignment so x and y never differ in
    # length (they would, briefly, when `l` changes and they are set one by one).
    r.data_source.data = {'x': x, 'y': A * func(w * x + phi)}
    push_notebook()

In [51]:
# Display the figure with notebook_handle=True so it can be updated in place.
# NOTE(review): the returned handle is not stored; the push_notebook() call in
# update() passes no handle -- presumably it targets the most recently shown
# plot; confirm against the Bokeh docs.
show(p, notebook_handle=True)

In [52]:
# Live controls for the sine/cosine demo: a dropdown for the waveform and
# sliders for w, A, phi and the number of sample points l.
# (`interact` is imported with the other ipywidgets names in the import cell
# at the top of the notebook.)
interact(update, f=["sin", "cos"], w=(0,50), A=(1,10), phi=(0, 20, 0.1), l=(0,2000))


interactive(children=(Dropdown(description='f', options=('sin', 'cos'), value='sin'), IntSlider(value=1, descr…

<function __main__.update(f, w=1, A=1, phi=0, l=2000)>