In [2]:
from ipywidgets import interact_manual, fixed
import numpy as np
import pandas as pd
from bokeh.io import push_notebook, show, output_notebook
from bokeh.plotting import figure, show
from bokeh.layouts import row
from bokeh.models import Label,ColumnDataSource, LabelSet
from scipy.stats import rankdata
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

#Configure Bokeh so that show() renders plots inline in this notebook
output_notebook()

In [34]:
#Toy binary dataset: one row per person, written as
#(la-resident, fan-of-lakers, color). The color encodes which of the four
#(resident, fan) combinations the row belongs to.
fake = pd.DataFrame(
    [
        (1., 1., 'green'),
        (0., 0., 'red'),
        (1., 1., 'green'),
        (1., 1., 'green'),
        (0., 0., 'red'),
        (0., 0., 'red'),
        (0., 1., 'purple'),
        (1., 0., 'blue'),
        (1., 0., 'blue'),
        (1., 0., 'blue'),
        (1., 0., 'blue'),
        (1., 0., 'blue'),
        (0., 1., 'purple'),
        (0., 1., 'purple'),
        (0., 1., 'purple'),
    ],
    columns=['la-resident', 'fan-of-lakers', 'color'],
)
fake

Unnamed: 0,la-resident,fan-of-lakers,color
0,1.0,1.0,green
1,0.0,0.0,red
2,1.0,1.0,green
3,1.0,1.0,green
4,0.0,0.0,red
5,0.0,0.0,red
6,0.0,1.0,purple
7,1.0,0.0,blue
8,1.0,0.0,blue
9,1.0,0.0,blue


In [35]:
#Count how many rows fall on each (la-resident, fan-of-lakers) corner, then
#work out where the count label for that corner should be drawn.
#Bokeh doesn't grow the axis limits to accommodate labels, so labels on the
#largest x / y value are shifted back inside the "square" of points while
#every other label is nudged slightly past its point.
sets = (
    fake.groupby(['la-resident', 'fan-of-lakers'])
    .agg('count')
    .reset_index()
    .assign(
        xlabel=lambda d: np.where(
            d['la-resident'] == d['la-resident'].max(),
            d['la-resident'] - .07,
            d['la-resident'] + .02),
        ylabel=lambda d: np.where(
            d['fan-of-lakers'] == d['fan-of-lakers'].max(),
            d['fan-of-lakers'] - .1,
            d['fan-of-lakers'] + .02),
    )
)
#The counted column becomes 'txt', the text shown by the chart labels
sets.columns = ['la-resident', 'fan-of-lakers', 'txt', 'xlabel', 'ylabel']
sets

Unnamed: 0,la-resident,fan-of-lakers,txt,xlabel,ylabel
0,0.0,0.0,3,0.02,0.02
1,0.0,1.0,4,0.02,0.9
2,1.0,0.0,5,0.93,0.02
3,1.0,1.0,3,0.93,0.9


In [36]:
#Create table of the clusters created by the principal components:
#standardize the two binary columns, project them onto the eigenvectors of
#their covariance matrix and count the points landing on each projection.
meanScaling = True
stdScaling = True

X = StandardScaler(with_mean=meanScaling,
                   with_std=stdScaling).fit_transform(fake[['la-resident', 'fan-of-lakers']])
eig_val_cov, eig_vec_cov = np.linalg.eig(np.cov(X.T))
#Create the components in order of importance.
#np.linalg.eig returns the eigenvectors as the COLUMNS of its second result
#and does not sort them, so reorder the columns (not the rows) by descending
#eigenvalue. The stable sort keeps eig's own order when eigenvalues tie.
ev_order = np.argsort(-eig_val_cov, kind='stable')
eig_val_cov = eig_val_cov[ev_order]
eig_vec_cov = eig_vec_cov[:, ev_order]
comp1 = eig_vec_cov[:, 0].dot(X.T)
comp2 = eig_vec_cov[:, 1].dot(X.T)
#Round before grouping so points with the same projection (up to float noise)
#are counted together. A descriptive name is used instead of `_`, which
#IPython reserves for the previous cell output.
pca_points = pd.DataFrame({'comp1': np.round(comp1, 2),
                           'comp2': np.round(comp2, 2),
                           'color': fake.color})
pcaset = pca_points.groupby(['comp1', 'comp2']).agg('count')
pcaset = pcaset.reset_index()
#Add cols that dictate where the chart labels should be placed: labels on the
#largest coordinate are pulled inwards, the others are nudged outwards
pcaset['xlabel'] = np.where(
    pcaset['comp1'] == max(pcaset['comp1']),
    pcaset['comp1']-.1, pcaset['comp1']+.02)
pcaset['ylabel'] = np.where(
    pcaset['comp2'] == max(pcaset['comp2']),
    pcaset['comp2']-.1, pcaset['comp2']+.02)
#The counted column becomes 'txt', the text shown by the chart labels
pcaset.columns = ['comp1', 'comp2', 'txt', 'xlabel', 'ylabel']
display(pcaset)

Unnamed: 0,comp1,comp2,txt,xlabel,ylabel
0,-1.51,0.0,4,-1.49,0.02
1,-0.09,-1.42,3,-0.07,-1.4
2,-0.09,1.42,3,-0.07,1.32
3,1.32,0.0,5,1.22,0.02


In [38]:
#Build three side-by-side charts: the raw binary data, the eigenvectors of
#its covariance matrix, and the data projected onto the components.
s = 300 #Width and height of every chart

#First chart: raw points, one color per (resident, fan) combination
p = figure(title="Binary Dataset", plot_width=s, plot_height=s,
           match_aspect=True,
           x_axis_label='LA Resident', y_axis_label='Lakers Fan')
b = p.circle(x=fake['la-resident'], y=fake['fan-of-lakers'],
             color=fake['color'], fill_alpha=0.2, size=10)
#Count labels, positioned by the xlabel/ylabel columns of `sets`
labels = LabelSet(source=ColumnDataSource(sets),
                  x='xlabel', y='ylabel', text='txt')
p.add_layout(labels)

#Second chart: one segment from the origin along each column of eig_vec_cov
ev = figure(title='Eigenvectors, Eigenvalues', plot_width=s,
            plot_height=s, match_aspect=True)
ev1, ev2 = [
    ev.line([0, eig_vec_cov[0, k]], [0, eig_vec_cov[1, k]], line_width=2)
    for k in (0, 1)
]

#Third chart: the same points expressed in component coordinates
pca = figure(title='Binary Value Decomposition', plot_width=s,
             plot_height=s, match_aspect=True,
             x_axis_label='Component 1', y_axis_label='Component 2')
c = pca.circle(x=comp1, y=comp2, color=fake['color'],
               fill_alpha=0.2, size=10)
#Count labels, positioned by the xlabel/ylabel columns of `pcaset`
pcalabels = LabelSet(source=ColumnDataSource(pcaset),
                     x='xlabel', y='ylabel', text='txt')
pca.add_layout(pcalabels)

#Show the charts in a row; the handle `z` is what push_notebook(handle=z)
#uses to refresh them in place
z = show(row(p, ev, pca), notebook_handle=True)



In [None]:
#Remove controls
#lock axes somehow
#error warning
#delete points

In [33]:
#Widget panel for `update`: the callback only runs when the button is pressed.
#`pt` offers the four corner points as [la-resident, fan-of-lakers, color].
#NOTE: `update` must already be defined when this cell runs.
interact_manual(
    update,
    action=['add', 'del', 'change scaling only'],
    pt=[[0,0,'red'], [1,0,'blue'], [0,1,'purple'], [1,1,'green']],
    meanScaling=True,
    stdScaling=True,
)

interactive(children=(Dropdown(description='action', options=('add', 'del', 'change scaling only'), value='add…

<function __main__.update(action, pt, meanScaling, stdScaling)>

In [32]:
def _label_coords(values, inset):
    """Return label coordinates that keep count labels inside the plotted points."""
    # Bokeh doesn't increase the axis limits to accommodate labels, so labels
    # on the largest coordinate are pulled inwards by `inset`; every other
    # label is nudged slightly past its point.
    return np.where(values == values.max(), values - inset, values + .02)


def _project_onto_components(X):
    """Project the rows of X onto the covariance eigenvectors, most important first."""
    eig_val_cov, eig_vec_cov = np.linalg.eig(np.cov(X.T))
    # np.linalg.eig returns eigenvectors as unsorted COLUMNS, so reorder the
    # columns (not the rows) by descending eigenvalue. The stable sort keeps
    # eig's own order when eigenvalues tie.
    order = np.argsort(-eig_val_cov, kind='stable')
    eig_vec_cov = eig_vec_cov[:, order]
    return eig_vec_cov[:, 0].dot(X.T), eig_vec_cov[:, 1].dot(X.T)


def _replace_columns(source, columns):
    """Swap new columns into a Bokeh data source with a single assignment."""
    # Writing columns one by one leaves the source with mismatched lengths
    # between writes whenever the number of rows changes.
    merged = dict(source.data)
    merged.update(columns)
    source.data = merged


def update(action, pt, meanScaling, stdScaling):
    """Apply an edit to the global `fake` dataset and refresh the charts.

    Args:
        action: 'add' appends `pt` as a new row; 'del' removes the first row
            whose two binary values match `pt` (nothing happens if none does);
            any other value leaves the data untouched and only recomputes the
            decomposition with the given scaling options.
        pt: sequence ``[la-resident, fan-of-lakers, color]``.
        meanScaling: forwarded to ``StandardScaler(with_mean=...)``.
        stdScaling: forwarded to ``StandardScaler(with_std=...)``.

    Side effects:
        Rebinds the global `fake`, rewrites the data behind the glyphs `b`, `c`
        and the label sets `labels`, `pcalabels`, displays the projected
        cluster table and pushes the change to the notebook handle `z`.
        The eigenvector chart is not refreshed by this function.
    """
    global fake
    if action == 'add':
        # DataFrame.append was removed in pandas 2.0; concat is the
        # supported equivalent and also renumbers the index.
        new_row = pd.DataFrame([{'la-resident': pt[0],
                                 'fan-of-lakers': pt[1],
                                 'color': pt[2]}])
        fake = pd.concat([fake, new_row], ignore_index=True)
    elif action == 'del':
        matches = fake.index[((fake['la-resident'] == pt[0])
                              & (fake['fan-of-lakers'] == pt[1])).values]
        if len(matches) > 0:
            # drop(..., inplace=True) returns None, so assigning its result
            # wiped out the dataset; use the returned copy instead.
            fake = fake.drop(matches[0]).reset_index(drop=True)

    # Count the points on each (la-resident, fan-of-lakers) combination.
    sets = fake.groupby(['la-resident', 'fan-of-lakers']).agg('count')
    sets = sets.reset_index()
    sets['xlabel'] = _label_coords(sets['la-resident'], .07)
    sets['ylabel'] = _label_coords(sets['fan-of-lakers'], .1)
    sets.columns = ['la-resident', 'fan-of-lakers', 'txt', 'xlabel', 'ylabel']

    # Re-run the decomposition with the requested scaling.
    X = StandardScaler(with_mean=meanScaling,
                       with_std=stdScaling).fit_transform(fake[['la-resident', 'fan-of-lakers']])
    comp1, comp2 = _project_onto_components(X)

    # Round before grouping so equal projections (up to float noise) are
    # counted together.
    projected = pd.DataFrame({'comp1': np.round(comp1, 2),
                              'comp2': np.round(comp2, 2),
                              'color': fake['color'].values})
    pcaset = projected.groupby(['comp1', 'comp2']).agg('count')
    pcaset = pcaset.reset_index()
    pcaset['xlabel'] = _label_coords(pcaset['comp1'], .1)
    pcaset['ylabel'] = _label_coords(pcaset['comp2'], .1)
    pcaset.columns = ['comp1', 'comp2', 'txt', 'xlabel', 'ylabel']
    display(pcaset)

    # Push the new points, colors and count labels into the existing charts.
    _replace_columns(b.data_source, {'x': fake['la-resident'],
                                     'y': fake['fan-of-lakers'],
                                     'fill_color': fake['color'],
                                     'line_color': fake['color']})
    _replace_columns(c.data_source, {'x': comp1,
                                     'y': comp2,
                                     'fill_color': fake['color'],
                                     'line_color': fake['color']})
    _replace_columns(labels.source, {'txt': sets['txt'],
                                     'xlabel': sets['xlabel'],
                                     'ylabel': sets['ylabel']})
    _replace_columns(pcalabels.source, {'txt': pcaset['txt'],
                                        'xlabel': pcaset['xlabel'],
                                        'ylabel': pcaset['ylabel']})

    push_notebook(handle=z)

In [24]:
#Inspect the 'txt' column (per-cluster point counts) held by the label source of the PCA chart
pcalabels.source.data['txt']

array([4, 3, 3, 5], dtype=int64)

In [None]:
#Scratch cell: calls p.circle on the dataset again, rebinds `b` to the result
#and inspects the 'line_color' column of its data source.
#NOTE(review): after this runs, the PCA `update` callback writes to this new
#`b` rather than the one created with the charts - confirm the cell is still needed.
b = p.circle(fake['la-resident'], fake["fan-of-lakers"], color=fake['color'],
          fill_alpha=0.2, size=10)
b.data_source.data['line_color']

In [None]:
#Sample one full period [0, 2*pi] at 2000 evenly spaced points and evaluate sin there
x = np.linspace(start=0, stop=2*np.pi, num=2000)
y = np.sin(x)

In [None]:
#Demo figure with a fixed y-range; `r` is the line renderer whose data the
#sine/cosine `update` callback rewrites. Note that this rebinds `p`.
p = figure(
    title="simple line example",
    plot_height=300,
    plot_width=600,
    y_range=(-5,5),
    background_fill_color='#efefef',
)
r = p.line(x=x, y=y, color="#8888cc", line_width=1.5, alpha=0.8)
type(p)

In [None]:
def update(f, w=1, A=1, phi=0, l=2000):
    """Redraw the demo line as ``A * f(w * x + phi)`` on ``[0, 2*pi]``.

    NOTE: this reuses the name `update`, so once this cell runs it shadows the
    PCA callback of the same name defined earlier in the notebook.

    Args:
        f: waveform name, either "sin" or "cos".
        w: frequency multiplier applied to x.
        A: amplitude.
        phi: phase offset added to ``w * x``.
        l: number of sample points.

    Raises:
        ValueError: if `f` is not "sin" or "cos" (previously this surfaced as
            an UnboundLocalError on `func`).

    Side effects:
        Replaces the 'x' and 'y' columns of the global renderer `r` and calls
        push_notebook() to refresh the displayed figure.
    """
    waveforms = {"sin": np.sin, "cos": np.cos}
    func = waveforms.get(f)
    if func is None:
        raise ValueError('f must be "sin" or "cos", got %r' % (f,))
    x = np.linspace(0, 2*np.pi, l)
    # Assign both columns in one step: `l` changes their length, and writing
    # them one at a time leaves x and y with different lengths in between.
    r.data_source.data = dict(r.data_source.data, x=x, y=A * func(w * x + phi))
    push_notebook()

In [None]:
#Render the demo figure with a notebook handle so it can be refreshed in place.
#NOTE(review): the sine `update` calls push_notebook() without a handle and
#presumably relies on this being the most recently shown plot - confirm.
show(p, notebook_handle=True)

In [None]:
#Build one control per `update` parameter: a list gives a dropdown (f) and a
#(min, max[, step]) tuple gives a slider (w, A, phi, l).
#NOTE(review): `update` here is whichever definition ran last; this call
#expects the sine/cosine version with parameters f, w, A, phi, l.
from ipywidgets import interact
interact(update, f=["sin", "cos"], w=(0,50), A=(1,10), phi=(0, 20, 0.1), l=(0,2000))
