Chrome is a memory hog; this notebook may work best in other browsers.

The random seed used for the example in this notebook is 138633, a random integer between 0 and 1E6 originally generated using the following cell.

In [50]:
# Draws a candidate seed; the value shown in this cell's output (138633) is the one
# hard-coded in the data-prep cell so the sample is reproducible.
# NOTE(review): this cell was executed as In [50], after the cell that imports numpy;
# on a fresh top-to-bottom run `np` is not yet defined here.
seed = np.random.randint(1E6)
seed

138633

# Data import for GUI

## Data import and sample generation

The following cell defines everything the GUI needs to run.

In [3]:
# Data prep for visualization

import pickle
import sys
sys.path.append('python')
import clusterOutliers
import numpy as np
import pandas as pd

# Import full quarter coo
file_path      = 'data/output/Q8_sample.coo'
score_column   = 'k1_1000x10'
reduction_name = 'PCA90'
n_outliers     = 1000 # number of top-scoring points that are always kept
n_normal       = 1000 # number of points drawn at random from everything else

# NOTE(security): pickle.load can execute arbitrary code stored in the file,
# so only open .coo files that come from a trusted source.
with open(file_path,'rb') as file:
    coo = pickle.load(file)

ft_data = coo.data
# choose subset of data to plot, the full quarter will typically be too much.
# Recommendation: include all outlying points (determined however) and a sampling of the normal data
scores_df           = coo.scores.copy()
scores_df['KIC']    = [int(i[4:13]) for i in scores_df.index] # KIC as integers for easier parsing
scores_df['files']  = scores_df.index # keeping the files handy
scores_df           = scores_df.set_index('KIC') # setting the index to KIC integers
top_out_KIC         = scores_df.sort_values(score_column,ascending=False).head(n_outliers).index
seed = 138633
np.random.seed(seed) # setting the random seed to the integer generated earlier
# n_normal pts sampled from the least outlying points (all points excluding the top outlying points).
# Explicit positional slice instead of head(-n_outliers): head(-0) would return nothing.
n_rest              = max(len(scores_df) - n_outliers, 0)
rand_bottom_out_KIC = scores_df.sort_values(score_column,ascending=True).iloc[:n_rest].sample(n_normal).index
sample              = np.append(top_out_KIC,rand_bottom_out_KIC) # numpy array of KICs for our sample
# Explicit copy: samp_df gains columns below and must not be tied to scores_df.
samp_df             = scores_df.loc[sample,:].copy()

# x and y coordinates 
# Using the PCA90 reduction (only one included in our example)
reduct_df    = coo.reductions[reduction_name].loc[samp_df.files,:]
samp_df['x'] = list(reduct_df.iloc[:,0])
samp_df['y'] = list(reduct_df.iloc[:,1])

cmaps = None # for custom color maps adjust next cell
samp_df['colors']= "#1f77b4" # default coloring

## Custom color maps for plot

In [27]:
# colors_for_plot maps rgba color values to the range of values in a numerical array, whether continuous or discrete
from quarterTools import colors_for_plot
# converted to hex values for bokeh
from matplotlib.colors import to_hex

# Use the score column chosen in the data-prep cell rather than repeating its name here.
scores = samp_df[score_column]
# NOTE(review): the log map assumes strictly positive scores — confirm for other score columns.

# Descriptive name -> rgba values; the names become both DataFrame columns and
# the entries of the GUI's color-scheme dropdown, in this order.
color_maps = {
    "Outliers"        : colors_for_plot(samp_df['DB_outliers'],'color_blind'),
    "Scores - Linear" : colors_for_plot(-scores,'viridis'), # flipping scores to match outlier colors
    "Scores - Log"    : colors_for_plot(-np.log(scores),'viridis'),
}
cmaps = list(color_maps) # choose sensible and descriptive names for the color maps

for name,rgba in color_maps.items():
    samp_df[name] = [to_hex(c) for c in rgba]

samp_df["colors"] = samp_df[cmaps[0]] # setting default color values for plot to outliers

Bokeh was being uncooperative with its color mapper combined with the selection tool.
I could get the colors to change when a different color selection was made,
but it wouldn't update the color mapping to selected data. Whenever switching to a new
color selection, selecting data would revert to the previous color mapping except where
colors are given explicitly. So, I'm defining the colors explicitly using matplotlib
instead of using the more convenient Bokeh mapper function, whose mapping I can't find
a way to save as an array for the life of me. It's clunky and I hate it, but it works.

In [5]:
from bokeh.io import show,output_notebook
# Configure Bokeh to render its output inline in this notebook.
output_notebook() 

# The Interactive GUI

In [93]:
from bokeh.plotting import figure
from bokeh.layouts import row,gridplot,column
from bokeh.models import ColumnDataSource, Circle
from bokeh.models.widgets import Button, Select
from bokeh.events import Tap
from bokeh.palettes import Colorblind4,Category10
from bokeh.transform import factor_cmap, linear_cmap, log_cmap

from lightkurve import search_lightcurvefile

def quarter_viz(doc):
    """Build the interactive quarter viewer and add it to a Bokeh document.

    The layout stacks a color-scheme dropdown, a scatter plot of the sampled
    points and a light-curve panel. Selecting points in the scatter plot
    downloads their Kepler light curves through Lightkurve and draws up to
    ten of them, each offset vertically by its position in the selection.

    Reads three notebook-level names that must be defined before the app is
    shown: ``viz_df`` (the table behind the scatter plot; needs the ``x``,
    ``y``, ``colors`` and ``files`` columns plus one column per entry of
    ``cmaps``), ``cmaps`` (the color-scheme column names) and ``Quarter``
    (the Kepler quarter passed to the Lightkurve search).

    Parameters
    ----------
    doc
        Document object the finished layout is attached to via ``add_root``.
    """
    ##### Data Import #####
    # Shared source for the scatter plot; its selection drives the light-curve panel.
    s1 = ColumnDataSource(viz_df)

    # Source for the light-curve panel: one list entry per plotted curve.
    # Starts with a single empty curve so the glyph can be created up front.
    s2 = ColumnDataSource(data={
        't'      : [[]],
        'nf'     : [[]],
        'colors' : [Category10[10][0]]
    })

    ##### Bokeh Setup #####
    # Tools to use
    TOOLS     = "pan,wheel_zoom,reset,tap,box_select,poly_select"
    alphas    = {1:1,2:1,3:1,4:1,5:1,6:1,7:1,8:1,9:1,10:1} # for lc plot

    # create a plot for the light curve and add a line renderer
    plc = figure(tools="pan,wheel_zoom,reset",plot_width=1000,plot_height=200)
    plc.title.text = "Select a point to plot a light curve"
    lg = plc.multi_line('t','nf',color='colors',source=s2)

    # Set up callbacks
    def auto_update_plot(attr,old,new):
        """Fetch and plot the light curves for the newly selected indices."""
        inds = list(new)
        if len(inds)==0:
            inds=[0] # nothing selected: fall back to the first point
        elif len(inds)>10:
            print("Only showing 10 of {} selected.".format(len(inds)))
            inds = inds[:10] # too many lightcurves are illegible, 10 is pushing it.

        alpha = alphas[len(inds)]
        lc_files = list(s1.data['files'][inds])
        line_colors = np.array(Category10[10][:len(inds)])

        # Clear the panel while the downloads run.
        s2.data = {'t':[[] for _ in inds],'nf':[[] for _ in inds],'colors':line_colors}
        plc.title.text = "KIC "

        # Collect the curves in fresh lists instead of mutating the ones just
        # handed to s2, so the final assignment is always a new value.
        new_ts = []
        new_nfs = []
        for i,lc_file in enumerate(lc_files):
            lc = lc_file[4:13]

            # download Kepler lighcurve via Lightkurve
            lcf = search_lightcurvefile(lc, mission="Kepler", quarter=Quarter).download()
            if lcf is None:
                # No search result for this target: keep an empty slot so the
                # remaining curves keep their color and offset.
                new_ts.append([])
                new_nfs.append([])
                plc.title.text += lc + " (unavailable) "
                continue
            # use the normalized PDCSAP flux 
            nlc = lcf.PDCSAP_FLUX.normalize()
            new_ts.append(nlc.time)
            new_nfs.append(nlc.flux+i) # offset each curve by its position
            plc.title.text += lc + " "
        s2.data = {'t':new_ts,'nf':new_nfs,'colors':line_colors}
        lg.glyph.line_alpha=alpha

    def update_colors(attrname, old, new):
        """Point the scatter colors at the column named by the dropdown."""
        s1.data['colors'] = s1.data[select.value]

    select = Select(title="Color Scheme:", value=cmaps[0], options=cmaps)
    select.on_change('value',update_colors)

    # create a new plot and add a renderer
    left = figure(tools=TOOLS, plot_width=1000, plot_height=600, title=None)
    scatter = left.circle(x='x', y='y', fill_color='colors', line_color=None, size=4, source=s1)
    s1.selected.on_change('indices',auto_update_plot)
    # Planning to incorporate a detailed view of the cluster center on a right plot in the future

    # Set up layouts and add to document
    inputs = row(select)
    #p = gridplot([[left,right]]) #future planning
    layout = column(inputs, left, plc)
    doc.add_root(layout)


In [94]:
# quarter_viz reads viz_df and Quarter as notebook-level names, so set them before launching.
viz_df  = samp_df
Quarter = 8
# NOTE(review): notebook_url is hard-coded to localhost:8888 — adjust if the notebook is served elsewhere.
show(quarter_viz,notebook_url="http://localhost:8888")



# Simplified, pure javascript interactivity to embed in website 

The modified code below uses png images of the lightcurves instead of the raw data so that pure javascript calls can be made, enabling easy embedding in a website (specifically for my personal website). It is intended only to demonstrate the utility of the interactive GUI above.

In [1]:
import pandas as pd

In [2]:
# Precomputed table for the static GUI: coordinates, light-curve image paths and hex color columns.
# NOTE(review): the 'Unnamed: 0' / 'Unnamed: 0.1' columns in the output below suggest the CSV
# was written with its index more than once — confirm and consider re-exporting with index=False.
gui_df = pd.read_csv('gui_df.csv')

In [3]:
# Preview the first rows to check the columns the static GUI relies on.
gui_df.head()

Unnamed: 0.1,Unnamed: 0,x,y,files,labels,dblabels,scores,Outliers,Scores - Linear,Scores - Log,colors
0,kplr011241837-2011073133259_llc.fits,-0.424012,0.513335,./Plots/kplr011241837-2011073133259_llc.png,KIC 11241837,0,0.003211,#009999,#fbe723,#65cb5e,#1f77b4
1,kplr011821279-2011073133259_llc.fits,-0.281647,-1.439225,./Plots/kplr011821279-2011073133259_llc.png,KIC 11821279,0,0.003997,#009999,#f8e621,#52c569,#1f77b4
2,kplr006342144-2011073133259_llc.fits,-0.43924,0.53966,./Plots/kplr006342144-2011073133259_llc.png,KIC 6342144,0,0.002002,#009999,#fde725,#95d840,#1f77b4
3,kplr010355036-2011073133259_llc.fits,-0.199243,3.624122,./Plots/kplr010355036-2011073133259_llc.png,KIC 10355036,0,0.003108,#009999,#fbe723,#69cd5b,#1f77b4
4,kplr007757721-2011073133259_llc.fits,-0.203163,1.264593,./Plots/kplr007757721-2011073133259_llc.png,KIC 7757721,0,0.001226,#009999,#fde725,#cae11f,#1f77b4


In [73]:
import pandas as pd

from bokeh.io import show
from bokeh.plotting import figure
from bokeh.layouts import row,gridplot,column
from bokeh.models import ColumnDataSource, CustomJS
from bokeh.models.widgets import Select

from bokeh.resources import CDN
from bokeh.embed import autoload_static

gui_df = pd.read_csv('gui_df.csv')

# Names of the precomputed hex-color columns offered in the dropdown; the first is the default.
# Colors are stored explicitly per point (rather than via a Bokeh color mapper) so that
# switching schemes and selecting points both keep the chosen colors.
color_schemes = ["Outliers", "Scores - Linear", "Scores - Log"]

# s1 feeds the scatter plot; s2 holds the single light-curve image currently displayed.
s1 = ColumnDataSource(gui_df)
s1.data['colors'] = s1.data[color_schemes[0]] # colors is a neutral blue by default, but why not make the outlier color scheme default
s2 = ColumnDataSource(data = {'lc_file':[gui_df['files'][0]]})

# Show the light-curve image of the first selected point (only one image at a time).
s1.selected.js_on_change('indices', CustomJS(args=dict(s1=s1, s2=s2), code="""
    var inds = cb_obj.indices;
    if (inds.length === 0) {
        // selection cleared: keep the current image instead of loading an undefined URL
        return;
    }
    var d1 = s1.data;
    var d2 = s2.data;
    d2['lc_file'] = [d1['files'][inds[0]]];
    s2.change.emit();
""")
)


# Swap the scatter colors to the column named by the dropdown.
select = Select(title="Color Scheme:", value=color_schemes[0], options=color_schemes)
select.js_on_change('value',CustomJS(args=dict(s1=s1),code="""
var color_scheme = cb_obj.value;
var d1 = s1.data;
d1['colors']=d1[color_scheme]
s1.change.emit()
"""))

# create a new plot and add a renderer
TOOLS     = "pan,wheel_zoom,reset,tap,box_select,poly_select"
left = figure(tools=TOOLS, plot_width=1000, plot_height=600, title=None)

scatter = left.circle(x='x', y='y', fill_color='colors', line_color=None, size=4, source=s1)

# Image panel: a fixed 10 x 3 data range with every axis decoration hidden,
# so the png fills the frame.
plc = figure(plot_width=1000,plot_height=300,x_range=(0,10),y_range=(0,3))
for axis in (plc.xaxis, plc.yaxis):
    axis.axis_line_color = None
    axis.major_tick_line_color = None
    axis.minor_tick_line_color = None
    axis.major_label_text_color = None
lc_path = s1.data['files'][0]
lc_image = plc.image_url(url='lc_file',x=0,y=0,w=10,h=3,anchor="bottom_left",source=s2)

# Set up layouts and add to document
inputs = row(select)
#p = gridplot([[left,right]]) #future planning
layout = column(inputs, left, plc)
show(layout)
# js is the standalone script; tag is the <script> element that loads it from the given path.
js, tag = autoload_static(layout,CDN,'assets/js/gui_ex.js')

In [75]:
# Save the generated script. Explicit UTF-8 so the write does not depend on the
# platform's default encoding.
# NOTE(review): autoload_static was given 'assets/js/gui_ex.js' as the script path, so the
# website must serve this file from there; the returned `tag` is not saved anywhere in this notebook.
with open('gui_ex.js','w',encoding='utf-8') as js_file:
    js_file.write(js)