# CS 690 Homework 2

## Overview

In this homework, I read the two datasets obtained from Moodle. One is the wholesale data with six missing values; the other is the wholesale data without missing values. To deal with the missing values, I applied four methods: ignoring the tuples with missing values, filling the missing datapoints with a global constant, performing linear interpolation, and inserting attribute mean values. In the visualization part, I used two plots for each method. The plots on the left visualize the **Wholesale customers data-missing** dataset after the missing values have been handled; the plots on the right visualize the **Wholesale customers data** dataset. All of these visualizations support **box zoom**, **wheel zoom**, **pan**, **hover**, and **slider**. Some of the code is adapted from the Bokeh examples on [Github](https://github.com/bokeh/bokeh/tree/0.12.9).

**Reference:** [Bokeh Examples](https://github.com/bokeh/bokeh/tree/0.12.9)

In [1]:
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
from pandas import DataFrame as df

from bokeh.core.properties import field
from ipywidgets import interact
from bokeh.io import push_notebook, output_notebook
from bokeh.layouts import layout
from bokeh.models import (
    ColumnDataSource, HoverTool, SingleIntervalTicker, Slider, Button, Label,
    CategoricalColorMapper, CustomJS,
)
from bokeh.palettes import Spectral6, brewer
from bokeh.plotting import figure, show
from bokeh.layouts import column, widgetbox, row

output_notebook()

In [2]:
def process_data_missing(choice=0, path="Wholesale customers data-missing.csv", fill_value=-20):
    """Load the incomplete wholesale data and repair it with one strategy.

    Parameters
    ----------
    choice : int
        0 -- drop every row that contains a missing value.
        1 -- replace missing values with the constant ``fill_value``.
        2 -- linearly interpolate missing values along each column.
        3 -- replace missing values with the mean of their column.
    path : str
        CSV file to read.
    fill_value : scalar
        Constant inserted by strategy 1 (defaults to -20).

    Returns
    -------
    tuple
        ``(new_data, df_dict)``: the repaired DataFrame and a dict that maps
        each of the six attribute columns to the list of row labels whose
        value was missing in the raw file.

    Raises
    ------
    ValueError
        If ``choice`` is not 0, 1, 2 or 3.
    """
    # DataFrame.from_csv is no longer available in current pandas. It used the
    # first column as the index, which the old code immediately undid with
    # reset_index(); for a non-date first column a plain read_csv gives the
    # same frame with a default integer index.
    raw_data = pd.read_csv(path)

    cols = ['Fresh', 'Milk', 'Grocery', 'Frozen', 'Detergents_Paper', 'Delicassen']

    # Remember where the gaps were *before* repairing, so plots can mark them.
    df_dict = {
        col: raw_data.index[raw_data[col].isnull()].tolist()
        for col in cols
    }

    if choice == 0:
        # Ignore tuples with missing datapoints.
        new_data = raw_data.dropna()
    elif choice == 1:
        # Fill missing datapoints with a global constant.
        new_data = raw_data.fillna(fill_value)
    elif choice == 2:
        # Linear interpolation at the missing datapoints.
        new_data = raw_data.interpolate(method='linear')
    elif choice == 3:
        # Fill missing datapoints with the attribute (column) mean.
        new_data = raw_data.fillna(raw_data.mean(numeric_only=True))
    else:
        raise ValueError(
            "choice must be 0, 1, 2 or 3, got {!r}".format(choice)
        )

    return new_data, df_dict

In [3]:
def process_data_all(path="Wholesale customers data.csv"):
    """Load the complete wholesale dataset.

    Parameters
    ----------
    path : str
        CSV file to read.

    Returns
    -------
    DataFrame
        Every CSV column as a regular column, with a default integer index.
    """
    # DataFrame.from_csv is no longer available in current pandas. It used the
    # first column as the index, which the old code immediately undid with
    # reset_index(); for a non-date first column a plain read_csv gives the
    # same frame.
    return pd.read_csv(path)

In [4]:
# Side-by-side scatter plots: the incomplete data after dropping rows with
# missing values (strategy 0, left) next to the complete data (right).
ws_df_all = process_data_all()
ws_df, df_dict = process_data_missing(0)

# Column names of the complete file. The first two are used for the
# channel / region labels; the remaining ones are offered as plot axes.
cate = list(ws_df_all.columns)

index_all = list(ws_df_all.index)
index_missing = list(ws_df.index)

channel_all = ws_df_all.loc[:, cate[0]].tolist()
regions_all = ws_df_all.loc[:, cate[1]].tolist()
channel_missing = ws_df.loc[:, cate[0]].tolist()
regions_missing = ws_df.loc[:, cate[1]].tolist()

# Axes shown until the interact widget picks other attributes.
xaxis_name = cate[2]
yaxis_name = cate[4]

# Row labels whose y attribute was absent in the incomplete file.
missing_rows = set(df_dict[yaxis_name])

labels_all = [
    'missing' if idx in missing_rows
    else ','.join(['channel:' + str(ch), 'regions:' + str(rg)])
    for idx, ch, rg in zip(index_all, channel_all, regions_all)
]
label_missing = [
    'missing' if idx in missing_rows
    else ','.join(['channel:' + str(ch), 'regions:' + str(rg)])
    for idx, ch, rg in zip(index_missing, channel_missing, regions_missing)
]

# One mapper is shared by both plots so a label gets the same colour on the
# left and on the right. The factors are sorted because set iteration order
# for strings differs between interpreter runs, which used to reshuffle the
# colours; 'missing' is always a factor so it stays coloured after the y axis
# is switched to another attribute.
color_mapper_all = CategoricalColorMapper(
    palette=brewer['Spectral'][7],
    factors=sorted(set(labels_all) | {'missing'}),
)

# Right-hand plot: the dataset without missing values.
source_all = ColumnDataSource(dict(
    x=ws_df_all.loc[:, xaxis_name].tolist(),
    y=ws_df_all.loc[:, yaxis_name].tolist(),
    legend_names=labels_all,
))

plot_all = figure(
    title=' '.join([yaxis_name, 'vs. ', xaxis_name, 'without Missing Values']),
    plot_height=400,
    plot_width=490,
)
plot_all.xaxis.ticker = SingleIntervalTicker(interval=20000)
plot_all.xaxis.axis_label = xaxis_name
plot_all.yaxis.ticker = SingleIntervalTicker(interval=20000)
plot_all.yaxis.axis_label = yaxis_name

r_all = plot_all.circle(
    'x',
    'y',
    size=10,
    source=source_all,
    fill_color={'field': 'legend_names', 'transform': color_mapper_all},
    fill_alpha=0.7,
    line_color='#7c7e71',
    line_width=0.5,
    line_alpha=0.5,
    legend='legend_names',
)

# '@x' / '@y' read the hovered point's own column values; the former
# '$x' / '$y' reported the cursor position instead.
plot_all.add_tools(HoverTool(
    tooltips=[(xaxis_name, '@x'), (yaxis_name, '@y')],
    show_arrow=True,
    point_policy='follow_mouse',
))

# Left-hand plot: the repaired version of the incomplete dataset.
source_missing = ColumnDataSource(dict(
    x=ws_df.loc[:, xaxis_name].tolist(),
    y=ws_df.loc[:, yaxis_name].tolist(),
    legend_names=label_missing,
))

plot_missing = figure(
    title=' '.join([yaxis_name, 'vs. ', xaxis_name, 'with Missing Values']),
    plot_height=400,
    plot_width=490,
)
plot_missing.xaxis.ticker = SingleIntervalTicker(interval=20000)
plot_missing.xaxis.axis_label = xaxis_name
plot_missing.yaxis.ticker = SingleIntervalTicker(interval=20000)
plot_missing.yaxis.axis_label = yaxis_name

r_missing = plot_missing.circle(
    'x',
    'y',
    size=10,
    source=source_missing,
    fill_color={'field': 'legend_names', 'transform': color_mapper_all},
    fill_alpha=0.7,
    line_color='#7c7e71',
    line_width=0.5,
    line_alpha=0.5,
    legend='legend_names',
)

plot_missing.add_tools(HoverTool(
    tooltips=[(xaxis_name, '@x'), (yaxis_name, '@y')],
    show_arrow=True,
    point_policy='follow_mouse',
))

layouts = row(plot_missing, plot_all)

def update_notebook(x=cate[2], y=cate[3]):
    """Redraw both scatter plots for the attribute pair chosen in the widget.

    ``x`` and ``y`` are column names of the two data frames. Nothing happens
    when both name the same column. Otherwise the x / y data, the per-point
    labels, the titles and the axis labels of both plots are replaced and
    the change is pushed to the notebook output.
    """
    if x == y:
        return

    # Rows whose *selected* y attribute was absent in the incomplete file.
    # The left plot's labels used to be derived from the start-up y axis
    # (yaxis_name), so its "missing" markers went stale after a switch and
    # disagreed with the right plot.
    missing_rows = set(df_dict[y])

    def make_labels(row_ids, channels, regions):
        # 'missing' for marked rows, otherwise the channel/region pair.
        return [
            'missing' if idx in missing_rows
            else ','.join(['channel:' + str(ch), 'regions:' + str(rg)])
            for idx, ch, rg in zip(row_ids, channels, regions)
        ]

    r_missing.data_source.data['x'] = ws_df.loc[:, x].tolist()
    r_missing.data_source.data['y'] = ws_df.loc[:, y].tolist()
    r_missing.data_source.data['legend_names'] = make_labels(
        index_missing, channel_missing, regions_missing)

    r_all.data_source.data['x'] = ws_df_all.loc[:, x].tolist()
    r_all.data_source.data['y'] = ws_df_all.loc[:, y].tolist()
    r_all.data_source.data['legend_names'] = make_labels(
        index_all, channel_all, regions_all)

    plot_all.title.text = ' '.join([y, 'vs. ', x, 'without Missing Values'])
    plot_all.xaxis.axis_label = x
    plot_all.yaxis.axis_label = y

    plot_missing.title.text = ' '.join([y, 'vs. ', x, 'with Missing Values'])
    plot_missing.xaxis.axis_label = x
    plot_missing.yaxis.axis_label = y
    push_notebook()

### Deal with the missing data by ignoring the tuples with missing datapoints

There are two scatter plots below. The left one visualizes the results after ignoring the tuples with missing datapoints; the right one visualizes the original dataset, which doesn't contain missing values. The missing datapoints are marked with a different color on both plots. The Jupyter interact widget lets you choose different attributes for the x axis and the y axis, and the hover tool shows the values of a specific datapoint on the plots.

(*This plot may take a while to load on nbviewer*)

In [5]:
# Render the two-plot row; the notebook handle lets later push_notebook()
# calls update this output in place.
show(layouts, notebook_handle=True)

In [6]:
# Offer every column from the third one onward as a choice for each axis.
interact(
    update_notebook,
    x=cate[2:],
    y=cate[2:],
)

In [7]:
# Side-by-side scatter plots: the incomplete data after filling the gaps with
# a global constant (strategy 1, left) next to the complete data (right).
ws_df_all = process_data_all()
ws_df, df_dict = process_data_missing(1)

# Column names of the complete file. The first two are used for the
# channel / region labels; the remaining ones are offered as plot axes.
cate = list(ws_df_all.columns)

index_all = list(ws_df_all.index)
index_missing = list(ws_df.index)

channel_all = ws_df_all.loc[:, cate[0]].tolist()
regions_all = ws_df_all.loc[:, cate[1]].tolist()
channel_missing = ws_df.loc[:, cate[0]].tolist()
regions_missing = ws_df.loc[:, cate[1]].tolist()

# Axes shown until the interact widget picks other attributes.
xaxis_name = cate[2]
yaxis_name = cate[4]

# Row labels whose y attribute was absent in the incomplete file.
missing_rows = set(df_dict[yaxis_name])

labels_all = [
    'missing' if idx in missing_rows
    else ','.join(['channel:' + str(ch), 'regions:' + str(rg)])
    for idx, ch, rg in zip(index_all, channel_all, regions_all)
]
label_missing = [
    'missing' if idx in missing_rows
    else ','.join(['channel:' + str(ch), 'regions:' + str(rg)])
    for idx, ch, rg in zip(index_missing, channel_missing, regions_missing)
]

# One mapper is shared by both plots so a label gets the same colour on the
# left and on the right. The factors are sorted because set iteration order
# for strings differs between interpreter runs, which used to reshuffle the
# colours; 'missing' is always a factor so it stays coloured after the y axis
# is switched to another attribute.
color_mapper_all = CategoricalColorMapper(
    palette=brewer['Spectral'][7],
    factors=sorted(set(labels_all) | {'missing'}),
)

# Right-hand plot: the dataset without missing values.
source_all = ColumnDataSource(dict(
    x=ws_df_all.loc[:, xaxis_name].tolist(),
    y=ws_df_all.loc[:, yaxis_name].tolist(),
    legend_names=labels_all,
))

plot_all = figure(
    title=' '.join([yaxis_name, 'vs. ', xaxis_name, 'without Missing Values']),
    plot_height=400,
    plot_width=490,
)
plot_all.xaxis.ticker = SingleIntervalTicker(interval=20000)
plot_all.xaxis.axis_label = xaxis_name
plot_all.yaxis.ticker = SingleIntervalTicker(interval=20000)
plot_all.yaxis.axis_label = yaxis_name

r_all = plot_all.circle(
    'x',
    'y',
    size=10,
    source=source_all,
    fill_color={'field': 'legend_names', 'transform': color_mapper_all},
    fill_alpha=0.7,
    line_color='#7c7e71',
    line_width=0.5,
    line_alpha=0.5,
    legend='legend_names',
)

# '@x' / '@y' read the hovered point's own column values; the former
# '$x' / '$y' reported the cursor position instead.
plot_all.add_tools(HoverTool(
    tooltips=[(xaxis_name, '@x'), (yaxis_name, '@y')],
    show_arrow=True,
    point_policy='follow_mouse',
))

# Left-hand plot: the repaired version of the incomplete dataset.
source_missing = ColumnDataSource(dict(
    x=ws_df.loc[:, xaxis_name].tolist(),
    y=ws_df.loc[:, yaxis_name].tolist(),
    legend_names=label_missing,
))

plot_missing = figure(
    title=' '.join([yaxis_name, 'vs. ', xaxis_name, 'with Missing Values']),
    plot_height=400,
    plot_width=490,
)
plot_missing.xaxis.ticker = SingleIntervalTicker(interval=20000)
plot_missing.xaxis.axis_label = xaxis_name
plot_missing.yaxis.ticker = SingleIntervalTicker(interval=20000)
plot_missing.yaxis.axis_label = yaxis_name

r_missing = plot_missing.circle(
    'x',
    'y',
    size=10,
    source=source_missing,
    fill_color={'field': 'legend_names', 'transform': color_mapper_all},
    fill_alpha=0.7,
    line_color='#7c7e71',
    line_width=0.5,
    line_alpha=0.5,
    legend='legend_names',
)

plot_missing.add_tools(HoverTool(
    tooltips=[(xaxis_name, '@x'), (yaxis_name, '@y')],
    show_arrow=True,
    point_policy='follow_mouse',
))

layouts = row(plot_missing, plot_all)

def update_notebook(x=cate[2], y=cate[3]):
    """Redraw both scatter plots for the attribute pair chosen in the widget.

    ``x`` and ``y`` are column names of the two data frames. Nothing happens
    when both name the same column. Otherwise the x / y data, the per-point
    labels, the titles and the axis labels of both plots are replaced and
    the change is pushed to the notebook output.
    """
    if x == y:
        return

    # Rows whose *selected* y attribute was absent in the incomplete file.
    # The left plot's labels used to be derived from the start-up y axis
    # (yaxis_name), so its "missing" markers went stale after a switch and
    # disagreed with the right plot.
    missing_rows = set(df_dict[y])

    def make_labels(row_ids, channels, regions):
        # 'missing' for marked rows, otherwise the channel/region pair.
        return [
            'missing' if idx in missing_rows
            else ','.join(['channel:' + str(ch), 'regions:' + str(rg)])
            for idx, ch, rg in zip(row_ids, channels, regions)
        ]

    r_missing.data_source.data['x'] = ws_df.loc[:, x].tolist()
    r_missing.data_source.data['y'] = ws_df.loc[:, y].tolist()
    r_missing.data_source.data['legend_names'] = make_labels(
        index_missing, channel_missing, regions_missing)

    r_all.data_source.data['x'] = ws_df_all.loc[:, x].tolist()
    r_all.data_source.data['y'] = ws_df_all.loc[:, y].tolist()
    r_all.data_source.data['legend_names'] = make_labels(
        index_all, channel_all, regions_all)

    plot_all.title.text = ' '.join([y, 'vs. ', x, 'without Missing Values'])
    plot_all.xaxis.axis_label = x
    plot_all.yaxis.axis_label = y

    plot_missing.title.text = ' '.join([y, 'vs. ', x, 'with Missing Values'])
    plot_missing.xaxis.axis_label = x
    plot_missing.yaxis.axis_label = y
    push_notebook()

### Deal with the missing data by filling the missing values with a global constant -20

There are two scatter plots below. The left one visualizes the results after filling the missing values with a global constant -20; the right one visualizes the original dataset, which doesn't contain missing values. The missing datapoints are marked with a different color on both plots. The Jupyter interact widget lets you choose different attributes for the x axis and the y axis, and the hover tool shows the values of a specific datapoint on the plots.

(*This plot may take a while to load on nbviewer*)

In [8]:
# Render the two-plot row; the notebook handle lets later push_notebook()
# calls update this output in place.
show(layouts, notebook_handle=True)

In [9]:
# Offer every column from the third one onward as a choice for each axis.
interact(
    update_notebook,
    x=cate[2:],
    y=cate[2:],
)

<function __main__.update_notebook>

In [10]:
# Side-by-side scatter plots: the incomplete data after linear interpolation
# of the gaps (strategy 2, left) next to the complete data (right).
ws_df_all = process_data_all()
ws_df, df_dict = process_data_missing(2)

# Column names of the complete file. The first two are used for the
# channel / region labels; the remaining ones are offered as plot axes.
cate = list(ws_df_all.columns)

index_all = list(ws_df_all.index)
index_missing = list(ws_df.index)

channel_all = ws_df_all.loc[:, cate[0]].tolist()
regions_all = ws_df_all.loc[:, cate[1]].tolist()
channel_missing = ws_df.loc[:, cate[0]].tolist()
regions_missing = ws_df.loc[:, cate[1]].tolist()

# Axes shown until the interact widget picks other attributes.
xaxis_name = cate[2]
yaxis_name = cate[4]

# Row labels whose y attribute was absent in the incomplete file.
missing_rows = set(df_dict[yaxis_name])

labels_all = [
    'missing' if idx in missing_rows
    else ','.join(['channel:' + str(ch), 'regions:' + str(rg)])
    for idx, ch, rg in zip(index_all, channel_all, regions_all)
]
label_missing = [
    'missing' if idx in missing_rows
    else ','.join(['channel:' + str(ch), 'regions:' + str(rg)])
    for idx, ch, rg in zip(index_missing, channel_missing, regions_missing)
]

# One mapper is shared by both plots so a label gets the same colour on the
# left and on the right. The factors are sorted because set iteration order
# for strings differs between interpreter runs, which used to reshuffle the
# colours; 'missing' is always a factor so it stays coloured after the y axis
# is switched to another attribute.
color_mapper_all = CategoricalColorMapper(
    palette=brewer['Spectral'][7],
    factors=sorted(set(labels_all) | {'missing'}),
)

# Right-hand plot: the dataset without missing values.
source_all = ColumnDataSource(dict(
    x=ws_df_all.loc[:, xaxis_name].tolist(),
    y=ws_df_all.loc[:, yaxis_name].tolist(),
    legend_names=labels_all,
))

plot_all = figure(
    title=' '.join([yaxis_name, 'vs. ', xaxis_name, 'without Missing Values']),
    plot_height=400,
    plot_width=490,
)
plot_all.xaxis.ticker = SingleIntervalTicker(interval=20000)
plot_all.xaxis.axis_label = xaxis_name
plot_all.yaxis.ticker = SingleIntervalTicker(interval=20000)
plot_all.yaxis.axis_label = yaxis_name

r_all = plot_all.circle(
    'x',
    'y',
    size=10,
    source=source_all,
    fill_color={'field': 'legend_names', 'transform': color_mapper_all},
    fill_alpha=0.7,
    line_color='#7c7e71',
    line_width=0.5,
    line_alpha=0.5,
    legend='legend_names',
)

# '@x' / '@y' read the hovered point's own column values; the former
# '$x' / '$y' reported the cursor position instead.
plot_all.add_tools(HoverTool(
    tooltips=[(xaxis_name, '@x'), (yaxis_name, '@y')],
    show_arrow=True,
    point_policy='follow_mouse',
))

# Left-hand plot: the repaired version of the incomplete dataset.
source_missing = ColumnDataSource(dict(
    x=ws_df.loc[:, xaxis_name].tolist(),
    y=ws_df.loc[:, yaxis_name].tolist(),
    legend_names=label_missing,
))

plot_missing = figure(
    title=' '.join([yaxis_name, 'vs. ', xaxis_name, 'with Missing Values']),
    plot_height=400,
    plot_width=490,
)
plot_missing.xaxis.ticker = SingleIntervalTicker(interval=20000)
plot_missing.xaxis.axis_label = xaxis_name
plot_missing.yaxis.ticker = SingleIntervalTicker(interval=20000)
plot_missing.yaxis.axis_label = yaxis_name

r_missing = plot_missing.circle(
    'x',
    'y',
    size=10,
    source=source_missing,
    fill_color={'field': 'legend_names', 'transform': color_mapper_all},
    fill_alpha=0.7,
    line_color='#7c7e71',
    line_width=0.5,
    line_alpha=0.5,
    legend='legend_names',
)

plot_missing.add_tools(HoverTool(
    tooltips=[(xaxis_name, '@x'), (yaxis_name, '@y')],
    show_arrow=True,
    point_policy='follow_mouse',
))

layouts = row(plot_missing, plot_all)

def update_notebook(x=cate[2], y=cate[3]):
    """Redraw both scatter plots for the attribute pair chosen in the widget.

    ``x`` and ``y`` are column names of the two data frames. Nothing happens
    when both name the same column. Otherwise the x / y data, the per-point
    labels, the titles and the axis labels of both plots are replaced and
    the change is pushed to the notebook output.
    """
    if x == y:
        return

    # Rows whose *selected* y attribute was absent in the incomplete file.
    # The left plot's labels used to be derived from the start-up y axis
    # (yaxis_name), so its "missing" markers went stale after a switch and
    # disagreed with the right plot.
    missing_rows = set(df_dict[y])

    def make_labels(row_ids, channels, regions):
        # 'missing' for marked rows, otherwise the channel/region pair.
        return [
            'missing' if idx in missing_rows
            else ','.join(['channel:' + str(ch), 'regions:' + str(rg)])
            for idx, ch, rg in zip(row_ids, channels, regions)
        ]

    r_missing.data_source.data['x'] = ws_df.loc[:, x].tolist()
    r_missing.data_source.data['y'] = ws_df.loc[:, y].tolist()
    r_missing.data_source.data['legend_names'] = make_labels(
        index_missing, channel_missing, regions_missing)

    r_all.data_source.data['x'] = ws_df_all.loc[:, x].tolist()
    r_all.data_source.data['y'] = ws_df_all.loc[:, y].tolist()
    r_all.data_source.data['legend_names'] = make_labels(
        index_all, channel_all, regions_all)

    plot_all.title.text = ' '.join([y, 'vs. ', x, 'without Missing Values'])
    plot_all.xaxis.axis_label = x
    plot_all.yaxis.axis_label = y

    plot_missing.title.text = ' '.join([y, 'vs. ', x, 'with Missing Values'])
    plot_missing.xaxis.axis_label = x
    plot_missing.yaxis.axis_label = y
    push_notebook()

### Deal with the missing data by performing linear interpolation

There are two scatter plots below. The left one visualizes the results after performing linear interpolation; the right one visualizes the original dataset, which doesn't contain missing values. The missing datapoints are marked with a different color on both plots. The Jupyter interact widget lets you choose different attributes for the x axis and the y axis, and the hover tool shows the values of a specific datapoint on the plots.

(*This plot may take a while to load on nbviewer*)

In [11]:
# Render the two-plot row; the notebook handle lets later push_notebook()
# calls update this output in place.
show(layouts, notebook_handle=True)

In [12]:
# Offer every column from the third one onward as a choice for each axis.
interact(
    update_notebook,
    x=cate[2:],
    y=cate[2:],
)

<function __main__.update_notebook>

In [13]:
# Side-by-side scatter plots: the incomplete data after filling the gaps with
# the attribute mean (strategy 3, left) next to the complete data (right).
ws_df_all = process_data_all()
ws_df, df_dict = process_data_missing(3)

# Column names of the complete file. The first two are used for the
# channel / region labels; the remaining ones are offered as plot axes.
cate = list(ws_df_all.columns)

index_all = list(ws_df_all.index)
index_missing = list(ws_df.index)

channel_all = ws_df_all.loc[:, cate[0]].tolist()
regions_all = ws_df_all.loc[:, cate[1]].tolist()
channel_missing = ws_df.loc[:, cate[0]].tolist()
regions_missing = ws_df.loc[:, cate[1]].tolist()

# Axes shown until the interact widget picks other attributes.
xaxis_name = cate[2]
yaxis_name = cate[4]

# Row labels whose y attribute was absent in the incomplete file.
missing_rows = set(df_dict[yaxis_name])

labels_all = [
    'missing' if idx in missing_rows
    else ','.join(['channel:' + str(ch), 'regions:' + str(rg)])
    for idx, ch, rg in zip(index_all, channel_all, regions_all)
]
label_missing = [
    'missing' if idx in missing_rows
    else ','.join(['channel:' + str(ch), 'regions:' + str(rg)])
    for idx, ch, rg in zip(index_missing, channel_missing, regions_missing)
]

# One mapper is shared by both plots so a label gets the same colour on the
# left and on the right. The factors are sorted because set iteration order
# for strings differs between interpreter runs, which used to reshuffle the
# colours; 'missing' is always a factor so it stays coloured after the y axis
# is switched to another attribute.
color_mapper_all = CategoricalColorMapper(
    palette=brewer['Spectral'][7],
    factors=sorted(set(labels_all) | {'missing'}),
)

# Right-hand plot: the dataset without missing values.
source_all = ColumnDataSource(dict(
    x=ws_df_all.loc[:, xaxis_name].tolist(),
    y=ws_df_all.loc[:, yaxis_name].tolist(),
    legend_names=labels_all,
))

plot_all = figure(
    title=' '.join([yaxis_name, 'vs. ', xaxis_name, 'without Missing Values']),
    plot_height=400,
    plot_width=490,
)
plot_all.xaxis.ticker = SingleIntervalTicker(interval=20000)
plot_all.xaxis.axis_label = xaxis_name
plot_all.yaxis.ticker = SingleIntervalTicker(interval=20000)
plot_all.yaxis.axis_label = yaxis_name

r_all = plot_all.circle(
    'x',
    'y',
    size=10,
    source=source_all,
    fill_color={'field': 'legend_names', 'transform': color_mapper_all},
    fill_alpha=0.7,
    line_color='#7c7e71',
    line_width=0.5,
    line_alpha=0.5,
    legend='legend_names',
)

# '@x' / '@y' read the hovered point's own column values; the former
# '$x' / '$y' reported the cursor position instead.
plot_all.add_tools(HoverTool(
    tooltips=[(xaxis_name, '@x'), (yaxis_name, '@y')],
    show_arrow=True,
    point_policy='follow_mouse',
))

# Left-hand plot: the repaired version of the incomplete dataset.
source_missing = ColumnDataSource(dict(
    x=ws_df.loc[:, xaxis_name].tolist(),
    y=ws_df.loc[:, yaxis_name].tolist(),
    legend_names=label_missing,
))

plot_missing = figure(
    title=' '.join([yaxis_name, 'vs. ', xaxis_name, 'with Missing Values']),
    plot_height=400,
    plot_width=490,
)
plot_missing.xaxis.ticker = SingleIntervalTicker(interval=20000)
plot_missing.xaxis.axis_label = xaxis_name
plot_missing.yaxis.ticker = SingleIntervalTicker(interval=20000)
plot_missing.yaxis.axis_label = yaxis_name

r_missing = plot_missing.circle(
    'x',
    'y',
    size=10,
    source=source_missing,
    fill_color={'field': 'legend_names', 'transform': color_mapper_all},
    fill_alpha=0.7,
    line_color='#7c7e71',
    line_width=0.5,
    line_alpha=0.5,
    legend='legend_names',
)

plot_missing.add_tools(HoverTool(
    tooltips=[(xaxis_name, '@x'), (yaxis_name, '@y')],
    show_arrow=True,
    point_policy='follow_mouse',
))

layouts = row(plot_missing, plot_all)

def update_notebook(x=cate[2], y=cate[3]):
    """Redraw both scatter plots for the attribute pair chosen in the widget.

    ``x`` and ``y`` are column names of the two data frames. Nothing happens
    when both name the same column. Otherwise the x / y data, the per-point
    labels, the titles and the axis labels of both plots are replaced and
    the change is pushed to the notebook output.
    """
    if x == y:
        return

    # Rows whose *selected* y attribute was absent in the incomplete file.
    # The left plot's labels used to be derived from the start-up y axis
    # (yaxis_name), so its "missing" markers went stale after a switch and
    # disagreed with the right plot.
    missing_rows = set(df_dict[y])

    def make_labels(row_ids, channels, regions):
        # 'missing' for marked rows, otherwise the channel/region pair.
        return [
            'missing' if idx in missing_rows
            else ','.join(['channel:' + str(ch), 'regions:' + str(rg)])
            for idx, ch, rg in zip(row_ids, channels, regions)
        ]

    r_missing.data_source.data['x'] = ws_df.loc[:, x].tolist()
    r_missing.data_source.data['y'] = ws_df.loc[:, y].tolist()
    r_missing.data_source.data['legend_names'] = make_labels(
        index_missing, channel_missing, regions_missing)

    r_all.data_source.data['x'] = ws_df_all.loc[:, x].tolist()
    r_all.data_source.data['y'] = ws_df_all.loc[:, y].tolist()
    r_all.data_source.data['legend_names'] = make_labels(
        index_all, channel_all, regions_all)

    plot_all.title.text = ' '.join([y, 'vs. ', x, 'without Missing Values'])
    plot_all.xaxis.axis_label = x
    plot_all.yaxis.axis_label = y

    plot_missing.title.text = ' '.join([y, 'vs. ', x, 'with Missing Values'])
    plot_missing.xaxis.axis_label = x
    plot_missing.yaxis.axis_label = y
    push_notebook()

### Deal with the missing data by inserting attribute mean values

There are two scatter plots below. The left one visualizes the results after inserting attribute mean values; the right one visualizes the original dataset, which doesn't contain missing values. The missing datapoints are marked with a different color on both plots. The Jupyter interact widget lets you choose different attributes for the x axis and the y axis, and the hover tool shows the values of a specific datapoint on the plots.

(*This plot may take a while to load on nbviewer*)

In [14]:
# Render the two-plot row; the notebook handle lets later push_notebook()
# calls update this output in place.
show(layouts, notebook_handle=True)

In [15]:
# Offer every column from the third one onward as a choice for each axis.
interact(
    update_notebook,
    x=cate[2:],
    y=cate[2:],
)

<function __main__.update_notebook>