In [1]:
from os.path import join, dirname
import datetime
import dill
import pandas as pd
import numpy as np
from math import pi

from scipy.signal import savgol_filter
from bokeh.io import curdoc

from bokeh.charts import Bar
from bokeh.charts.attributes import ColorAttr, CatAttr
from bokeh.io import push_notebook, output_notebook, output_file, show
from bokeh.models import CustomJS, Slider, DataRange1d, Plot, LinearAxis, HoverTool, Range1d
from bokeh.plotting import figure, output_file, show, ColumnDataSource
from bokeh.models.widgets import Panel, Tabs, Select, TextInput, Div
from bokeh.layouts import widgetbox, row, column

from statsmodels.tsa.arima_model import ARMA
from bokeh.palettes import Category20 as palette

output_notebook()

## Data Munging

In [2]:
# Load the tax table (first CSV column becomes the index) and turn missing cells into 0.
df = pd.read_csv('./total2.csv', index_col=0).fillna(0)

In [3]:
# Selectable tax years as strings, newest first (2016 down to 2004).
year_options = list(map(str, range(2016, 2003, -1)))

In [4]:
def add_dash(aa):
    """Replace indentation spaces in a tax-category label with dash markers.

    A run of four spaces becomes ``'-- '`` and a run of three spaces becomes
    ``'- '``; labels without such runs are returned unchanged.

    Parameters
    ----------
    aa : str
        Raw label text.

    Returns
    -------
    str
        The label with space runs rewritten as dash prefixes.
    """
    # The longer run must be handled first: replacing three-space runs first
    # also consumes the start of every four-space run, so the '-- ' marker
    # could never be produced.
    return aa.replace('    ', '-- ').replace('   ', '- ')
# Pass every 'Tax Category' label through add_dash (space runs -> dash markers).
df['Tax Category'] = df['Tax Category'].apply(add_dash)

In [5]:
# Every column that is neither metadata nor the national total is treated as a state.
state_names = [
    col for col in df.columns
    if col not in ('Tax Category', 'calculated', 'year', 'United States')
]
# Labels offered in the category drop-downs: the first 31 entries of the column.
tax_types = df['Tax Category'].tolist()[:31]

In [6]:
# Full state name -> two-letter code (District of Columbia included).
# The codes are used further down as keys into the `states` geometry mapping.
state_dict = {'Mississippi': 'MS', 'Oklahoma': 'OK', 'Delaware': 'DE', 'Minnesota': 'MN', 'Illinois': 'IL', 'Arkansas': 'AR', 'New Mexico': 'NM', 'Indiana': 'IN', 'Maryland': 'MD', 'Louisiana': 'LA', 'Idaho': 'ID', 'Wyoming': 'WY', 'Tennessee': 'TN', 'Arizona': 'AZ', 'Iowa': 'IA', 'Michigan': 'MI', 'Kansas': 'KS', 'Utah': 'UT', 'Virginia': 'VA', 'Oregon': 'OR', 'Connecticut': 'CT', 'Montana': 'MT', 'California': 'CA', 'Massachusetts': 'MA', 'West Virginia': 'WV', 'South Carolina': 'SC', 'New Hampshire': 'NH', 'Wisconsin': 'WI', 'Vermont': 'VT', 'Georgia': 'GA', 'North Dakota': 'ND', 'Pennsylvania': 'PA', 'Florida': 'FL', 'Alaska': 'AK', 'Kentucky': 'KY', 'Hawaii': 'HI', 'Nebraska': 'NE', 'Missouri': 'MO', 'Ohio': 'OH', 'Alabama': 'AL', 'New York': 'NY', 'South Dakota': 'SD', 'Colorado': 'CO', 'New Jersey': 'NJ', 'Washington': 'WA', 'North Carolina': 'NC', 'District of Columbia': 'DC', 'Texas': 'TX', 'Nevada': 'NV', 'Maine': 'ME', 'Rhode Island': 'RI'}

In [7]:
# State name -> area. The map hover tooltip displays these values with the
# suffix "k sq km".
area_dict = {'Mississippi': 125, 'Oklahoma': 181, 'Wyoming': 253, 'Minnesota': 225, 'Illinois': 149, 'Arkansas': 137, 'New Mexico': 314, 'Ohio': 116, 'Indiana': 94, 'Maryland': 32, 'Louisiana': 134, 'Idaho': 216, 'Arizona': 295, 'Wisconsin': 169, 'Michigan': 250, 'Kansas': 213, 'Utah': 219, 'Virginia': 110, 'Oregon': 254, 'Connecticut': 14, 'Montana': 380, 'California': 423, 'Texas': 695, 'West Virginia': 62, 'South Carolina': 82, 'New Hampshire': 24, 'Massachusetts': 27, 'Vermont': 24, 'Georgia': 153, 'North Dakota': 183, 'Hawaii': 28, 'Pennsylvania': 119, 'Florida': 170, 'Alaska': 1717, 'Kentucky': 104, 'Tennessee': 109, 'Nebraska': 200, 'Missouri': 180, 'Iowa': 145, 'Alabama': 135, 'Rhode Island': 4, 'South Dakota': 199, 'Colorado': 269, 'New Jersey': 22, 'Washington': 184, 'North Carolina': 139, 'New York': 141, 'District of Columbia': 0, 'Nevada': 286, 'Delaware': 6, 'Maine': 91}

In [8]:
# Name -> population, stored as strings. The map hover tooltip displays these
# values with the suffix "M". Besides the states this also holds entries for
# 'United States' and 'Puerto Rico Commonwealth'.
popul_dict = {'Mississippi': '2.99', 'Oklahoma': '3.92', 'Delaware': '0.95', 'Minnesota': '5.52', 'Illinois': '12.8', 'Arkansas': '2.99', 'New Mexico': '2.08', 'Indiana': '6.63', 'Maryland': '6.02', 'Louisiana': '4.68', 'Idaho': '1.68', 'Wyoming': '0.59', 'Tennessee': '6.65', 'Arizona': '6.93', 'Iowa': '3.13', 'Michigan': '9.93', 'Kansas': '2.91', 'Utah': '3.05', 'Virginia': '8.41', 'Oregon': '4.09', 'Connecticut': '3.58', 'Montana': '1.04', 'California': '39.25', 'Massachusetts': '6.81', 'West Virginia': '1.83', 'South Carolina': '4.96', 'New Hampshire': '1.33', 'Wisconsin': '5.78', 'Vermont': '0.62', 'Georgia': '10.31', 'North Dakota': '0.76', 'Pennsylvania': '12.78', 'Florida': '20.61', 'Alaska': '0.74', 'Kentucky': '4.44', 'Hawaii': '1.43', 'United States': '323.13', 'Nebraska': '1.91', 'Missouri': '6.09', 'Ohio': '11.61', 'Alabama': '4.86', 'New York': '19.75', 'Puerto Rico Commonwealth': '3.41', 'South Dakota': '0.87', 'Colorado': '5.54', 'New Jersey': '8.94', 'Washington': '7.29', 'North Carolina': '10.15', 'District of Columbia': '0.68', 'Texas': '27.86', 'Nevada': '2.94', 'Maine': '1.33', 'Rhode Island': '1.06'}

In [9]:
# State boundary data, keyed by two-letter state code; each entry provides
# "lons" and "lats" sequences that are read when building the map outlines.
# NOTE(review): dill, like pickle, can execute arbitrary code while loading --
# only load 'states.pkd' from a trusted source.
# The `with` block guarantees the file handle is closed after loading.
with open('states.pkd', 'rb') as states_file:
    states = dill.load(states_file)

In [11]:
# Define the colour list in this cell before displaying it, so the cell also
# works on a fresh kernel (Restart & Run All) instead of relying on a value
# left over from a later cell. One colour per Category20 sub-palette: its
# last entry.
colors = [val[-1] for val in palette.values()]
colors

['#ff7f0e',
 '#ffbb78',
 '#2ca02c',
 '#98df8a',
 '#d62728',
 '#ff9896',
 '#9467bd',
 '#c5b0d5',
 '#8c564b',
 '#c49c94',
 '#e377c2',
 '#f7b6d2',
 '#7f7f7f',
 '#c7c7c7',
 '#bcbd22',
 '#dbdb8d',
 '#17becf',
 '#9edae5']

In [12]:
# One colour per Category20 sub-palette: the last entry of each.
colors = [shades[-1] for shades in palette.values()]
# Sorted keys of the geometry mapping.
codes = sorted(states)

# Boundary longitudes / latitudes for each state, in state_names order.
state_xs, state_ys = (
    [states[state_dict[name]][coord] for name in state_names]
    for coord in ("lons", "lats")
)

## Exploratory Data Analysis: Tool for Self-Explorations

In [13]:
# Default title and pixel dimensions shared by the main figures.
plot_title = 'US Tax Collection'
plot_height, plot_width = 560, 820

In [15]:
def color_scale():
    """Build a narrow, axis-free figure that shows each entry of ``colors``.

    One marker is drawn per colour at x = 0, with y equal to the colour's
    position in ``colors``.

    Returns
    -------
    The Bokeh figure holding the colour markers.
    """
    legend = figure(plot_width=60, plot_height=plot_height, tools='',
                    toolbar_location=None,
                    x_range=(-1, 1), y_range=(-15, 20))

    # Strip all chrome: no axes, no grid lines, no outline.
    for axis in (legend.xaxis, legend.yaxis):
        axis.visible = False
    for grid in (legend.xgrid, legend.ygrid):
        grid.grid_line_color = None
    legend.outline_line_alpha = 0.

    for position, shade in enumerate(colors):
        legend.scatter([0], [position], color=shade, size=18)
    return legend
# Colour legend figure.
scale = color_scale()

# Three identical blank figures; each is placed first in a row of the final
# page layout.
invisible_plot0, invisible_plot1, invisible_plot2 = (
    figure(plot_width=60, plot_height=100, tools='',
           toolbar_location=None, outline_line_alpha=0.)
    for _ in range(3)
)
show(scale)

In [13]:
# Initial selections for the exploration tool: category, normalization and
# the first entry of year_options.
tax_cat = 'Total Taxes' 
calculated = 'Total Tax Collection ($K)'
year = year_options[0]
def get_source(df, tax_cat, calculated, year):
    """Build the per-state data source for one category / normalization / year.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with 'Tax Category', 'calculated' and 'year' columns plus one
        column per entry of ``state_names``.
    tax_cat : str
        Value to match in the 'Tax Category' column.
    calculated : str
        Value to match in the 'calculated' column.
    year : str or int
        Tax year; converted with ``int`` before matching the 'year' column.

    Returns
    -------
    ColumnDataSource
        Columns: ``c`` (fill colour), ``names``, ``val``, ``popul``, ``areas``
        and the bar geometry columns ``bottom``, ``right``, ``left``.

    Raises
    ------
    IndexError
        If no row matches the requested combination.
    """
    # The row filter does not depend on the state, so evaluate it once instead
    # of once per state.
    row = df[(df['Tax Category'] == tax_cat)
             & (df['calculated'] == calculated)
             & (df['year'] == int(year))]

    names = list(state_names)
    tax_vals = [row[state].values[0] for state in names]
    popul = [popul_dict[state] for state in names]
    areas = [area_dict[state] for state in names]

    # Scale each value linearly onto a palette index, the largest value getting
    # the last colour.
    top_index = len(colors) - 1
    peak = np.max(tax_vals)
    if peak > 0:
        # Clamp so a negative value cannot wrap around to the end of the palette.
        idx_vals = [min(max(int(val / peak * top_index), 0), top_index)
                    for val in tax_vals]
    else:
        # All values are zero (or negative): avoid dividing by zero and use the
        # first colour everywhere.
        idx_vals = [0] * len(tax_vals)
    state_colors = [colors[idx] for idx in idx_vals]

    # One unit-spaced bar per state, 0.8 wide; sized from the data rather than
    # a fixed count so every column has the same length.
    count = len(names)
    bottom = [0] * count
    right = [x + 0.40 for x in range(1, count + 1)]
    left = [x + 0.60 for x in range(count)]

    source = ColumnDataSource(data=dict(
        c=state_colors,
        names=names,
        val=tax_vals,
        popul=popul,
        areas=areas,
        bottom=bottom, right=right, left=left,
    ))

    return source

In [14]:
def make_plot(source, title=plot_title):
    """Create the choropleth figure whose state fill colours come from ``source``.

    Parameters
    ----------
    source : ColumnDataSource
        Must provide the columns ``c``, ``names``, ``val``, ``popul`` and
        ``areas`` referenced by the glyph and the hover tooltips.
    title : str
        Text shown as the figure title.

    Returns
    -------
    The configured Bokeh figure.
    """
    # Choropleth
    choropleth = figure(
        tools="pan,hover,save, reset",
        toolbar_location=None,
        plot_width=plot_width, plot_height=plot_height,
        x_range=(-130, -65), y_range=(20, 51))

    choropleth.title.text = title
    choropleth.title.text_font_size = '14pt'

    # Hide axes, grid lines and the outline so only the map remains.
    for axis in (choropleth.xaxis, choropleth.yaxis):
        axis.visible = False
    for grid in (choropleth.xgrid, choropleth.ygrid):
        grid.grid_line_color = None
    choropleth.outline_line_alpha = 0.

    # State outlines come from the module-level coordinate lists; the fill
    # colour is read from the 'c' column of the source.
    choropleth.patches(state_xs, state_ys,
                       source=source, fill_color='c', fill_alpha=0.8,
                       line_color="white", line_width=0.5)

    tooltip = choropleth.select_one(HoverTool)
    tooltip.point_policy = "follow_mouse"
    tooltip.tooltips = [
        ("State", "@names"),
        ("Collection", "@val"),
        ("Population", "@popul M"),
        ("Area", "@areas k sq km"),
    ]

    return choropleth

In [15]:
def make_bar1(source):    # bar1
    """Create the thin bar strip drawn from the same source as the map.

    Bars are drawn directly from ``source`` in its row order (they are not
    sorted by value). An earlier value-sorted copy of the data was built here
    but never handed to the glyph, so that dead computation has been removed.

    Parameters
    ----------
    source : ColumnDataSource
        Must provide the columns ``val``, ``bottom``, ``left``, ``right``,
        ``c`` and ``names``.

    Returns
    -------
    The configured Bokeh figure.
    """
    # The tooltip shows only the state name.
    hover0 = HoverTool(tooltips=[('', "@names")])

    bar1 = figure(plot_width=plot_width, plot_height=100,
                  toolbar_location=None, tools=[hover0, "pan"], x_range=(-2, 54))
    bar1.quad(top='val', bottom='bottom', left='left',
              right='right', color='c',
              source=source,
              fill_alpha=1)

    # Hide axes, grid lines and the outline.
    bar1.xgrid.grid_line_color = None
    bar1.ygrid.grid_line_color = None
    bar1.xaxis.visible = False
    bar1.yaxis.visible = False
    bar1.outline_line_alpha = 0.

    return bar1

In [16]:
def update_plot(attrname, old, new):
    """Widget callback: rebuild the map data and title from the current selections.

    The three arguments follow the ``on_change`` callback signature and are
    not used; the current values are read from the select widgets.
    """
    category = cat_select.value
    measure = calculated_select.value
    tax_year = year_select.value

    refreshed = get_source(df, category, measure, tax_year)
    plot.title.text = ''.join([
        "Choropleth Map of ", measure,
        ' from ', category.strip(' -'),
        ' in ', tax_year,
    ])

    # Update the existing source in place so the glyphs bound to it redraw.
    source.data.update(refreshed.data)

In [17]:

# Drop-down controls for the exploration tool: tax category, normalization, year.
cat_select = Select(value=tax_cat, title='Select Tax Category / Subcategory', options=tax_types)
calculated_select = Select(value=calculated, title='Select Normalization', 
                    options=['Total Tax Collection ($K)', 'Collection Per Capita ($)'])
year_select = Select(value=year, title='Select Tax Year for Explorations', options=year_options)


# Initial data source and the two figures that draw from it.
source = get_source(df, tax_cat, calculated, year)
plot = make_plot(source, title='Exploration Tool For US Tax Collections By States')
bar1 = make_bar1(source)

# Any change to a drop-down value triggers update_plot.
cat_select.on_change('value', update_plot)
calculated_select.on_change('value', update_plot)
year_select.on_change('value', update_plot)

controls1 = widgetbox(year_select, cat_select, calculated_select)


Supplying a user-defined data source AND iterable values to glyph methods is deprecated.

See https://github.com/bokeh/bokeh/issues/2056 for more information.

  warn(message)
Supplying a user-defined data source AND iterable values to glyph methods is deprecated.

See https://github.com/bokeh/bokeh/issues/2056 for more information.

  warn(message)


## Time-Series Predictions

In [18]:
# Choices and initial selections for the prediction section; the national
# total is offered first, followed by the individual states.
state2_options = ['United States'] + state_names
tax_cat2 = 'Total Taxes'
calculated2 = 'Total Tax Collection ($K)'
state2 = 'United States'

In [19]:
def index_date_convert(df2):
    """Return a copy of ``df2`` indexed by January 1st of each value in 'year'.

    The 'year' column is converted to ``<year>-01-01`` timestamps and moved
    into the index (named 'year'); it no longer appears as a column.

    The input frame is left untouched. Working on a copy matters because
    callers pass in a slice of a larger frame, and assigning to a slice
    triggers pandas' SettingWithCopy warning and mutates the caller's object.

    Parameters
    ----------
    df2 : pandas.DataFrame
        Frame with a 'year' column holding year numbers.

    Returns
    -------
    pandas.DataFrame
        New frame with a datetime index named 'year'.
    """
    converted = df2.copy()
    converted['year'] = pd.to_datetime(converted['year'].astype(str) + '-01-01')
    return converted.set_index('year')

In [20]:
def get_source_bar(df, tax_cat2, calculated2, state2):
    """Build the bar-chart source: actual yearly values plus ARMA projections.

    ARMA(p, q) models with p, q in 1..5 are fitted on the last six rows of the
    selected series. Among the fits with a positive AIC, the one with the
    lowest AIC is used to predict 2017 and 2018 (negative predictions are
    clipped to 0). If no such fit exists, only the actual values are returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with 'Tax Category', 'calculated', 'year' and region columns.
    tax_cat2 : str
        Value to match in the 'Tax Category' column.
    calculated2 : str
        Value to match in the 'calculated' column.
    state2 : str
        Name of the column (state or 'United States') to chart.

    Returns
    -------
    ColumnDataSource
        Columns: ``vals``, ``years`` (hover labels), ``bottom``, ``right``,
        ``left`` and ``color``. The labels and bar positions assume the
        series covers 2004 through 2016.
    """
    vals_df = df[(df['Tax Category'] == tax_cat2) &
                 (df['calculated'] == calculated2)][[state2, 'year']]
    vals_df = index_date_convert(vals_df)

    p_max = 5
    q_max = 5
    recent = vals_df[-6:]  # only the last six rows are used for fitting

    # Keep the best fitted model itself so it does not have to be re-fitted
    # (outside any error handling) once the search is over.
    best_model = None
    best_aic = None
    for p in range(1, p_max + 1):
        for q in range(1, q_max + 1):
            try:
                candidate = ARMA(recent, (p, q)).fit()
                aic = candidate.aic
            except Exception:
                # Best-effort grid search: orders that cannot be fitted are
                # skipped. `Exception` (not a bare except) lets
                # KeyboardInterrupt / SystemExit propagate.
                continue
            # Strict '<' keeps the first-tried order on ties.
            if aic > 0 and (best_aic is None or aic < best_aic):
                best_aic, best_model = aic, candidate

    # Actual values: one green bar per year.
    vals = vals_df[state2].tolist()
    bar_years = list(range(2004, 2017))
    years = [str(year) + ' (actual value)' for year in bar_years]
    color = ["#B3DE69"] * 13

    if best_model is not None:
        preds = best_model.predict('2017-01-01', '2018-01-01').values.tolist()
        vals = vals + [max(ii, 0) for ii in preds]

        # Projected values: two extra red bars.
        years = years + ['2017 (projected)', '2018 (projected)']
        bar_years = bar_years + [2017, 2018]
        color = color + ['red'] * 2

    # Each bar is centred on its year and 0.8 wide.
    bottom = [0] * len(bar_years)
    right = [x + 0.40 for x in bar_years]
    left = [(x - 1) + 0.60 for x in bar_years]

    source_bar = ColumnDataSource(data=dict(
        vals=vals, years=years,
        bottom=bottom, right=right, left=left, color=color,
    ))

    return source_bar

In [21]:
def make_bar(source_bar, title='plot title'):
    """Create the yearly bar chart drawn from ``source_bar``.

    Parameters
    ----------
    source_bar : ColumnDataSource
        Must provide the columns ``vals``, ``years``, ``bottom``, ``left``,
        ``right`` and ``color``.
    title : str
        Text shown as the figure title.

    Returns
    -------
    The configured Bokeh figure.
    """
    hover = HoverTool(tooltips=[
        ("Tax Collection:", "@vals"),
        ("Year:", "@years"),
    ])

    chart = figure(plot_width=plot_width, plot_height=360,
                   toolbar_location=None, tools=[hover], x_range=(2003, 2019))
    chart.quad(source=source_bar,
               top='vals', bottom='bottom', left='left', right='right',
               color='color', fill_alpha=.6)

    chart.title.text = title
    chart.title.text_font_size = '14pt'

    # Remove ticks, axes, grid lines and the outline.
    for axis in (chart.xaxis, chart.yaxis):
        axis.minor_tick_line_color = None
        axis.major_tick_line_color = None
        axis.visible = False
    for grid in (chart.xgrid, chart.ygrid):
        grid.grid_line_color = None
    chart.outline_line_alpha = 0.
    return chart

In [22]:
def update_bar(attrname, old, new):
    """Widget callback: rebuild the bar data and title from the current selections.

    The three arguments follow the ``on_change`` callback signature and are
    not used; the current values are read from the select widgets.
    """
    category = cat2_select.value
    measure = calculated2_select.value
    region = state2_select.value

    refreshed = get_source_bar(df, category, measure, region)
    bar.title.text = ''.join([
        measure, ' from ', category.strip(' -'), ' - ', region,
    ])

    # Update the existing source in place so the glyph bound to it redraws.
    source_bar.data.update(refreshed.data)

In [23]:

# Drop-down controls for the prediction section: category, normalization, region.
cat2_select = Select(value=tax_cat2, title='Select Tax Category / Subcategory', options=tax_types)
calculated2_select = Select(value=calculated2, title='Select  Normalization', 
                    options=['Total Tax Collection ($K)', 'Collection Per Capita ($)'])
state2_select = Select(value=state2, title='Select US State for Predictions', options=state2_options)

# Initial data source and the bar chart that draws from it.
source_bar = get_source_bar(df, tax_cat2, calculated2, state2)
bar = make_bar(source_bar, "US Tax Collections and Times-Series Predictions ")

# Any change to a drop-down value triggers update_bar.
cat2_select.on_change('value', update_bar)
calculated2_select.on_change('value', update_bar)
state2_select.on_change('value', update_bar)

controls2 = widgetbox(state2_select, cat2_select, calculated2_select)


  return np.log(self.sigma2) + (1 + self.df_model) * np.log(nobs)/nobs
  newparams = ((1-np.exp(-params))/
  (1+np.exp(-params))).copy()
  (1+np.exp(-params))).copy()
  tmp = ((1-np.exp(-params))/
  (1+np.exp(-params))).copy()
  (1+np.exp(-params))).copy()


In [24]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import normalize
from sklearn import metrics

In [25]:
def get_source_kmeans(df, cat1, cat2, cat3, cat4, cat5, calculated, year):
    """Cluster the states on up to five tax categories and colour them by cluster.

    Every selected category contributes one feature per state (the matching
    row of ``df``, L2-normalised across states); a category set to
    'No Selection' contributes zeros. K-Means is run for 4 to 9 clusters and
    the clustering with the best silhouette score is kept.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose columns ``[2:-2]`` hold the per-state values.
    cat1, cat2, cat3, cat4, cat5 : str
        'Tax Category' values, or 'No Selection'.
    calculated : str
        Value to match in the 'calculated' column.
    year : str or int
        Tax year; converted with ``int`` before matching.

    Returns
    -------
    ColumnDataSource
        Columns: ``c`` (fill colour per state), ``names`` (state columns) and
        ``n`` (the chosen cluster count, repeated for every state).
    """
    state_cols = df.columns[2:-2]
    n_states = len(state_cols)

    cats = []
    for cat in (cat1, cat2, cat3, cat4, cat5):
        if cat != 'No Selection':
            cat_vals = df[(df['Tax Category'] == cat) &
                          (df['calculated'] == calculated) &
                          (df.year == int(year))].iloc[:, 2:-2]
            cat_vals = normalize(cat_vals.values)[0]
        else:
            cat_vals = [0] * n_states
        cats.append(cat_vals)

    # One 5-feature row per state.
    data = list(zip(*cats))

    # Cluster only when at least one state has a non-zero feature. Checking
    # just the first state's row would treat a real selection as "nothing
    # selected" whenever that one state happens to collect none of the chosen
    # taxes.
    if np.any(np.asarray(data, dtype=float)):
        best = None  # (silhouette score, cluster count, labels)
        for n_clusters in range(4, 10):
            labels = KMeans(n_clusters=n_clusters).fit(data).predict(data)
            score = metrics.silhouette_score(data, labels, metric='euclidean')
            # Ties on the score go to the larger cluster count.
            if best is None or (score, n_clusters) > best[:2]:
                best = (score, n_clusters, labels)
        # Reuse the labels that were actually scored: K-Means is randomly
        # initialised, so fitting again could return a different clustering.
        _, n, y = best
    else:
        y = [0] * n_states
        n = 1

    # Use every second palette entry so neighbouring clusters are easier to
    # tell apart.
    state_colors = [colors[2 * idx] for idx in y]

    source_kmeans = ColumnDataSource(data=dict(
        c=state_colors,
        names=state_cols,
        n=[n] * n_states,
    ))
    return source_kmeans

In [26]:
def make_plot_kmeans(source_kmeans):
    """Create the cluster map whose state fill colours come from ``source_kmeans``.

    Parameters
    ----------
    source_kmeans : ColumnDataSource
        Must provide the columns ``c`` and ``names``.

    Returns
    -------
    The configured Bokeh figure.
    """
    cluster_map = figure(
        tools="pan,hover,save, reset",
        toolbar_location=None,
        plot_width=plot_width, plot_height=plot_height,
        x_range=(-130, -65), y_range=(19, 51))

    cluster_map.title.text = 'Geographical Division (Similar Tax Behavior) Based On Selected Subcategories: '
    cluster_map.title.text_font_size = '14pt'

    # Hide axes, grid lines and the outline so only the map remains.
    for axis in (cluster_map.xaxis, cluster_map.yaxis):
        axis.visible = False
    for grid in (cluster_map.xgrid, cluster_map.ygrid):
        grid.grid_line_color = None
    cluster_map.outline_line_alpha = 0.

    # State outlines come from the module-level coordinate lists; the fill
    # colour is read from the 'c' column of the source.
    cluster_map.patches(state_xs, state_ys,
                        source=source_kmeans, fill_color='c', fill_alpha=0.8,
                        line_color="white", line_width=0.5)

    tooltip = cluster_map.select_one(HoverTool)
    tooltip.point_policy = "follow_mouse"
    tooltip.tooltips = [("State", "@names")]
    return cluster_map

In [27]:
def update_kmeans(attrname, old, new):
    """Widget callback: re-cluster with the current selections and refresh the map.

    The three arguments follow the ``on_change`` callback signature and are
    not used; the current values are read from the select widgets.
    """
    chosen = [widget.value for widget in
              (k1_select, k2_select, k3_select, k4_select, k5_select)]
    refreshed = get_source_kmeans(df, *chosen,
                                  calculated3_select.value, year3_select.value)

    # Update the existing source in place so the glyph bound to it redraws.
    source_kmeans.data.update(refreshed.data)

    # The chosen cluster count is stored in every row of the 'n' column.
    cluster_count = source_kmeans.data['n'][0]
    plot_kmeans.title.text = ''.join([
        'Classification Based On Selected Categories: ',
        str(cluster_count),
        ' Clusters of Similar Tax Behaviors (in ',
        str(year3_select.value),
        ')',
    ])
    
# Initial state of the clustering section: no category chosen in any slot.
cat1 = 'No Selection'
cat2 = 'No Selection'
cat3 = 'No Selection'
cat4 = 'No Selection'
cat5 = 'No Selection'

calculated3 = 'Total Tax Collection ($K)'
year3 = year_options[0]
# Each slot offers 'No Selection' followed by the tax categories.
tax_selected = ['No Selection'] + tax_types

# Five category slots; each contributes one feature to the clustering.
k1_select = Select(value=cat1, title='Add 1st Tax Category / Subcategory', options=tax_selected)
k2_select = Select(value=cat2, title='Add 2nd Tax Category / Subcategory', options=tax_selected)
k3_select = Select(value=cat3, title='Add 3rd Tax Category / Subcategory', options=tax_selected)
k4_select = Select(value=cat4, title='Add 4th Tax Category / Subcategory', options=tax_selected)
k5_select = Select(value=cat5, title='Add 5th Tax Category / Subcategory', options=tax_selected)

calculated3_select = Select(value=calculated3, title='Select Normalization', 
                    options=['Total Tax Collection ($K)', 'Collection Per Capita ($)'])
year3_select = Select(value=year3, title='Select Tax Year for Classifications', options=year_options)

# Initial data source and the map that draws from it.
source_kmeans = get_source_kmeans(df, cat1, cat2, cat3, cat4, cat5, calculated3, year3)
plot_kmeans = make_plot_kmeans(source_kmeans)

# Any change to a drop-down value triggers update_kmeans.
k1_select.on_change('value', update_kmeans)
k2_select.on_change('value', update_kmeans)
k3_select.on_change('value', update_kmeans)
k4_select.on_change('value', update_kmeans)
k5_select.on_change('value', update_kmeans)

calculated3_select.on_change('value', update_kmeans)
year3_select.on_change('value', update_kmeans)

controls3 = widgetbox(year3_select, calculated3_select, 
                     k1_select, k2_select, k3_select, k4_select, k5_select)


Supplying a user-defined data source AND iterable values to glyph methods is deprecated.

See https://github.com/bokeh/bokeh/issues/2056 for more information.

  warn(message)
Supplying a user-defined data source AND iterable values to glyph methods is deprecated.

See https://github.com/bokeh/bokeh/issues/2056 for more information.

  warn(message)


In [28]:
# Introductory text block for the exploration section. Two misspellings in the
# displayed text ("colections", "classsified") are corrected.
div1 = Div(text="""
<br><br><br><br>
<p style= "text-align:center; font-size:20px;">
<b>US TAX COLLECTIONS: EXPLORATORY DATA ANALYSIS </b><br><br> </p>

<p style= "text-align:left; font-size:20px; margin-left: 55px">

<b>Tax collections</b> are classified into five major categories (and twenty-five subcategories):</p>

<ul class="a"; style= "text-align:justify; font-size:20px; margin-left: 65px; line-height: 1.6">
  <li>Property Taxes</li>
  <li>Sale and Gross Receipt Taxes</li>
  <li>Licenses Taxes</li>
  <li>Income Taxes</li>
  <li>Other Taxes</li>
</ul>

<p style= "text-align:left; font-size:20px; margin-left: 55px">
Some (fun) facts about tax collections in the United States:
</p>

<ul class="a"; style= "text-align:justify; font-size:20px; margin-left: 65px; line-height: 1.7">
  <li>Nevada, Washington, Texas, South Dakota and Wyoming do not collect <i>individual income taxes</i></li>
  <li>There are no <i>general sales and gross receipts taxes</i> 
  in Alaska, Delaware, Montana, New Hampshire and Oregon</li>
  <li><i>Total tax collections</i> in Alaska have decreased seven-fold since 2012 </li>
  <li>Three states - California, New York and Texas, 
  account for more than 30 percent of <i>total tax collections</i> in 2016</li>
  <li>California and New York account for 43 percent of <i>total income tax collections</i> in 2016</li>
</ul>
""",
width=1150, height=630)

# Choropleth map 

In [29]:
# Explanatory text block shown above the time-series prediction chart.
div2 = Div(width=1150, height=440, text="""
<p style= "text-align:center; font-size:20px;">
<br><br><br>
<b>MACHINE LEARNING: TIME-SERIES PREDICTIONS </b><br><br> </p>

<p style= "text-align:left; font-size:20px; margin-left: 55px; line-height: 1.7">

<b>How it works: </b> Time-Series analysis is used to run predictions for future tax collections</p>

<ul class="a"; style= "text-align:justify; font-size:20px; margin-left: 65px; line-height: 1.7">
  <li>Learning technique: Autoregressive (AR) models use data from previous time-points to predict the next time-point, 
  while moving average (MA) models, as opposed to AR, take the previous error terms instead 
  of the previous values as inputs.
  </li>
  <li>Here the integrated model (ARMA) is used for the predictions of future tax revenues, 
  by first performing grid search to obtain the optimum values p 
  (the amount of lag that is useful for predicting future values) 
  and q (the number of previous errors to consider).</li>
</ul>


""")
#show(div2)

In [30]:
# Explanatory text block shown above the K-Means cluster map.
div3 = Div(width=1150, height=400, text="""
<p style= "text-align:center; font-size:20px;">
<br><br><br>
<b>MACHINE LEARNING: UNSUPERVISED CLASSIFICATIONS </b><br><br> </p>

<p style= "text-align:justify; font-size:20px; margin-left: 55px; line-height: 1.7">

<b>How it works: </b> Unsupervised learning (K-Means) is used to classify the tax behaviors across the US, 
    based on chosen tax categories or subcategories from user selections.</p>

<ul class="a"; style= "text-align:justify; font-size:20px; margin-left: 65px; line-height: 1.7">
  <li>Learning technique: K-Means is a clustering algorithm that assumes k clusters, 
  and then computes these clusters based on the attributes of the available data. 
  The algorithm takes the whole data set and iterates over its attributes 
  to determine the clusters based around centers, known as centroids. 
  </li>
  <li>Here the number of centroids (k) is 
  selected with Silhouette analysis on K-Means clustering for each data set.</li>
</ul>


""")
#show(div3)

In [31]:
# Footer crediting the data source; displayed right away in the notebook.
div4 = Div(width=1150, height=40, text="""<br>Data source: US Census Bureau
""")
show(div4)

In [32]:
# Page layout, top to bottom: each section is a text block followed by a row
# holding a blank spacer figure, the section's plot(s) and its controls.
layout = column(
                div1, row (invisible_plot0, column(plot, bar1), controls1), 
                div2,
                row(invisible_plot1, bar, controls2),
                div3,
                row(invisible_plot2, plot_kmeans, controls3),   
                div4,
               )

# Register the layout with the current Bokeh document and set the document title.
curdoc().add_root(layout)
curdoc().title = "Hieu H. Pham"


# Also render the layout inline in the notebook.
show(layout)


In [34]:
!jupyter nbconvert --to python week8.ipynb

[NbConvertApp] Converting notebook week8.ipynb to python
[NbConvertApp] Writing 22535 bytes to week8.py
