In [1]:
# import libraries
import datetime
import time
from pickle import load

import numpy as np
import pandas as pd
import spacy

from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
import dash_table
from dash.dependencies import Input, Output, State
from dash.exceptions import PreventUpdate

from my_DashFunc import *

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# load the spaCy 'en_core_web_lg' pipeline with the parser and NER
# components disabled
nlp = spacy.load(
    'en_core_web_lg',
    disable=['parser', 'ner'],
)

In [3]:
# add new stop words (whitespace-only tokens)
new_sw = [' ', '\n', '\n\n']
for w in new_sw:
    nlp.Defaults.stop_words.add(w)
    nlp.vocab[w].is_stop = True

# remove default stop words (negations and similar) so they are kept as
# regular tokens
def_sw = ["n't", 'n’t', 'again', 'against', 'down', 'neither', 'never',
           'not', 'no']
for w in def_sw:
    # discard() rather than remove(): remove() raises KeyError when the word
    # is already absent, which happens as soon as this cell is run a second
    # time in the same kernel
    nlp.Defaults.stop_words.discard(w)
    nlp.vocab[w].is_stop = False

In [4]:
# preload every review dataframe once, up front, to save exec. time
df_500, df_1000, df_1500, df_2000, df_2500 = [
    create_df(csv_path) for csv_path in (
        'data/off_500.csv',
        'data/off_1000.csv',
        'data/off_1500.csv',
        'data/off_2000.csv',
        'data/off_2500.csv',
    )
]
# lookup table keyed by the same 'df_<n>' strings the dropdown uses as values
df_dict = dict(
    df_500=df_500,
    df_1000=df_1000,
    df_1500=df_1500,
    df_2000=df_2000,
    df_2500=df_2500,
)

# pre-generate word frequency for word cloud (label 1 -> pos, label 0 -> neg)
pos_wc_dict, neg_wc_dict = {}, {}
for df_name, frame in df_dict.items():
    pos_wc_dict[df_name] = get_freq_words(frame, 1, nlp, single_word=True)
    neg_wc_dict[df_name] = get_freq_words(frame, 0, nlp, single_word=True)

In [5]:
# parse model summary to dash table
def parse_summ():
    '''
    Signature:   parse_summ()
    Docstring:   load the saved model and render the lines of its
                 model.summary() text as rows of an html.Table
    Parameter:   None
    Return:      html.Table with one html.Tr per kept summary line
    '''
    # load model here else get tensorflow graph object error
    model = load_model('dump/sentiment_w2v_wo_3_lc_stpwd.h5')

    # collect every line that model.summary() would otherwise print
    summary_lines = []
    model.summary(print_fn=lambda line: summary_lines.append(line))

    rows = []
    for line in summary_lines:
        # skip the '____' / '====' separator rules
        if line.startswith('_') or line.startswith('='):
            continue
        if line.startswith('Non-train'):
            # this line is kept in a single cell
            rows.append(html.Tr(html.Td(line[:35])))
        else:
            # fixed-width split of the summary text into three columns
            rows.append(html.Tr([
                html.Td(line[:29]),
                html.Td(line[29:50]),
                html.Td(line[50:]),
            ]))

    return html.Table(rows)

In [6]:
# build the model-summary html.Table once, up front; the Dash layout below
# embeds this object in its 'Model Summary' panel
dcc_tbl_mdl_sum = parse_summ()

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [7]:
# create Dash app
app = JupyterDash(__name__)

# dropdown options, one per predefined csv file in the data folder; each
# 'value' is the matching key in df_dict
drop_down = [
    {'label': '{} Reviews'.format(n_rev), 'value': 'df_{}'.format(n_rev)}
    for n_rev in (500, 1000, 1500, 2000, 2500)
]

# image file location
path, img_banner, img_pred = 'img/', 'ai.png', 'pred.png'

# css style: shared sizes, borders and colours reused by the dicts below
css_div_lvl_min_width = '1200px'
css_pad_l = css_pad_r = '5px'
txt_height = '42px'
css_border = '1px solid #a6d5e3'
css_bor_rad = '4px'
css_df_bgc = css_wc_bgc = '#a6cfe6'
css_mdl_bgc = css_clf_bgc = css_conf_bgc = '#86b1c9'
css_conf_pred = '#5b89a3'
css_slg_bgc = '#39628b'
css_lab = '#F8F562'

# border + rounded-corner pair that most panels share
_css_frame = {'border': css_border, 'border-radius': css_bor_rad}


def _css_section_label(bg_colour):
    '''Style of a level-2 panel title bar; only the background colour varies.'''
    return {
        'width': '100%',
        'height': txt_height,
        'position': 'absolute',
        'line-height': txt_height,
        'vertical-align': 'middle',
        'font-weight': 'bold',
        **_css_frame,
        'background-color': bg_colour,
    }


css_main_div = {
    'width': '99%',
    'font-family': 'Verdana',
    'font-size': '0.75em',
}
css_banner = {
    'width': '100%',
    'height': '140px',
    'border-radius': css_bor_rad,
}

# dash table styling
css_tbl_header = {'backgroundColor': '#dde7ed', 'fontWeight': 'bold'}
css_tbl_cell = {
    'textAlign': 'left',
    'whiteSpace': 'normal',
    'height': 'auto',
}
css_tbl_condl = [
    {'if': {'row_index': 'odd'}, 'backgroundColor': '#eef8fe'},
]

# df table top div section
css_div_lvl1 = {'min-width': css_div_lvl_min_width}
css_cont_df = {'width': '60%', 'float': 'left'}
css_df_tbl_top = {
    'height': txt_height,
    'font-weight': 'bold',
    **_css_frame,
    'background-color': css_df_bgc,
}
css_sel_lab = {
    'line-height': txt_height,
    'vertical-align': 'middle',
    'padding-left': css_pad_l,
    'padding-right': css_pad_r,
    'float': 'left',
}
css_dd = {
    'width': '160px',
    'height': '25px',
    'float': 'left',
    'display': 'inline-block',
    'padding-top': '8px',
}
css_pos_neg = {
    'text-align': 'right',
    'padding-right': '15px',
    'padding-top': '4px',
}
css_df_output = {
    'height': '450px',
    'float': 'left',
    'overflow': 'scroll',
    **_css_frame,
    'padding': '2px',
    'margin-top': '2px',
}

# word cloud section
css_cont_wc = {'float': 'left', 'width': '39.5%', 'margin-left': '2px'}
css_wc_top = {
    'height': txt_height,
    'font-weight': 'bold',
    **_css_frame,
    'background-color': css_wc_bgc,
}
css_wc_lab = {
    'width': 'auto',
    'float': 'left',
    'vertical-align': 'middle',
    'line-height': txt_height,
    'padding-left': css_pad_l,
    'padding-right': css_pad_r,
}
css_rad_div = {'float': 'left', 'height': txt_height}
css_wc_output = {
    'width': '100%',
    'float': 'left',
    'margin-top': '3px',
    **_css_frame,
}

# model, classification report, confusion matrix section
css_div_lvl2 = {
    'width': '98%',
    'min-width': css_div_lvl_min_width,
    'position': 'absolute',
    'margin-top': '505px',
    'height': '405px',
    'text-align': 'center',
}
css_sub_cont = {
    'width': '32.5%',
    'height': '400px',
    'margin-right': '5px',
    'display': 'inline-block',
    'position': 'relative',
}
# visible / hidden variants of the sub-container (toggled by the callbacks)
css_sub_cont_v = {**css_sub_cont, 'visibility': 'visible'}
css_sub_cont_h = {**css_sub_cont, 'visibility': 'hidden'}

css_cont_mdl = css_sub_cont
css_mdl_lab = _css_section_label(css_mdl_bgc)
css_mdl_output = {
    'height': '88%',
    'width': '100%',
    'float': 'left',
    **_css_frame,
    'margin-top': '47px',
    'text-align': 'left',
}
css_set_button = {'margin': '30px 0px 0px 30px'}

css_cont_clsrpt = css_sub_cont_h
css_clsrpt_lab = _css_section_label(css_clf_bgc)
css_clsrpt_output = {
    'height': '87%',
    'width': '99%',
    'float': 'left',
    **_css_frame,
    'margin-top': '47px',
    'text-align': 'left',
    'font-size': '1.4em',
    'padding': '2px',
    'position': 'absolute',
}

css_cont_confmat = css_sub_cont_h
css_confmat_lab = _css_section_label(css_conf_bgc)
# same values as the classification-report output box, kept as its own dict
css_confmat_output = dict(css_clsrpt_output)

# single prediction section
css_div_lvl3 = {
    'width': '99%',
    'height': '150px',
    'display': 'block',
    'min-width': css_div_lvl_min_width,
    'margin-top': '910px',
}
css_cont_result = {
    'width': '30%',
    'height': '99%',
    'float': 'left',
    **_css_frame,
    'position': 'relative',
}
css_pred_lab = {
    'height': txt_height,
    'line-height': txt_height,
    'margin-bottom': '2px',
    'text-align': 'center',
    'font-weight': 'bold',
    'font-size': '1.4em',
    'color': css_lab,
    **_css_frame,
    'background-color': css_conf_pred,
}
css_slg_result = {
    'font-weight': 'bold',
    'font-size': '1.3em',
    'color': '#ffffff',
    'text-align': 'center',
    'position': 'absolute',
    'margin-top': '-140px',
    'margin-left': '20px',
}
css_sgl_pred_button = {
    'width': '80px',
    'position': 'absolute',
    'margin-top': '40px',
    'margin-left': '-340px',
}

# input text area
css_cont_input_text = {
    'width': '69.5%',
    'height': '99%',
    'float': 'left',
    **_css_frame,
    'margin-left': '2px',
}
css_input_box = {
    'height': '94%',
    'width': '98.5%',
    'margin': '2px 2px 2px 2px',
}

# Dash layout: banner, then three stacked sections
#   level 1 - dataset picker + review table (left), word cloud (right)
#   level 2 - model summary, classification report, confusion matrix
#   level 3 - single-review prediction sandbox
app.layout = html.Div(id='main-div', children=[
    # top banner
    html.Img(src=encode_image(path+img_banner), style=css_banner),
    # level 1 container
    html.Div([
        # div containing df select and table
        html.Div([
            html.Div(id='df-tbl-top', children=[
                html.Div('Select Set of Product Reviews:', style=css_sel_lab),
                html.Div(dcc.Dropdown(id='df-picker',
                    options=drop_down,
                    value=drop_down[0]['value'], style=css_dd)
                ),
                # placeholder texts; the show_df callback writes the actual
                # positive / negative ratios into these two divs
                html.Div('Percent of Positive', id='pos', style=css_pos_neg),
                html.Div('Percent of Negative', id='neg', style=css_pos_neg),
            ], style=css_df_tbl_top),
            # df output table
            html.Div(id='df-output', style=css_df_output),
        ], style=css_cont_df),

        # div containing wc labels and plot
        html.Div([
            html.Div([
                html.Div('Select Most Common Words:', style=css_wc_lab),
                html.Div(dcc.RadioItems(id='wc-radio',
                    options=[{'label':'Positive Reviews', 'value':1},
                        {'label':'Negative Reviews', 'value':0}],
                        value=1,
                        labelStyle={'display':'block'},
                        ), style=css_rad_div),
            ], style=css_wc_top),
            html.Div(
                dcc.Graph(id='word-cloud', config={'displayModeBar':False}),
                style=css_wc_output),
        ], style=css_cont_wc),
    ], style=css_div_lvl1),

    # level 2 container
    html.Div([
        # model summary container
        html.Div([
            html.Div('Model Summary', style=css_mdl_lab),
            html.Div([
                dcc_tbl_mdl_sum,
                html.Button('Get Prediction', id='get-pred',
                    style=css_set_button)
                ], style=css_mdl_output),

        ], style=css_cont_mdl),
        # cls rpt container (starts with the hidden style; show_clsrpt
        # switches it)
        html.Div(id='div-clsrpt', children=[
            html.Div('Classification Report', id='clsrpt-time',
                style=css_clsrpt_lab),
            dcc.Loading(
                id='loading-icon',
                children=html.Div(id='cls-rpt'),
                type='default'
            ),
        ], style=css_cont_clsrpt),

        # conf. matrix container (starts with the hidden style; show_confmatx
        # switches it)
        html.Div(id='div-confmat', children=[
            html.Div('Confusion Matrix', style=css_confmat_lab),
            html.Div(
                dcc.Graph(id='conf-matx', figure={}),
                style=css_confmat_output
            ),
        ], style=css_cont_confmat),
    ], style=css_div_lvl2),

    # single prediction container
    html.Div([
        # prediction result and button
        html.Div('Sandbox', style=css_pred_lab),
        html.Div([
            dcc.Loading(
                id='loading-sing-pred',
                children=html.Div([
                    html.Img(src=encode_image(path+img_pred),
                        style={'width':'100%','height':'150px'}),
                    html.Div(id='single-pred-result', style=css_slg_result),
                    html.Button('Try Me!', id='get-sgl-pred',
                        style=css_sgl_pred_button)
                ]),
                type='default'
                ),
        ], style=css_cont_result),
        # input text section
        html.Div(
            dcc.Textarea(
                id='rev-input',
                value='Enter your texts here.',
                style = css_input_box,
            ),
            style=css_cont_input_text
        ),
    ], style=css_div_lvl3),

], style=css_main_div)

# display selected dataframe
@app.callback(
    [Output('df-output', 'children'),
     Output('get-pred', 'n_clicks'),
     Output('pos', 'children'),
     Output('neg', 'children')],
    [Input('df-picker', 'value')]
)
def show_df(selected_df):
    '''
    Signature:   show_df(selected_df)
    Docstring:   render the selected review dataframe as a dash table and
                 report the share of each label
    Parameter:   selected_df - dropdown value, expected to be a df_dict key
    Return:      (table div, None to reset the 'get-pred' click counter,
                 positive-ratio text, negative-ratio text)
    Raises:      PreventUpdate when selected_df is not a df_dict key
                 (e.g. the dropdown was cleared), leaving the page as is
    '''
    df = df_dict.get(selected_df)
    if df is None:
        # nothing valid selected: keep the current table instead of failing
        # on None.to_dict
        raise PreventUpdate

    # parse df to dash table
    html_div = html.Div([
        dash_table.DataTable(
            data = df.to_dict('records'),
            columns = [{'name': i, 'id':i} for i in df.columns],
            page_size=20,
            sort_action ='native',
            # style keys are camelCase; the former 'overflowy' was not a
            # recognised property
            style_table={'overflowY':'auto'},
            style_header=css_tbl_header,
            style_cell=css_tbl_cell,
            style_data_conditional=css_tbl_condl
            )
        ], style={'font-family': 'Verdana', 'font-size':'1.4em'})

    # fraction of rows per label value, rounded to 2 decimals
    p_n_ratio = df['label'].value_counts(normalize=True).round(2)
    # .get() so a dataset containing only one label reports 0.0 for the
    # missing one instead of raising KeyError
    pos = f'% Positive Review: {p_n_ratio.get(1, 0.0)}'
    neg = f'% Negative Review: {p_n_ratio.get(0, 0.0)}'
    return html_div, None, pos, neg

# plot word cloud
@app.callback(
    Output('word-cloud', 'figure'),
    [Input('df-picker', 'value'),
     Input('wc-radio', 'value')],
)
def show_wc(selected_df, label):
    '''
    Signature:   show_wc(selected_df, label)
    Docstring:   draw the word cloud of the pre-generated word frequencies
                 for the selected dataset and review label
    Parameter:   selected_df - dropdown value, expected to be a df_dict key
                 label       - radio value; 0 picks the negative-review
                               frequencies, anything else the positive ones
    Return:      plotly figure holding the word-cloud image
    Raises:      PreventUpdate when no frequencies exist for selected_df
                 (e.g. the dropdown was cleared)
    '''
    if label == 0:
        freq_lookup, sentiment = neg_wc_dict, 'Negative'
    else:
        freq_lookup, sentiment = pos_wc_dict, 'Positive'

    wc_df = freq_lookup.get(selected_df)
    if wc_df is None:
        # keep the current figure rather than failing on dict(None)
        raise PreventUpdate

    # create WordCloud object and generate the cloud from the frequencies
    wc = WordCloud(background_color="white")
    wc.generate_from_frequencies(dict(wc_df))

    fig = go.Figure()
    fig.add_trace(go.Image(z=wc))

    # both titles now close the <b> tag (the negative one used to leave it open)
    title = f'<b>Top 50 Words from {sentiment} Reviews</b>'
    fig.update_layout(
        title={'text':title, 'x':0.5, 'y':0.85, 'xanchor':'center'},
        hovermode=False, autosize=True, margin=dict(l=5, r=5, b=0, t=0),
        font=dict(size=15, color='#c5a654')
    )
    fig.update_xaxes(showticklabels=False, showline=True,
        linewidth=1, linecolor='#88bccb', mirror=True)
    fig.update_yaxes(showticklabels=False, showline=True,
        linewidth=1, linecolor='#e4c02e', mirror=True)

    return fig
   
# display classification report
@app.callback(
    [Output('loading-icon', 'children'),
     Output('div-clsrpt', 'style'),
     Output('get-pred', 'disabled'),
     Output('clsrpt-time', 'children')],
    [Input('df-picker', 'value'),
    Input('get-pred', 'n_clicks')]
    )
def show_clsrpt(selected_df, n_clicks):
    '''
    Signature:   show_clsrpt(selected_df, n_clicks)
    Docstring:   run the model on the selected dataset and show the
                 classification report with the elapsed time
    Parameter:   selected_df - dropdown value, expected to be a df_dict key
                 n_clicks    - click count of the 'get-pred' button
    Return:      (report div, container style, button disabled flag,
                 title text); the hidden / enabled / empty state when the
                 button has not been clicked or no dataset matches
    '''
    df = df_dict.get(selected_df)
    # also bail out when the lookup fails (e.g. dropdown cleared) instead of
    # handing None to get_pred
    if n_clicks is None or n_clicks <= 0 or df is None:
        return None, css_sub_cont_h, False, None

    start = time.time()
    # NOTE(review): show_confmatx calls get_pred for the same inputs, so the
    # predictions are computed twice per click
    pred = get_pred(df, nlp)
    df_cr = pd.DataFrame.from_dict(classification_report(
                df['label'], pred, output_dict=True)).T
    df_cr = df_cr.reset_index()
    cls_rpt = html.Div([
        dash_table.DataTable(
            data = df_cr.round(3).to_dict('records'),
            columns = [{'name': i, 'id': i} for i in df_cr.columns],
            style_header=css_tbl_header,
            style_cell=css_tbl_cell,
            style_data_conditional=css_tbl_condl
        )
    ], style=css_clsrpt_output)
    end = time.time()
    total_time = str(datetime.timedelta(seconds = end - start))
    # first 10 characters of the timedelta string are shown
    ret_text = f'Classification Report: Model Exec. Time ({total_time[0:10]})'
    # the button is disabled once a report is shown
    return cls_rpt, css_sub_cont_v, True, ret_text
    
# display confusion matrix
@app.callback(
    [Output('conf-matx', 'figure'),
     Output('div-confmat', 'style')],
    [Input('df-picker', 'value'),
     Input('get-pred', 'n_clicks')],
)
def show_confmatx(selected_df, n_clicks):
    '''
    Signature:   show_confmatx(selected_df, n_clicks)
    Docstring:   run the model on the selected dataset and plot the
                 confusion matrix
    Parameter:   selected_df - dropdown value, expected to be a df_dict key
                 n_clicks    - click count of the 'get-pred' button
    Return:      (figure, container style); an empty figure with the hidden
                 style when the button has not been clicked or no dataset
                 matches
    '''
    df = df_dict.get(selected_df)
    # also bail out when the lookup fails (e.g. dropdown cleared) instead of
    # handing None to get_pred
    if n_clicks is None or n_clicks <= 0 or df is None:
        return {}, css_sub_cont_h

    pred = get_pred(df, nlp)
    fig = plot_confmatx(df['label'], pred)
    return fig, css_sub_cont_v

# get single prediction
@app.callback(
    Output('single-pred-result', 'children'),
    [Input('get-sgl-pred', 'n_clicks')],
    [State('rev-input', 'value')]
)
def get_user_rev_pred(n_clicks, value):
    '''
    Signature:   get_user_rev_pred(n_clicks, value)
    Docstring:   predict the sentiment of the text typed into the sandbox
    Parameter:   n_clicks - click count of the 'get-sgl-pred' button
                 value    - current content of the 'rev-input' textarea
    Return:      text shown in the 'single-pred-result' div
    '''
    if n_clicks is None or n_clicks <= 0:
        return 'Click button to get prediction'
    # do not send an empty / whitespace-only review to the model
    if value is None or not value.strip():
        return 'Enter a review before requesting a prediction'

    pred = get_single_pred(value, nlp)
    return f'Prediction {n_clicks} result: {pred[0]}'
  
# start the app server; the recorded output of this cell shows it being
# served at http://127.0.0.1:8050/
app.run_server(mode='external')

Dash app running on http://127.0.0.1:8050/
