In [1]:
import io
import dash
import flask
import datetime
import regex as re
import pandas as pd
import regex
import pickle
import numpy as np
import plotly.graph_objs as go
import dash_core_components as dcc
import dash_html_components as html
import dash_table_experiments as dt
import plotly.plotly as py
import plotly.figure_factory as ff
from dash.dependencies import Input, Output, State
from sklearn import preprocessing

In [2]:
# Link-input row: a heading, the product-link text box and the submit button.
# The ids 'link' and 'submit-button' are the ones the dynamic dashboard's
# callback refers to through State('link', ...) / Input('submit-button', ...).
link_text_box = dcc.Input(
    placeholder='Enter a product link...',
    id='link',
    value=None,
    type="text",
    style={'width': '100%'})

submit_button = html.Button(
    'Submit',
    type='button',
    id='submit-button',
    className='button-primary',
    style={'width': '100%'})

link_input = html.Div(
    children=[
        html.H3('Enter a product link...', style={'text-align': 'center'}),
        # text box column
        html.Div(
            children=[link_text_box],
            className='nine columns',
            style={'margin-left': 2}),
        # button column
        html.Div(
            children=[submit_button],
            className='three columns',
            style={'margin-left': 2}),
    ],
    className='row')

In [3]:
# product-info container
def create_info_container(df,image_src,sentiment):
    """Build the product header row: image, article title and overall rating bar.

    Args:
        df: frame-like object; the title is read from ``df.iloc[0].article``.
        image_src: value used as the ``src`` of the product image.
        sentiment: sized collection of numeric sentiment values. Their mean,
            multiplied by 100, becomes the width (in percent) of the rating bar.

    Returns:
        An ``html.Div`` with class ``row`` holding the image column and the
        title/rating column.
    """
    # An empty sentiment collection used to raise ZeroDivisionError; render an
    # empty rating bar instead.
    if len(sentiment):
        mean_sentiment = sum(sentiment) / len(sentiment)
    else:
        mean_sentiment = 0.0

    image_column = html.Div(
        children=[
            html.Img(
                id='image',
                src=image_src,
                style={
                    'max-height': 200,
                    'max-width': 200,
                    'margin-left': 'auto',
                    'margin-right': 'auto',
                    'display': 'block'
                })
        ],
        className='three columns',
        style={'margin': 5, 'max-height': 200, 'background-color': 'white'})

    rating_bar = html.Div(
        className='rating-bar',
        children=[
            html.Div(
                className='rating',
                style={
                    'width': str(mean_sentiment * 100) + '%',
                    'height': 10
                })
        ],
        style={'height': 10, 'margin-left': 2, 'max-width': 132})

    title_column = html.Div(
        children=[
            html.H4(df.iloc[0].article, style={'margin-top': 0, 'margin-bottom': 0}),
            html.Div(
                children=[
                    rating_bar,
                    # empty placeholder column kept from the original layout
                    html.Div(
                        children=[],
                        className='three columns',
                        style={'margin-left': 2}),
                ],
                className='row')
        ],
        className='nine columns',
        style={'margin-left': 10})

    return html.Div(
        children=[image_column, title_column],
        className='row',
        style={'margin-top': 40, 'borderRadius': '4px', 'background-color': 'lightgray'})

In [4]:
def generate_snippet(texts, highlights, lookahead):
    """Build one highlighted snippet per (text, highlight) pair.

    NOTE: a later cell defines ``generate_snippet(review, phrase, topic)``;
    once that cell has run, the name refers to that function, not this one.

    Args:
        texts: iterable of review strings.
        highlights: iterable of phrases, paired positionally with ``texts``.
        lookahead: number of context words kept on each side of the highlight.

    Returns:
        A list of ``html.Div`` components, each holding the leading context,
        an ``html.Span`` with the highlighted text and the trailing context.
        Pairs whose highlight cannot be located in the text are skipped, so
        the list may be shorter than ``texts``.
    """
    snippets = []
    width = max(lookahead, 0)

    for text, highlight in zip(texts, highlights):
        # Matching is case-insensitive: both sides are lower-cased.
        lowered = text.lower()
        highlight_tokens = highlight.lower().split(" ")

        try:
            # Character offsets of the first and (from there on) last token.
            start_index = lowered.index(highlight_tokens[0])
            last_token_index = lowered.index(highlight_tokens[-1], start_index)
        except ValueError:
            # Highlight not present in this text: no snippet for this pair.
            continue

        # The highlighted span runs up to and including the space that follows
        # the last token, or to the end of the text when there is none.
        # Offsets found in the lower-cased copy are applied to ``text``
        # (assumes lower() keeps the string length, which holds for ASCII).
        space_index = lowered.find(" ", last_token_index)
        end_index = len(text) if space_index == -1 else space_index + 1

        found_snippet = html.Span(
            text[start_index:end_index],
            style={'background-color': 'springgreen'})

        before_words = text[:start_index].split()
        after_words = text[end_index:].split()
        shown_before = before_words[-width:] if width else []
        shown_after = after_words[:width]

        start_snippet = " ".join(shown_before)
        if shown_before:
            start_snippet += " "  # separate the context from the highlight
        if len(before_words) > width:
            start_snippet = '...{}'.format(start_snippet)

        end_snippet = " ".join(shown_after)
        if len(after_words) > width:
            end_snippet = '{}...'.format(end_snippet)

        snippets.append(html.Div([start_snippet, found_snippet, end_snippet]))

    return snippets

In [5]:
def generate_snippet(review, phrase, topic):
    """Render a review with every word of ``phrase`` highlighted.

    The review is lower-cased and split on single spaces. A token is
    highlighted when it equals one of the (lower-cased) phrase words,
    optionally followed by a comma.

    Args:
        review: review text.
        phrase: whitespace-separated words to highlight.
        topic: accepted for interface compatibility; not used.

    Returns:
        An ``html.Div`` whose children alternate between plain-text runs and
        ``html.Span`` components for the highlighted tokens, or ``""`` when
        ``review`` or ``phrase`` is not a string.
    """
    if not isinstance(review, str) or not isinstance(phrase, str):
        return ""  # nothing sensible to render

    tokens = review.lower().split(" ")
    phrase_words = phrase.lower().split()

    highlighted = set()
    if phrase_words:
        # Escape the words so characters such as '(' or '.' are matched
        # literally, and compile a single pattern instead of one per
        # token/word combination.
        alternatives = '|'.join(regex.escape(word) for word in phrase_words)
        matcher = regex.compile('^(?:' + alternatives + ')[,]?$')
        highlighted = {i for i, token in enumerate(tokens) if matcher.search(token)}

    children = []
    plain_run = []  # pieces of the current non-highlighted stretch
    for i, token in enumerate(tokens):
        separator = ' ' if i else ''
        if i in highlighted:
            # Flush the pending text together with the separating space so
            # neighbouring highlights are never glued to each other.
            pending = ''.join(plain_run) + separator
            if pending:
                children.append(pending)
            plain_run = []
            children.append(
                html.Span(token, style={'background-color': 'springgreen'}))
        else:
            plain_run.append(separator + token)
    if plain_run:
        children.append(''.join(plain_run))

    return html.Div(children=children)

In [6]:
def _topic_dom_id(topic):
    """Return ``topic`` reduced to the characters used in component ids."""
    # NOTE(review): the range ``A-z`` (instead of ``A-Z``) also lets the
    # characters [ \ ] ^ _ ` through. The pattern is kept verbatim because the
    # callback-registration cell builds its ids with the same expression and
    # both sides must agree.
    return str(re.sub('[^a-zA-z0-9]', '', topic))


def _show_button_style():
    """Return a fresh style dict for the 'Show' button next to a phrase."""
    return {'border-left': '#555',
            'border-top': 'None',
            'border-bottom': 'None',
            'border-right': 'None',
            'border-width': '0.5px',
            'border-left-style': 'solid',
            'border-radius': 0,
            'line-height': '0px',
            'float': 'inline-end',
            'height': 24}


def _phrase_entries(ranked, polarity, colour, topic):
    """Build the rows (phrase + 'Show' button + hidden snippet) for one polarity.

    ``ranked`` is the already sorted frame; after de-duplicating on ``phrase``
    (keeping the last occurrence) its first five rows are rendered. ``polarity``
    is ``'Pos'`` or ``'Neg'`` and becomes part of the component ids.
    """
    # De-duplicate once instead of once per rendered phrase.
    top = ranked.drop_duplicates(subset='phrase', keep="last")[:5]
    topic_id = _topic_dom_id(topic)
    entries = []
    for i, (phrase, review) in enumerate(zip(top.phrase, top.review)):
        label_row = html.Div(
            children=[
                phrase,
                html.Button(
                    'Show',
                    id='button' + polarity + '-' + topic_id + str(i),
                    style=_show_button_style()),
            ],
            style={'text-align': 'center', 'border-radius': 4, 'margin': 2,
                   'background-color': colour})
        details = html.Div(
            id='details' + polarity + '-' + topic_id + str(i),
            children=generate_snippet(review, phrase, topic),
            style={'display': 'none'})  # hidden until a callback changes the style
        entries.append(html.Div(children=[label_row, details]))
    return entries


def create_topics_container(sorted_topics,df_topic_scores):
    """Build the dashboard rows for every topic.

    Args:
        sorted_topics: iterable of topic names, in display order.
        df_topic_scores: frame with the columns ``topic``, ``score``,
            ``phrase``, ``review`` and ``sentiment`` (one row per phrase).

    Returns:
        A flat list with, for each topic that has at least one row, an
        ``html.Div`` row followed by an ``html.Hr``. The row shows the topic
        title, a rating bar (mean sentiment), phrase counts, up to five
        positive and five negative phrases, and a sentiment distribution plot
        when the topic has more than ten rows.
    """
    topics_container = []
    for topic in sorted_topics:
        df_topic = df_topic_scores[df_topic_scores.topic == topic]
        if df_topic.empty:
            # No phrases for this topic: the mean below would divide by zero.
            continue
        scores = df_topic.sentiment
        topic_score = sum(scores)/len(scores)

        # Sentiment above 0.5 counts as positive, everything else as negative.
        df_pos = df_topic[df_topic.sentiment > 0.5]
        df_neg = df_topic[df_topic.sentiment <= 0.5]

        # ``assign`` returns new frames, so nothing is written onto the
        # filtered slices (the original column assignment triggered
        # SettingWithCopyWarning).
        df_topic1 = df_pos.assign(
            final_score=df_pos.score * df_pos.sentiment
        ).sort_values(by=['final_score'], ascending=False)
        df_topic2 = df_neg.assign(
            final_score=df_neg.score * (1 - df_neg.sentiment)
        ).sort_values(by=['final_score'], ascending=False)

        if len(df_topic)>10:
            fig = ff.create_distplot([scores.tolist()*5], group_labels = [''], bin_size=0.1, show_rug=False)
            fig['layout'].update(showlegend=False, margin=go.layout.Margin(l=20,r=20,b=0,t=20,pad = 0), height=260)
            distri = dcc.Graph(figure=fig, config={'staticPlot':True,'displayModeBar':False,'queueLength':0}, id='distribution-'+_topic_dom_id(topic))
        else:
            distri = None

        phrases_pos = _phrase_entries(df_topic1, 'Pos', 'springgreen', topic)
        phrases_neg = _phrase_entries(df_topic2, 'Neg', 'coral', topic)

        n_pos = len(df_topic1)
        n_neg = len(df_topic2)
        stat_lines = [
            '# Positive: ' + str(n_pos),
            '# Negative: ' + str(n_neg),
            '# Total: ' + str(n_pos + n_neg),
            'Total share: ' + str(int((n_pos + n_neg)/len(df_topic_scores)*100))+'%',
        ]
        stats_children = []
        for line in stat_lines:
            stats_children.append(
                html.Strong(line, style={'margin-top': 0, 'margin-bottom': 0}))
            stats_children.append(html.Br())
        stats_children.pop()  # no line break after the last figure

        summary_column = html.Div(
            children=[
                html.H4(
                    topic.title(), style={
                        'margin-top': 0,
                        'margin-bottom': 0
                    }),
                html.Div(
                    className='rating-bar',
                    children=[
                        html.Div(
                            className='rating',
                            style={
                                'width': str(topic_score * 100) + '%',
                                'height': 10
                            })
                    ],
                    style={'height': 10}),
                html.Div(children=stats_children, style={'margin-top': 20})
            ],
            className='three columns',
            style={
                'margin-left': 2,
                'text-align': 'center'
            })

        topic_container = html.Div(
            children=[
                summary_column,
                html.Div(
                    children=[*phrases_pos, *phrases_neg],
                    className='five columns',
                    style={'margin-left': 10}),
                html.Div(
                    children=[distri],
                    className='four columns',
                    style={'margin-left': 10})
            ],
            className='row',
            style={'margin-top': 40})
        topics_container.append(topic_container)
        topics_container.append(html.Hr())
    return topics_container

### The following cells load the products used in our paper. Run exactly one of them, then run the last cell in this block.

Fitbit Alta HR:

In [9]:
# Fitbit Alta HR: data locations and product image.
filename = './data/fitbitaltahrblacksmallusversion.json'
topics_filename = './data/fitbitaltahrblacksmallusversion_topics.pkl'
image_src = 'https://images-na.ssl-images-amazon.com/images/I/61vyVXBk9sL._SY679_.jpg'
df = pd.read_pickle('./data/fitbitaltahrblacksmallusversion.pkl')

# Flatten the per-review lists so that every extracted phrase gets one entry;
# a missing score (None) is replaced by 0.
score = [0 if value is None else value
         for per_review in df.final_scores_with_sent for value in per_review]
topic = [value for per_review in df.topics for value in per_review]
phrase = [value for per_review in df.final_phrases_with_sent for value in per_review]
sentiment = [pair[1] for per_review in df.final_sent_scores2 for pair in per_review]
# Repeat each review body once per phrase extracted from it.
review = [body
          for body, phrases in zip(df.review_body, df.final_phrases_with_sent)
          for _ in phrases]
df_topic_scores = pd.DataFrame(data={'topic': topic, 'score': score, 'phrase': phrase,'review': review, 'sentiment': sentiment})
# NOTE(review): the original cell called df_topic_scores.dropna() and
# df_topic_scores.drop_duplicates() here without keeping the results, so no
# rows were ever removed. That behaviour is preserved; assign the results
# back if filtering is actually wanted.
print(len(df_topic_scores))

# NOTE: unpickling can execute arbitrary code - only load trusted files.
with open(topics_filename, 'rb') as f:
    sorted_topics = pickle.load(f)
# Keep only the topics that occur in the phrase table, in their stored order.
topics_present = df_topic_scores.topic.unique()
sorted_topics = [name for name in sorted_topics if name in topics_present]

print('Create container...')
container = [create_info_container(df,image_src,sentiment)]
container2 = container+create_topics_container(sorted_topics,df_topic_scores)

2306
Create container...


Sony MDR7506:

In [7]:
# Sony MDR7506: data locations and product image.
filename = './data/sonymdr7506professionallargediaphragmheadphone.json'
topics_filename = './data/sonymdr7506professionallargediaphragmheadphone_topics.pkl'
image_src = 'https://images-na.ssl-images-amazon.com/images/I/81TzTAx8weL._SL1500_.jpg'
df = pd.read_pickle('./data/sonymdr7506professionallargediaphragmheadphone.pkl')

# Flatten the per-review lists so that every extracted phrase gets one entry;
# a missing score (None) is replaced by 0.
score = [0 if value is None else value
         for per_review in df.final_scores_with_sent for value in per_review]
topic = [value for per_review in df.topics for value in per_review]
phrase = [value for per_review in df.final_phrases_with_sent for value in per_review]
sentiment = [pair[1] for per_review in df.final_sent_scores2 for pair in per_review]
# Repeat each review body once per phrase extracted from it.
review = [body
          for body, phrases in zip(df.review_body, df.final_phrases_with_sent)
          for _ in phrases]
df_topic_scores = pd.DataFrame(data={'topic': topic, 'score': score, 'phrase': phrase,'review': review, 'sentiment': sentiment})
# NOTE(review): the original cell called df_topic_scores.dropna() and
# df_topic_scores.drop_duplicates() here without keeping the results, so no
# rows were ever removed. That behaviour is preserved; assign the results
# back if filtering is actually wanted.
print(len(df_topic_scores))

# NOTE: unpickling can execute arbitrary code - only load trusted files.
with open(topics_filename, 'rb') as f:
    sorted_topics = pickle.load(f)
# Keep only the topics that occur in the phrase table, in their stored order.
topics_present = df_topic_scores.topic.unique()
sorted_topics = [name for name in sorted_topics if name in topics_present]

print('Create container...')
container = [create_info_container(df,image_src,sentiment)]
container2 = container+create_topics_container(sorted_topics,df_topic_scores)

7997
Create container...


Sony MDR7506 with tweets:

In [16]:
# Sony MDR7506 including tweets: data locations and product image.
filename = './data/sonymdr7506professionallargediaphragmheadphone_tweets.json'
topics_filename = './data/sonymdr7506professionallargediaphragmheadphone_tweets_topics.pkl'
image_src = 'https://images-na.ssl-images-amazon.com/images/I/81TzTAx8weL._SL1500_.jpg'
df = pd.read_pickle('./data/sonymdr7506professionallargediaphragmheadphone_tweets.pkl')

# Flatten the per-review lists so that every extracted phrase gets one entry;
# a missing score (None) is replaced by 0.
score = [0 if value is None else value
         for per_review in df.final_scores_with_sent for value in per_review]
topic = [value for per_review in df.topics for value in per_review]
phrase = [value for per_review in df.final_phrases_with_sent for value in per_review]
sentiment = [pair[1] for per_review in df.final_sent_scores2 for pair in per_review]
# Repeat each review body once per phrase extracted from it.
review = [body
          for body, phrases in zip(df.review_body, df.final_phrases_with_sent)
          for _ in phrases]
df_topic_scores = pd.DataFrame(data={'topic': topic, 'score': score, 'phrase': phrase,'review': review, 'sentiment': sentiment})
# NOTE(review): the original cell called df_topic_scores.dropna() and
# df_topic_scores.drop_duplicates() here without keeping the results, so no
# rows were ever removed. That behaviour is preserved; assign the results
# back if filtering is actually wanted.
print(len(df_topic_scores))

# NOTE: unpickling can execute arbitrary code - only load trusted files.
with open(topics_filename, 'rb') as f:
    sorted_topics = pickle.load(f)
# Keep only the topics that occur in the phrase table, in their stored order.
topics_present = df_topic_scores.topic.unique()
sorted_topics = [name for name in sorted_topics if name in topics_present]

print('Create container...')
container = [create_info_container(df,image_src,sentiment)]
container2 = container+create_topics_container(sorted_topics,df_topic_scores)

8191
Create container...


Apple iPhone 6s:

In [14]:
# Apple iPhone 6s (refurbished): data locations and product image.
filename = './data/appleiphone6sfullyunlocked64gbrosegoldrefurbished.json'
topics_filename = './data/appleiphone6sfullyunlocked64gbrosegoldrefurbished_topics.pkl'
image_src = 'https://images-na.ssl-images-amazon.com/images/I/81qiCrJlzgL._SL1500_.jpg'
df = pd.read_pickle('./data/appleiphone6sfullyunlocked64gbrosegoldrefurbished.pkl')

# Flatten the per-review lists so that every extracted phrase gets one entry;
# a missing score (None) is replaced by 0.
score = [0 if value is None else value
         for per_review in df.final_scores_with_sent for value in per_review]
topic = [value for per_review in df.topics for value in per_review]
phrase = [value for per_review in df.final_phrases_with_sent for value in per_review]
sentiment = [pair[1] for per_review in df.final_sent_scores2 for pair in per_review]
# Repeat each review body once per phrase extracted from it.
review = [body
          for body, phrases in zip(df.review_body, df.final_phrases_with_sent)
          for _ in phrases]
df_topic_scores = pd.DataFrame(data={'topic': topic, 'score': score, 'phrase': phrase,'review': review, 'sentiment': sentiment})
# NOTE(review): the original cell called df_topic_scores.dropna() and
# df_topic_scores.drop_duplicates() here without keeping the results, so no
# rows were ever removed. That behaviour is preserved; assign the results
# back if filtering is actually wanted.
print(len(df_topic_scores))

# NOTE: unpickling can execute arbitrary code - only load trusted files.
with open(topics_filename, 'rb') as f:
    sorted_topics = pickle.load(f)
# Keep only the topics that occur in the phrase table, in their stored order.
topics_present = df_topic_scores.topic.unique()
sorted_topics = [name for name in sorted_topics if name in topics_present]

print('Create container...')
container = [create_info_container(df,image_src,sentiment)]
container2 = container+create_topics_container(sorted_topics,df_topic_scores)

354
Create container...


Fitbit Flex:

In [7]:
# Fitbit Flex: data locations and product image.
filename = './data/fitbitflexwirelessactivitysleepwristbandblack.json'
topics_filename = './data/fitbitflexwirelessactivitysleepwristbandblack_topics.pkl'
# NOTE(review): this is the same image URL as in the Apple iPhone cell -
# possibly a copy-paste leftover; confirm the intended Fitbit Flex image.
image_src = 'https://images-na.ssl-images-amazon.com/images/I/81qiCrJlzgL._SL1500_.jpg'
df = pd.read_pickle('./data/fitbitflexwirelessactivitysleepwristbandblack.pkl')

# Flatten the per-review lists so that every extracted phrase gets one entry;
# a missing score (None) is replaced by 0.
score = [0 if value is None else value
         for per_review in df.final_scores_with_sent for value in per_review]
topic = [value for per_review in df.topics for value in per_review]
phrase = [value for per_review in df.final_phrases_with_sent for value in per_review]
sentiment = [pair[1] for per_review in df.final_sent_scores2 for pair in per_review]
# Repeat each review body once per phrase extracted from it.
review = [body
          for body, phrases in zip(df.review_body, df.final_phrases_with_sent)
          for _ in phrases]
df_topic_scores = pd.DataFrame(data={'topic': topic, 'score': score, 'phrase': phrase,'review': review, 'sentiment': sentiment})
# NOTE(review): the original cell called df_topic_scores.dropna() and
# df_topic_scores.drop_duplicates() here without keeping the results, so no
# rows were ever removed. That behaviour is preserved; assign the results
# back if filtering is actually wanted.
print(len(df_topic_scores))

# NOTE: unpickling can execute arbitrary code - only load trusted files.
with open(topics_filename, 'rb') as f:
    sorted_topics = pickle.load(f)
# Keep only the topics that occur in the phrase table, in their stored order.
topics_present = df_topic_scores.topic.unique()
sorted_topics = [name for name in sorted_topics if name in topics_present]

print('Create container...')
container = [create_info_container(df,image_src,sentiment)]
container2 = container+create_topics_container(sorted_topics,df_topic_scores)

4042
Create container...


In [None]:
# Static dashboard: the layout is the link-input row followed by the
# containers built by whichever product cell was run before this one.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

# create app
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# Additional style sheets, appended in this order.
for css_url in (
    "https://codepen.io/anon/pen/MzZQwp.css",
    "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css",
    "https://codepen.io/austingreen/pen/burBc.css",
):
    app.css.append_css({"external_url": css_url})

app.scripts.config.serve_locally = True
app.layout = html.Div([link_input] + list(container2))
app.config['suppress_callback_exceptions']=True

print('Create callbacks...')
# One (Input, Output) pair per 'Show' button rendered by
# create_topics_container: the button's n_clicks drives the style of the
# matching details panel.
inputs = []
outputs = []
for topic in sorted_topics:
    df_topic = df_topic_scores[df_topic_scores.topic == topic]
    # Same id fragment as used when the buttons are created.
    topic_id = str(re.sub('[^a-zA-z0-9]', '', topic))

    # Mirror create_topics_container exactly: split on sentiment first
    # (> 0.5 positive, <= 0.5 negative), then de-duplicate phrases within each
    # side and keep at most five. The previous logic de-duplicated before the
    # split and counted sentiment == 0.5 on both sides, so the number of
    # registered callbacks could differ from the number of rendered buttons.
    polarity_frames = (
        ('Pos', df_topic[df_topic.sentiment > 0.5]),
        ('Neg', df_topic[df_topic.sentiment <= 0.5]),
    )
    for polarity, df_polarity in polarity_frames:
        unique_phrases = df_polarity.drop_duplicates(subset='phrase', keep="last")
        for i in range(min(5, len(unique_phrases))):
            inputs.append(Input('button' + polarity + '-' + topic_id + str(i), 'n_clicks'))
            outputs.append(Output('details' + polarity + '-' + topic_id + str(i), 'style'))

def radio_toggle(n_clicks):
    """Return the style for a details panel from its button's click count.

    An odd click count shows the panel; an even count, zero or ``None``
    hides it.
    """
    if not n_clicks or n_clicks % 2 == 0:
        return {'display': 'none'}
    return {'display': 'block',
            'border-radius': 4,
            'margin': 2,
            'background-color': 'lightgrey',
            'padding': 5}
    
# create callbacks: inputs and outputs were appended pairwise, so pairing
# them by position connects each button to its own details panel.
for trigger, target in zip(inputs, outputs):
    app.callback(target, [trigger])(radio_toggle)

# run app locally (host '0.0.0.0' listens on all network interfaces)
if __name__ == '__main__':
    app.run_server(debug=False, host='0.0.0.0')

Create callbacks...
 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://0.0.0.0:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [08/Mar/2019 16:09:19] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [08/Mar/2019 16:09:19] "[37mGET /_dash-component-suites/dash_renderer/react-dom@15.4.2.min.js?v=0.15.0&m=1542374987 HTTP/1.1[0m" 200 -
127.0.0.1 - - [08/Mar/2019 16:09:19] "[37mGET /_dash-component-suites/dash_renderer/react@15.4.2.min.js?v=0.15.0&m=1542374987 HTTP/1.1[0m" 200 -
127.0.0.1 - - [08/Mar/2019 16:09:19] "[37mGET /_dash-component-suites/dash_html_components/dash_html_components.min.js?v=0.13.2&m=1542375024 HTTP/1.1[0m" 200 -
127.0.0.1 - - [08/Mar/2019 16:09:20] "[37mGET /_dash-dependencies HTTP/1.1[0m" 200 -
127.0.0.1 - - [08/Mar/2019 16:09:20] "[37mGET /_dash-layout HTTP/1.1[0m" 200 -
127.0.0.1 - - [08/Mar/2019 16:09:21] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [08/Mar/2019 16:09:21] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [08/Mar/2019 16:09:21] "[37mPOST /_dash-update

127.0.0.1 - - [08/Mar/2019 16:09:22] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [08/Mar/2019 16:09:22] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [08/Mar/2019 16:09:22] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [08/Mar/2019 16:09:22] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [08/Mar/2019 16:09:22] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [08/Mar/2019 16:09:22] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [08/Mar/2019 16:09:22] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [08/Mar/2019 16:09:22] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [08/Mar/2019 16:09:22] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [08/Mar/2019 16:09:22] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [08/Mar/2019 16:09:22] "[37mPOST /_dash-update-component HTTP/1.1

### This is the dynamic version of the dashboard

In [None]:
# Dynamic dashboard: the layout starts with only the link-input row and an
# empty placeholder ('output-data-upload') that a callback fills later.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

# create app
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# Additional style sheets, appended in this order.
for css_url in (
    "https://codepen.io/anon/pen/MzZQwp.css",
    "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css",
    "https://codepen.io/austingreen/pen/burBc.css",
):
    app.css.append_css({"external_url": css_url})

app.scripts.config.serve_locally = True
results_placeholder = html.Div(id='output-data-upload')
app.layout = html.Div([link_input, results_placeholder])
app.config['suppress_callback_exceptions']=True

def radio_toggle(n_clicks):
    """Return the style for a details panel from its button's click count.

    Args:
        n_clicks: number of clicks on the button; ``None`` or 0 is treated as
            "not clicked yet" (the original raised TypeError on ``None``).

    Returns:
        A visible block style for an odd click count, ``{'display': 'none'}``
        otherwise.
    """
    if not n_clicks or n_clicks % 2 == 0:
        return {'display': 'none'}
    return {'display': 'block',
            'border-radius': 4,
            'margin': 2,
            'background-color': 'lightgrey',
            'padding': 5}
    
def create_callbacks(inputs,outputs):
    """Register ``radio_toggle`` on ``app`` for every output/input pair.

    ``outputs[k]`` is paired with ``inputs[k]``; each pair is printed before
    it is registered.
    """
    for position in range(len(outputs)):
        trigger, target = inputs[position], outputs[position]
        print((target,[trigger]))
        app.callback(target, [trigger])(radio_toggle)


@app.callback(Output('output-data-upload', 'children'),
              [Input('submit-button', 'n_clicks')],
              [State('link', 'value')])
def update_output(nclicks, value):
    print(value)
    if value is not None:
        print('Load methods...')
        %run ./methods.ipynb
        print('Crawling...')
        %run ./crawl_amazon_reviews.ipynb
        crawl(value)
        reactor.run()
        print('Preprocessing...')
        %run ./preprocess.ipynb
        print('TFIDF...')
        %run ./tfidf.ipynb
        print('Extract noun phrases...')
        %run ./extract_noun_phrases.ipynb
        print('Extract colloactions...')
        %run ./extract_collocations.ipynb
        print('Create topics...')
        %run ./create_topics.ipynb
        print('Deduplicate phrases...')
        %run ./deduplicate_phrases.ipynb
        print('Filter sentiments...')
        %run ./filter_sentiments.ipynb
        print('Transform data for dash...')
        score = [y for x in df.final_scores_with_sent for y in x]
        topic = [y for x in df.topics for y in x]
        phrase = [y for x in df.final_phrases_with_sent for y in x]
        score=[0 if x is None else x for x in score]
        sentiment = [y[1] for x in df.final_sent_scores2 for y in x]
        review = []
        for i, tmp_review in enumerate(df.review_body):
            for phrase1 in df.final_phrases_with_sent.iloc[i]:
                review.append(tmp_review)
        df_topic_scores = pd.DataFrame(data={'topic': topic, 'score': score, 'phrase': phrase,'review': review, 'sentiment': sentiment})
        df_topic_scores.dropna()
        #df_topic_scores.drop_duplicates()
        #df_topic_scores = df_topic_scores[df_topic_scores.score<=0]
        with open(topics_filename, 'rb') as f:
            sorted_topics = pickle.load(f)
        sorted_topics = [topic for topic in sorted_topics if topic in df_topic_scores.topic.unique()]
        print('Create container...')
        container = []
        container.append(create_info_container(df,image_src,sentiment))
        container2 = container+create_topics_container(sorted_topics,df_topic_scores)
        print('Create callbacks...')
        inputs = []
        outputs = []
        for topic in sorted_topics:
            scores = df_topic_scores[df_topic_scores.topic == topic].score
            topic_score = (scores-min(scores))/(max(scores)-min(scores))
            df_topic = df_topic_scores[df_topic_scores.topic == topic]
            df_topic = df_topic.drop_duplicates(subset='phrase', keep="last")
            df_topic = df_topic.reset_index()
            df_neg = df_topic[df_topic.sentiment<=0.5]
            df_pos = df_topic[df_topic.sentiment>=0.5]

            df_topic1 = df_pos.sort_values(by=['score'], ascending=False)
            df_topic2 = df_neg.sort_values(by=['score'], ascending=True)
            if len(df_topic1)>5:
                max_len_topic1=5
            else:
                max_len_topic1=len(df_topic1)
            if len(df_topic2)>5:
                max_len_topic2=5
            else:
                max_len_topic2=len(df_topic2)
            for i in range(max_len_topic1):
                inputs.append(Input('buttonPos-'+str(re.sub('[^a-zA-z0-9]', '', topic))+str(i), 'n_clicks'))
                outputs.append(Output('detailsPos-'+str(re.sub('[^a-zA-z0-9]', '', topic))+str(i), 'style'))
            for i in range(max_len_topic2):
                inputs.append(Input('buttonNeg-'+str(re.sub('[^a-zA-z0-9]', '', topic))+str(i), 'n_clicks'))
                outputs.append(Output('detailsNeg-'+str(re.sub('[^a-zA-z0-9]', '', topic))+str(i), 'style'))

        create_callbacks(inputs,outputs)

        return container2
    #return *create_topics_container


# create callbacks

# Run the app locally when this cell is executed as the main module.
if __name__ == '__main__':
    # Listen on all interfaces, with debug mode disabled.
    app.run_server(host='0.0.0.0', debug=False)

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://0.0.0.0:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [06/Mar/2019 11:03:55] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [06/Mar/2019 11:03:57] "[37mGET /_dash-layout HTTP/1.1[0m" 200 -
127.0.0.1 - - [06/Mar/2019 11:03:57] "[37mGET /_dash-dependencies HTTP/1.1[0m" 200 -
127.0.0.1 - - [06/Mar/2019 11:03:57] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -


None
https://www.amazon.com/Google-Pixel-Unlocked-12-3MP-Camera/dp/B01M01ZZAC/ref=sr_1_4?keywords=google+phone&qid=1551866661&s=gateway&sr=8-4
Load methods...


2019-03-06 11:05:03 [scrapy.crawler] INFO: Overridden settings: {'CONCURRENT_REQUESTS': '1', 'DOWNLOAD_DELAY': '2'}
2019-03-06 11:05:03 [scrapy.middleware] INFO: Enabled extensions:
['scrapy.extensions.logstats.LogStats',
 'scrapy.extensions.corestats.CoreStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scrapy.extensions.memusage.MemoryUsage']

twisted.web.client.HTTPClientFactory was deprecated in Twisted 16.7.0: please use https://pypi.org/project/treq/ or twisted.web.client.Agent instead


2019-03-06 11:05:03 [scrapy.middleware] INFO: Enabled downloader middlewares:
['scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware',
 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware',
 'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware',
 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware',
 'scrapy.downloadermiddlewares.retry.RetryMiddleware',
 'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware',
 'scrapy.downloadermiddleware

Crawling...


2019-03-06 11:05:04 [scrapy.downloadermiddlewares.redirect] DEBUG: Redirecting (301) to <GET https://www.amazon.com/Google-Pixel-Unlocked-12-3MP-Camera/dp/B01M01ZZAC> from <GET https://www.amazon.com/Google-Pixel-Unlocked-12-3MP-Camera/dp/B01M01ZZAC/ref=sr_1_4?keywords=google+phone&qid=1551866661&s=gateway&sr=8-4>

Passing method to twisted.internet.ssl.CertificateOptions was deprecated in Twisted 17.1.0. Please use a combination of insecurelyLowerMinimumTo, raiseMinimumTo, and lowerMaximumSecurityTo instead, as Twisted will correctly configure the method.


2019-03-06 11:05:07 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.amazon.com/Google-Pixel-Unlocked-12-3MP-Camera/dp/B01M01ZZAC> (referer: None)
2019-03-06 11:05:07 [scrapy.core.engine] INFO: Closing spider (finished)
2019-03-06 11:05:07 [scrapy.statscollectors] INFO: Dumping Scrapy stats:
{'downloader/request_bytes': 589,
 'downloader/request_count': 2,
 'downloader/request_method_count/GET': 2,
 'downloader/response_b

2019-03-06 11:05:22 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.amazon.com/Google-Pixel-Unlocked-12-3MP-Camera/product-reviews/B01M01ZZAC?pageNumber=7&reviewerType=all_reviews> (referer: https://www.amazon.com/Google-Pixel-Unlocked-12-3MP-Camera/product-reviews/B01M01ZZAC?pageNumber=6&reviewerType=all_reviews)

Passing method to twisted.internet.ssl.CertificateOptions was deprecated in Twisted 17.1.0. Please use a combination of insecurelyLowerMinimumTo, raiseMinimumTo, and lowerMaximumSecurityTo instead, as Twisted will correctly configure the method.


2019-03-06 11:05:24 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.amazon.com/Google-Pixel-Unlocked-12-3MP-Camera/product-reviews/B01M01ZZAC?pageNumber=8&reviewerType=all_reviews> (referer: https://www.amazon.com/Google-Pixel-Unlocked-12-3MP-Camera/product-reviews/B01M01ZZAC?pageNumber=7&reviewerType=all_reviews)

Passing method to twisted.internet.ssl.CertificateOptions was deprecated in Twisted 17.1.0. Plea

2019-03-06 11:05:50 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.amazon.com/Google-Pixel-Unlocked-12-3MP-Camera/product-reviews/B01M01ZZAC?pageNumber=18&reviewerType=all_reviews> (referer: https://www.amazon.com/Google-Pixel-Unlocked-12-3MP-Camera/product-reviews/B01M01ZZAC?pageNumber=17&reviewerType=all_reviews)

Passing method to twisted.internet.ssl.CertificateOptions was deprecated in Twisted 17.1.0. Please use a combination of insecurelyLowerMinimumTo, raiseMinimumTo, and lowerMaximumSecurityTo instead, as Twisted will correctly configure the method.


2019-03-06 11:05:53 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.amazon.com/Google-Pixel-Unlocked-12-3MP-Camera/product-reviews/B01M01ZZAC?pageNumber=19&reviewerType=all_reviews> (referer: https://www.amazon.com/Google-Pixel-Unlocked-12-3MP-Camera/product-reviews/B01M01ZZAC?pageNumber=18&reviewerType=all_reviews)

Passing method to twisted.internet.ssl.CertificateOptions was deprecated in Twisted 17.1.0. 

Preprocessing...
TFIDF...


2019-03-06 11:06:06 [gensim.corpora.dictionary] INFO: adding document #0 to Dictionary(0 unique tokens: [])
2019-03-06 11:06:06 [gensim.corpora.dictionary] INFO: built Dictionary(2024 unique tokens: ['adjust', 'feature', 'specification', 'cmon', 'trust']...) from 176 documents (total 8330 corpus positions)
2019-03-06 11:06:06 [gensim.models.tfidfmodel] INFO: collecting document frequencies
2019-03-06 11:06:06 [gensim.models.tfidfmodel] INFO: PROGRESS: processing document #0
2019-03-06 11:06:06 [gensim.models.tfidfmodel] INFO: calculating IDF weights for 176 documents and 2023 features (5874 matrix non-zeros)


Extract noun phrases...
Extract colloactions...


2019-03-06 11:06:57 [urllib3.connectionpool] DEBUG: Starting new HTTP connection (1): api.cortical.io:80


Create topics...


2019-03-06 11:07:04 [urllib3.connectionpool] DEBUG: http://api.cortical.io:80 "POST /rest/text/keywords?retina_name=en_associative HTTP/1.1" 200 None


Deduplicate phrases...
Filter sentiments...
Loaded model from disk
Transform data for dash...
Create container...


127.0.0.1 - - [06/Mar/2019 11:07:30] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
2019-03-06 11:07:30 [werkzeug] INFO: 127.0.0.1 - - [06/Mar/2019 11:07:30] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -


Create callbacks...
(<dash.dependencies.Output object at 0x7f3e90b4a780>, [<dash.dependencies.Input object at 0x7f3e90b4a748>])
(<dash.dependencies.Output object at 0x7f3e90b4a7f0>, [<dash.dependencies.Input object at 0x7f3e90b4a7b8>])
(<dash.dependencies.Output object at 0x7f3e90b4a898>, [<dash.dependencies.Input object at 0x7f3e90b4a860>])
(<dash.dependencies.Output object at 0x7f3e90b4a908>, [<dash.dependencies.Input object at 0x7f3e90b4a8d0>])
(<dash.dependencies.Output object at 0x7f3e90b4a978>, [<dash.dependencies.Input object at 0x7f3e90b4a940>])
(<dash.dependencies.Output object at 0x7f3e90b4a9e8>, [<dash.dependencies.Input object at 0x7f3e90b4a9b0>])
(<dash.dependencies.Output object at 0x7f3e90b4aa58>, [<dash.dependencies.Input object at 0x7f3e90b4aa20>])
(<dash.dependencies.Output object at 0x7f3e90b4aac8>, [<dash.dependencies.Input object at 0x7f3e90b4aa90>])
(<dash.dependencies.Output object at 0x7f3e90b4ab38>, [<dash.dependencies.Input object at 0x7f3e90b4ab00>])
(<dash.d