In [16]:
import pandas as pd
import numpy as np
import gradio as gr
import nltk
import spacy
import pickle
from gensim.models.ldamodel import LdaModel
from gensim import corpora
from nltk.sentiment.vader import SentimentIntensityAnalyzer

nlp = spacy.load("en_core_web_sm")
nltk.download('vader_lexicon')

from absa_functions import *

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Evan\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [17]:
def parse_targets(nlp, review):
    doc = nlp(review)
    targets = []
    target = ''

    for token in doc:
        if (token.dep_ in ['nsubj','dobj', 'pobj', 'ROOT']) and (token.pos_ in ['NOUN', 'PROPN', 'PRON']):
            target = token.text
            targets.append(target)

    return targets

In [18]:
def parse_adjectives(nlp, review):
    doc = nlp(review)
    adjectives = []
    adjective = ''

    for token in doc:
        if token.pos_ == 'ADJ':
            prepend = ''
            for child in token.children:
                if child.pos_ != 'ADV':
                    continue
                prepend += child.text + ' '
            adjective = prepend + token.text
            adjectives.append(adjective)

    return adjectives

In [19]:
def get_topic_from_word(word, lda_model, topic_map):
    try:
        topics_raw = lda_model.get_term_topics(word, minimum_probability=0.0000001)
        topic_dict = {topic_map[tup[0]]: tup[1] for tup in topics_raw}
        best_topic = max(topic_dict, key=topic_dict.get)
    except:
        best_topic = 'miscellaneous'

    return best_topic

In [20]:
corpus = pickle.load(open('corpus.pkl', 'rb'))
dictionary = corpora.Dictionary.load('dictionary.gensim')
lda_model = LdaModel.load('best_lda_model.gensim')

In [21]:
sid = SentimentIntensityAnalyzer()

In [22]:
#numerical mapping for topics in LDA model
topic_map = {0: 'menu', 1: 'service', 2: 'miscellaneous', 3: 'place', 4: 'price', 5: 'food', 6: 'staff'}

In [23]:
review = 'Wonderful music, beautiful restaurant, great wine.'

In [24]:
count = 0
doc = nlp(review)
for token in doc:
    count += 1
    if count <= 20:
    # if 12 <= count <= 18:
        print(token.text, token.dep_, token.head.text, token.head.pos_, token.pos_,[child for child in token.children])

Wonderful amod music NOUN ADJ []
music ROOT music NOUN NOUN [Wonderful, ,, restaurant, wine, .]
, punct music NOUN PUNCT []
beautiful amod restaurant NOUN ADJ []
restaurant appos music NOUN NOUN [beautiful, ,]
, punct restaurant NOUN PUNCT []
great amod wine NOUN ADJ []
wine appos music NOUN NOUN [great]
. punct music NOUN PUNCT []


In [25]:
parse_targets(nlp, review)

['music']

In [26]:
parse_adjectives(nlp, review)

['Wonderful', 'beautiful', 'great']

In [27]:
get_topic_from_word(prepare_text_for_lda('entitlement')[0], lda_model, topic_map)

'miscellaneous'

In [28]:
def pos_prediction(restaurant_review):
    nlp = spacy.load("en_core_web_sm")
    targets = parse_targets(nlp, restaurant_review)
    adjectives = parse_adjectives(nlp, restaurant_review)

    outputs = []
    if len(targets) == len(adjectives): 
        for i in range(0, len(targets)):
            output = {}
            
            output.update({'aspect': targets[i], 'adjective': adjectives[i]})
            try:
                topic = get_topic_from_word(prepare_text_for_lda(targets[i])[0], lda_model, topic_map)
            except:
                topic = 'miscellaneous'
            score = sid.polarity_scores(adjectives[i])['compound']
            sentiment = 'positive' if score > 0 else ('neutral' if score == 0 else 'negative')
            output.update({'topic': topic, 'polarity': sentiment})
            outputs.append(output)
    elif len(targets) > len(adjectives):
        for i in range(0, len(targets)):
            output = {}
            try:
                topic = get_topic_from_word(prepare_text_for_lda(targets[i])[0], lda_model, topic_map)
                score = sid.polarity_scores(adjectives[i])['compound']
                sentiment = 'positive' if score > 0 else ('neutral' if score == 0 else 'negative')
                output.update({'aspect': targets[i], 'adjective': adjectives[i], 'topic': topic, 'polarity': sentiment})
            except IndexError:
                topic = get_topic_from_word(prepare_text_for_lda(targets[i])[0], lda_model, topic_map)
                output.update({'aspect': targets[i], 'adjective': 'None', 'topic': topic, 'polarity': 'None'})
            
            outputs.append(output)
    elif len(targets) < len(adjectives):
        for i in range(0, len(adjectives)):
            output = {}
            try:
                topic = get_topic_from_word(prepare_text_for_lda(targets[i])[0], lda_model, topic_map)
                score = sid.polarity_scores(adjectives[i])['compound']
                sentiment = 'positive' if score > 0 else ('neutral' if score == 0 else 'negative')
                output.update({'aspect': targets[i], 'adjective': adjectives[i], 'topic': topic, 'polarity': sentiment})
            except IndexError:
                score = sid.polarity_scores(adjectives[i])['compound']
                sentiment = 'positive' if score > 0 else ('neutral' if score == 0 else 'negative')
                output.update({'aspect': 'None', 'adjective': adjectives[i], 'topic': 'miscellaneous', 'polarity': sentiment})
            outputs.append(output)
    
    return pd.DataFrame(outputs)
        


In [29]:
pos_prediction(review)

Unnamed: 0,aspect,adjective,topic,polarity
0,music,Wonderful,menu,positive
1,,beautiful,miscellaneous,positive
2,,great,miscellaneous,positive


In [30]:
iface = gr.Interface(fn=pos_prediction, 
                    inputs=gr.inputs.Textbox(lines=2, placeholder='Enter restaurant review here...'),
                    outputs=gr.outputs.Dataframe(headers=['Aspect','Adjective', 'Topic', 'Polarity']),
                    examples=[
                        ['The restaurant is too dark, and the bathroom was not clean. Also, everyone there is rude.'],
                        ['Fabulous dinner & environment but the older waiters have a real sense of entitlement.'],
                        ['Entrees were way too expensive.'],
                        ['The dinner was great, and the waiter was super friendly.']
                    ])
iface.launch(share=True)

Running locally at: http://127.0.0.1:7861/
This share link will expire in 72 hours. If you need a permanent link, visit: https://gradio.app/introducing-hosted
Running on External URL: https://58123.gradio.app
Interface loading below...


(<Flask 'gradio.networking'>,
 'http://127.0.0.1:7861/',
 'https://58123.gradio.app')

[2021-10-06 21:59:26,341] ERROR in app: Exception on /api/predict/ [POST]
Traceback (most recent call last):
  File "<ipython-input-28-2b8dce64a55e>", line 24, in pos_prediction
    topic = get_topic_from_word(prepare_text_for_lda(targets[i])[0], lda_model, topic_map)
IndexError: list index out of range

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\envs\nlpenv\lib\site-packages\flask\app.py", line 2447, in wsgi_app
    response = self.full_dispatch_request()
  File "C:\ProgramData\Anaconda3\envs\nlpenv\lib\site-packages\flask\app.py", line 1952, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "C:\ProgramData\Anaconda3\envs\nlpenv\lib\site-packages\flask_cors\extension.py", line 165, in wrapped_function
    return cors_after_request(app.make_response(f(*args, **kwargs)))
  File "C:\ProgramData\Anaconda3\envs\nlpenv\lib\site-packages\flask\app.py", line 1821, in handle_user_