# Load spaCy model

In [1]:
import spacy

# Load the large English pipeline once for the whole notebook; most_similar()
# below relies on its token vectors (vector_norm / similarity).
nlp = spacy.load('en_core_web_lg')

# Prepare data and load word lists

In [2]:
import pandas as pd
# Read the comment dump; each comment's text is taken from the `body` column below.
comments = pd.read_csv('data/comments_top500.csv')

# 167: Both ^^
# NOTE(review): presumably row 167 was picked because it shows both sentiments — confirm.

# Sample comment with its newline characters removed (not replaced by spaces).
test_comment = comments.iloc[167].body.replace(u'\n', '')

In [25]:
def _read_word_list(path):
    """Return the words of a lexicon file, one entry per non-empty line.

    Lines that start with ';' are treated as header comments and skipped.
    Blank and whitespace-only lines are skipped too, and surrounding
    whitespace is removed from every word, so the caller can join the
    result with single spaces without producing empty tokens.

    NOTE(review): the file is opened with the platform default encoding,
    exactly as before — confirm that matches the lexicon files.
    """
    words = []
    with open(path) as f:
        # Iterate lazily instead of materialising the file with readlines().
        for line in f:
            if line.startswith(';'):
                continue
            word = line.strip()
            if word:
                words.append(word)
    return words


def load_lexicon():
    """Load the negative and positive word lists as parsed spaCy docs.

    Returns:
        dict with the keys 'negative' and 'positive' (in that order); each
        value is the result of running the space-joined word list through
        the module-level ``nlp`` pipeline.
    """
    sources = {
        'negative': 'data/sentiments/negative-words.txt',
        'positive': 'data/sentiments/positive-words.txt',
    }
    return {
        sentiment: nlp(' '.join(_read_word_list(path)))
        for sentiment, path in sources.items()
    }
    
# Build the lexicon once; most_similar() reads this module-level dict.
lexicon = load_lexicon()

#lexicon = lexicon.groupby('priorpolarity')['word1'].apply(lambda x: nlp("%s" % ' '.join(x))).to_dict()

#lexicon['word1'] = [nlp(word) for word in lexicon['word1']]

#print(lexicon)



In [4]:
def review_wordlist(input):
    """Reduce a text to the lemmas of its content words.

    The text is parsed with the module-level ``nlp`` pipeline; stop words
    and punctuation tokens are dropped and the lemmas of the remaining
    tokens are joined with single spaces. The result is an empty string
    when every token is filtered out.

    Args:
        input: raw text to process (the name shadows the builtin but is
            kept for existing callers).
    """
    kept_lemmas = []
    for tok in nlp(input):
        if tok.is_stop or tok.is_punct:
            continue
        kept_lemmas.append(tok.lemma_)
    return ' '.join(kept_lemmas)

In [5]:
def review_sentences(input):
    """Apply review_wordlist() to a text one sentence at a time.

    The stripped text is split into sentences by the module-level ``nlp``
    pipeline. Each sentence's text is reduced separately by
    review_wordlist() and the pieces are joined with '. '.
    """
    parsed = nlp(input.strip())
    return '. '.join(review_wordlist(sentence.text) for sentence in parsed.sents)

#print(review_sentences('test sentence'))

In [27]:
def most_similar(word, top_n=10, min_prob=-15):
    """Score a token against every sentiment word list in ``lexicon``.

    Despite the name, the result holds similarity *scores*, not the
    matching words.

    Args:
        word: spaCy token to score (needs ``vector_norm`` and
            ``similarity()``).
        top_n: number of highest scores to keep per sentiment.
        min_prob: lexicon entries whose ``prob`` is not strictly greater
            than this value are ignored.

    Returns:
        dict mapping each sentiment key of the module-level ``lexicon`` to
        a list of at most ``top_n`` similarity scores in descending order.
        An empty dict is returned when ``word`` has no vector
        (``vector_norm`` is falsy).
    """
    if not word.vector_norm:
        return {}

    queries = {}
    for sentiment, word_list in lexicon.items():
        # Entries without a vector cannot be compared meaningfully, so skip them.
        scores = [
            word.similarity(candidate)
            for candidate in word_list
            if candidate.prob > min_prob and candidate.vector_norm
        ]
        scores.sort(reverse=True)
        queries[sentiment] = scores[:top_n]

    return queries

# Smoke check: per-sentiment similarity scores for the single token 'bad'.
print(most_similar(nlp(u'bad')[0]))

{'negative': [1.0, 0.7811164, 0.77391917, 0.7721673, 0.7721673, 0.7619181, 0.7444374, 0.7384125, 0.7095577, 0.69091314], 'positive': [0.73550904, 0.67329127, 0.67329127, 0.67329127, 0.66459054, 0.6571638, 0.64163655, 0.6401641, 0.6157143, 0.6157143]}


In [62]:
def mean(list):
    """Return the arithmetic mean of a sequence, or 0 when it is empty.

    Args:
        list: sized collection of numbers (the name shadows the builtin
            but is kept for existing callers).
    """
    if len(list) == 0:
        return 0
    return sum(list) / len(list)

def analyze(word_list):
    """Classify a text as negative (0) or positive (1).

    The text is reduced with review_wordlist() and re-parsed with ``nlp``.
    For every resulting token the scores returned by most_similar() are
    pooled per sentiment, and the sentiment with the larger mean wins.

    Returns:
        0 when the mean negative score is larger, 1 when the mean positive
        score is larger, and -999 when neither is larger (for example when
        no token produced any score).
    """
    pooled = {'negative': [], 'positive': []}

    for token in nlp(review_wordlist(word_list)):
        # most_similar() returns {} for tokens without a vector, which
        # simply contributes nothing here.
        for sentiment, scores in most_similar(token).items():
            pooled[sentiment] += scores

    negative = mean(pooled['negative'])
    positive = mean(pooled['positive'])

    if negative > positive:
        return 0
    if negative < positive:
        return 1
    # Both are 0 or otherwise the same (very unlikely)
    return -999

# print(analyze('The food was great'))
# Special cases:
# - "Would not go back." (gets completely filtered by review_wordlist, so analyze returns -999)


1


# TODO

- Clean up texts by stripping markdown quotes (they get mixed up with the text the author actually wrote)
- Add some emphasis on where in the sentence a word is (subject, verb, etc.?)
- ....

In [15]:
# Parse the labelled sentences: one "<sentence>\t<integer label>" record per line.
labelled = []
with open('data/yelp_labelled.txt') as f:
    # Iterate the file lazily instead of loading every line with readlines().
    for line in f:
        # Blank lines (e.g. a stray newline at the end of the file) hold no record.
        if not line.strip():
            continue
        # Split on the last tab only, so a tab inside the sentence cannot
        # shift the label into the wrong field.
        sentence, label = line.rsplit('\t', 1)
        labelled.append({
            'sentence': sentence,
            # int() ignores the trailing newline on its own.
            'original_sentiment': int(label)
        })

In [64]:
# Classify every labelled sentence, echo the running count with the verdict,
# and store the verdict on the entry itself for the evaluation cell.
counter = 0
for counter, entry in enumerate(labelled, start=1):
    sentence = entry['sentence']
    result = analyze(sentence)
    print(counter, sentence, result)
    entry.update(computed_sentiment=result)

Wow... Loved this place. 1
Crust is not good. 1
Not tasty and the texture was just nasty. 0
Stopped by during the late May bank holiday off Rick Steve recommendation and loved it. 1
The selection on the menu was great and so were the prices. 1
Now I am getting angry and I want my damn pho. 0
Honeslty it didn't taste THAT fresh.) 1
The potatoes were like rubber and you could tell they had been made up ahead of time being kept under a warmer. 0
The fries were great too. 1
A great touch. 1
Service was very prompt. 1
Would not go back. -999
The cashier had no care what so ever on what I had to say it still ended up being wayyy overpriced. 0
I tried the Cape Cod ravoli, chicken,with cranberry...mmmm! 0
I was disgusted because I was pretty sure that was human hair. 0
I was shocked because no signs indicate cash only. 0
Highly recommended. 1
Waitress was a little slow in service. 0
This place is not worth your time, let alone Vegas. 1
did not like at all. 1
The Burrittos Blah! 0
The food, ama

They have a good selection of food including a massive meatloaf sandwich, a crispy chicken wrap, a delish tuna melt and some tasty burgers. 0
The management is rude. 0
Delicious NYC bagels, good selections of cream cheese, real Lox with capers even. 1
Great Subway, in fact it's so good when you come here every other Subway will not meet your expectations. 1
I had a seriously solid breakfast here. 1
This is one of the best bars with food in Vegas. 1
He was extremely rude and really, there are so many other restaurants I would love to dine at during a weekend in Vegas. 1
My drink was never empty and he made some really great menu suggestions. 1
Don't do it!!!! -999
The waiter wasn't helpful or friendly and rarely checked on us. 1
My husband and I ate lunch here and were very disappointed with the food and service. 0
And the red curry had so much bamboo shoots and wasn't very tasty to me. 0
Nice blanket of moz over top but i feel like this was done to cover up the subpar food. 0
The bathr

Now the pizza itself was good the peanut sauce was very tasty. 0
We had 7 at our table and the service was pretty fast. 1
Fantastic service here. 1
I as well would've given godfathers zero stars if possible. 1
They know how to make them here. 0
very tough and very short on flavor! 0
I hope this place sticks around. 0
I have been in more than a few bars in Vegas, and do not ever recall being charged for tap water. 0
The restaurant atmosphere was exquisite. 1
Good service, very clean, and inexpensive, to boot! 1
The seafood was fresh and generous in portion. 1
Plus, it's only 8 bucks. 0
The service was not up to par, either. 0
Thus far, have only visited twice and the food was absolutely delicious each time. 1
Just as good as when I had it more than a year ago! 1
For a self proclaimed coffee cafe, I was wildly disappointed. 0
The Veggitarian platter is out of this world! 1
You cant go wrong with any of the food here. 0
You can't beat that. 1
Stopped by this place while in Madison for the

We walked away stuffed and happy about our first Vegas buffet experience. 1
Service was excellent and prices are pretty reasonable considering this is Vegas and located inside the Crystals shopping mall by Aria. 1
To summarize... the food was incredible, nay, transcendant... but nothing brings me joy quite like the memory of the pneumatic condiment dispenser. 1
I'm probably one of the few people to ever go to Ians and not like it. 1
Kids pizza is always a hit too with lots of great side dish options for the kiddos! 1
Service is perfect and the family atmosphere is nice to see. 1
Cooked to perfection and the service was impeccable. 1
This one is simply a disappointment. 0
Overall, I was very disappointed with the quality of food at Bouchon. 1
I don't have to be an accountant to know I'm getting screwed! 0
Great place to eat, reminds me of the little mom and pop shops in the San Francisco Bay Area. 1
Today was my first taste of a Buldogis Gourmet Hot Dog and I have to tell you it was mor

If there were zero stars I would give it zero stars. 1
Great steak, great sides, great wine, amazing desserts. 1
Worst martini ever! 0
The steak and the shrimp are in my opinion the best entrees at GC. 0
I had the opportunity today to sample your amazing pizzas! 1
We waited for thirty minutes to be seated (although there were 8 vacant tables and we were the only folks waiting). 0
The yellowtail carpaccio was melt in your mouth fresh. 0
I won't try going back there even if it's empty. 0
No, I'm going to eat the potato that I found some strangers hair in it. 0
Just spicy enough.. Perfect actually. 1
Last night was my second time dining here and I was so happy I decided to go back! 1
not even a "hello, we will be right with you." 1
The desserts were a bit strange. 0
My boyfriend and I came here for the first time on a recent trip to Vegas and could not have been more pleased with the quality of food and service. 1
I really do recommend this place, you can go wrong with this donut place! 1

Dessert: Panna Cotta was amazing. 1
Very good food, great atmosphere.1 1
Damn good steak. 0
Total brunch fail. 0
Prices are very reasonable, flavors are spot on, the sauce is home made, and the slaw is not drenched in mayo. 0
The decor is nice, and the piano music soundtrack is pleasant. 1
The steak was amazing...rge fillet relleno was the best seafood plate i have ever had! 1
Good food , good service . 1
It was absolutely amazing. 1
I probably won't be back, to be honest. 1
will definitely be back! 1
The sergeant pepper beef sandwich with auju sauce is an excellent sandwich as well. 0
Hawaiian Breeze, Mango Magic, and Pineapple Delight are the smoothies that I've tried so far and they're all good. 1
Went for lunch - service was slow. 0
We had so much to say about the place before we walked in that he expected it to be amazing, but was quickly disappointed. 1
I was mortified. 0
Needless to say, we will never be back here again. 0
Anyways, The food was definitely not filling at all, and

KeyboardInterrupt: 

In [91]:
# Expected shape of `labelled` once the classification loop has run:
# [
# {
#   'sentence': 'asd',
#   'original_sentiment': 1,
#   'computed_sentiment': 0
# }
# ]
from bokeh.plotting import figure, show, output_notebook

output_notebook()

# Outcome tally; 'not' counts sentences for which analyze() returned the
# -999 "undecided" value.
x = dict.fromkeys(('correct', 'wrong', 'not'), 0)

for entry in labelled:
    # Entries the classification loop never reached have no computed value.
    if 'computed_sentiment' not in entry:
        continue
    if entry['computed_sentiment'] == -999:
        x['not'] += 1
    elif entry['computed_sentiment'] == entry['original_sentiment']:
        x['correct'] += 1
    else:
        x['wrong'] += 1


print(x)
# Turn the tally into a two-column frame ('index', 'value') for bokeh.
data = pd.Series(x).reset_index(name='value')
print(data)
p = figure(x_range=list(x))

p.vbar(source=data, width=.9, x='index', top='value')
show(p)

{'correct': 549, 'wrong': 175, 'not': 6}
     index  value
0  correct    549
1    wrong    175
2      not      6
