In [34]:
# Import Libraries
import nltk
import pandas as pd
import pickle as pk
import ast
import math
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Module-level VADER analyser shared by the whole notebook; word_sentiment()
# reads this global to score opinion phrases.
# NOTE(review): presumably requires the NLTK 'vader_lexicon' resource to be
# downloaded beforehand — confirm on a fresh environment.
analyser = SentimentIntensityAnalyzer()

In [35]:
def opinion_sentiment(row):
    """Score every aspect mentioned in one review row and update the top reviews.

    Intended to be used with ``DataFrame.apply(..., axis=1)``.

    Parameters
    ----------
    row : mapping
        Must provide ``'Hotel'``, ``'Reviews'`` (the review text) and
        ``'Aspect'`` (a string holding a Python literal list of
        ``(opinion_phrase, aspect)`` pairs, parsed with ``ast.literal_eval``).

    Returns
    -------
    dict
        ``{aspect: [positive_total, negative_total]}`` where the totals are the
        sums of the positive / negative ``word_sentiment`` scores of every
        opinion phrase attached to that aspect in this review.

    Side effects
    ------------
    Mutates the module-level ``best_review`` dict, which must already exist:
    ``{"<hotel>_<aspect>": [(review, score), (review, score), (review, score)]}``.
    A new key starts with three ``(text, -inf)`` placeholders; afterwards the
    lowest-scored slot is replaced whenever this review's positive total for
    the aspect is strictly greater.

    Notes
    -----
    The store is updated once per distinct aspect, after all occurrences have
    been summed. Updating inside the per-occurrence loop would compare partial
    running totals and could place the same review in several of the three
    slots when an aspect is mentioned more than once.
    """
    hotel = row['Hotel']
    text = row['Reviews']
    aspects = ast.literal_eval(row['Aspect'])
    aspect_opinion_word = {}

    # Pass 1: accumulate positive / negative sentiment per aspect.
    for aspect in aspects:
        totals = aspect_opinion_word.setdefault(aspect[1], [0, 0])
        word_senti = word_sentiment(aspect[0])

        if word_senti > 0:  # Positive
            totals[0] += word_senti
        elif word_senti < 0:  # Negative
            totals[1] += word_senti

    # Pass 2: register this review once per aspect, using the final positive total.
    for aspect_name, totals in aspect_opinion_word.items():
        hotel_aspect = str(hotel) + '_' + str(aspect_name)
        if hotel_aspect not in best_review:
            best_review[hotel_aspect] = [(text, -math.inf) for _ in range(3)]

        top_revs = best_review[hotel_aspect]
        if not top_revs:
            # Nothing to compare against (e.g. an externally supplied empty list).
            continue

        # Index of the weakest stored review; ties resolve to the first slot.
        min_index = min(range(len(top_revs)), key=lambda i: float(top_revs[i][1]))
        if totals[0] > float(top_revs[min_index][1]):
            top_revs[min_index] = (text, totals[0])

    # Structure : {aspect: [Positive, Negative]}
    # Structure : {hotel_aspect: [(review1, score), (review2, score), (review3, score)]}
    return aspect_opinion_word

In [36]:
def word_sentiment(phrase):
    """Return the 'compound' polarity score of ``phrase``.

    The phrase is scored with the module-level ``analyser`` and only the
    ``'compound'`` entry of the resulting score dict is returned.
    """
    return analyser.polarity_scores(phrase)['compound']

In [37]:
# Import processed sample data from folder.
# The context manager closes the file even if unpickling fails.
# NOTE: pickle.load can execute arbitrary code — only load files you trust.
with open("Stored Data/sample_reviews.pickle", "rb") as infile:
    data = pk.load(infile)

data.head(10)

Unnamed: 0,Hotel,City,Reviews,Date,Score
0,Hotel Arena,"Amsterdam, Netherlands",Only the park outside of the hotel was beauti...,8/3/2017,2.9
1,Hotel Arena,"Amsterdam, Netherlands",No real complaints the hotel was great great ...,8/3/2017,7.5
2,K K Hotel George,"London, United Kingdom",Very comfortable beds smart bathroom good sho...,8/3/2017,9.6
3,Apex Temple Court Hotel,"London, United Kingdom",Everything,8/3/2017,10.0
4,Apex Temple Court Hotel,"London, United Kingdom",Great Hotel with wonderful staff and good cui...,8/3/2017,7.9
5,The Park Grand London Paddington,"London, United Kingdom",Location,8/3/2017,5.4
6,The Park Grand London Paddington,"London, United Kingdom",The front desk staff were very helpful and pl...,8/3/2017,7.5
7,The Park Grand London Paddington,"London, United Kingdom",Friendly atmosphere beds comfortable,8/3/2017,8.3
8,Park Plaza County Hall London,"London, United Kingdom",Just the location and view,8/3/2017,5.0
9,Park Plaza County Hall London,"London, United Kingdom",Around the corner from the London eye and use...,8/3/2017,7.5


In [38]:
# Pre-processed aspects produced by the Java script (enhanced dependencies).
# Forward slash in the path: "\j" is not a valid string escape and a backslash
# separator only resolves on Windows; the other cells already use "/".
java_aspects = pd.read_csv("Stored Data/java_output.txt", header=None, delimiter="\t", names=["Aspects"])
# Assign the single parsed column explicitly (aligned on the row index).
data['Aspect'] = java_aspects["Aspects"]
data.head(10)

Unnamed: 0,Hotel,City,Reviews,Date,Score,Aspect
0,Hotel Arena,"Amsterdam, Netherlands",Only the park outside of the hotel was beauti...,8/3/2017,2.9,"[(""beautiful"",""park"")]"
1,Hotel Arena,"Amsterdam, Netherlands",No real complaints the hotel was great great ...,8/3/2017,7.5,"[(""real"",""complaints""), (""surroundings"",""compl..."
2,K K Hotel George,"London, United Kingdom",Very comfortable beds smart bathroom good sho...,8/3/2017,9.6,"[(""very comfortable"",""beds""), (""smart"",""bathro..."
3,Apex Temple Court Hotel,"London, United Kingdom",Everything,8/3/2017,10.0,[]
4,Apex Temple Court Hotel,"London, United Kingdom",Great Hotel with wonderful staff and good cui...,8/3/2017,7.9,"[(""wonderful"",""staff""), (""good"",""cuisine"")]"
5,The Park Grand London Paddington,"London, United Kingdom",Location,8/3/2017,5.4,[]
6,The Park Grand London Paddington,"London, United Kingdom",The front desk staff were very helpful and pl...,8/3/2017,7.5,"[(""front"",""staff""), (""staff"",""desk""), (""very h..."
7,The Park Grand London Paddington,"London, United Kingdom",Friendly atmosphere beds comfortable,8/3/2017,8.3,"[(""Friendly"",""beds""), (""beds"",""atmosphere"")]"
8,Park Plaza County Hall London,"London, United Kingdom",Just the location and view,8/3/2017,5.0,[]
9,Park Plaza County Hall London,"London, United Kingdom",Around the corner from the London eye and use...,8/3/2017,7.5,"[(""was"",""pickup""), (""cruise"",""river""), (""eatin..."


In [39]:
# Reset the shared top-review store so re-running this cell starts clean;
# opinion_sentiment() fills it as a side effect of the apply below.
# (No `global` statement is needed: names assigned at cell level are already global.)
best_review = {}
data['Sentiment'] = data.apply(opinion_sentiment, axis=1)
print(len(data))
data.head()

10051


Unnamed: 0,Hotel,City,Reviews,Date,Score,Aspect,Sentiment
0,Hotel Arena,"Amsterdam, Netherlands",Only the park outside of the hotel was beauti...,8/3/2017,2.9,"[(""beautiful"",""park"")]","{'park': [0.5994, 0]}"
1,Hotel Arena,"Amsterdam, Netherlands",No real complaints the hotel was great great ...,8/3/2017,7.5,"[(""real"",""complaints""), (""surroundings"",""compl...","{'complaints': [0, 0], 'bit': [0, 0], 'site': ..."
2,K K Hotel George,"London, United Kingdom",Very comfortable beds smart bathroom good sho...,8/3/2017,9.6,"[(""very comfortable"",""beds""), (""smart"",""bathro...","{'beds': [0.5563, 0], 'staff': [0.4939, 0], 'd..."
3,Apex Temple Court Hotel,"London, United Kingdom",Everything,8/3/2017,10.0,[],{}
4,Apex Temple Court Hotel,"London, United Kingdom",Great Hotel with wonderful staff and good cui...,8/3/2017,7.9,"[(""wonderful"",""staff""), (""good"",""cuisine"")]","{'staff': [0.5719, 0], 'cuisine': [0.4404, 0]}"


In [40]:
# Store the aspect sample data (the context manager closes the file even if dumping fails)
with open("Stored Data/sample_review_w_aspects.pickle", "wb") as outfile:
    pk.dump(data, outfile)

# Store the best reviews data
with open("Stored Data/best_reviews_dict.pickle", "wb") as outfile:
    pk.dump(best_review, outfile)

# Also export the scored reviews as CSV
data.to_csv(r'Stored Data/sample_review_w_aspects.csv')