In [66]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Pandas display settings: allow up to 10000 columns and 100 rows
# to be rendered before pandas truncates the output.
pd.options.display.max_columns = 10000
pd.options.display.max_rows = 100

In [45]:
# Load the spell-checked hotel reviews; the first CSV column becomes the index.
df = pd.read_csv(
    filepath_or_buffer='../csv/Hotel_Review_Spell_Checked.csv',
    index_col=0,
)

In [46]:
# Discard the 'Unnamed: 0.1' column (presumably a stale index written by an
# earlier CSV export -- confirm against the file).
# Rebinding instead of inplace=True, together with errors='ignore', keeps this
# cell re-runnable: a second execution no longer raises KeyError because the
# column is already gone.
df = df.drop(columns=['Unnamed: 0.1'], errors='ignore')

In [47]:
# Create a single VADER SentimentIntensityAnalyzer instance; the `polarity`
# helper defined below reuses it for every review.
vader = SentimentIntensityAnalyzer()

In [48]:
def polarity(x):
    """Return VADER's polarity scores for one piece of text.

    Thin wrapper around ``vader.polarity_scores`` so the scorer can be handed
    to ``Series.apply``. ``x`` is forwarded unchanged and the analyzer's
    result is returned as-is (in this notebook's output it is a dict with
    'neg', 'neu', 'pos' and 'compound' entries).
    """
    return vader.polarity_scores(x)

In [49]:
# Score every spell-checked review and keep the raw result per row in 'Polarity'.
df['Polarity'] = df['Spell_Checked'].map(polarity)

In [51]:
# Peek at the first two rows to confirm the new 'Polarity' column is populated.
df.head(n=2)

Unnamed: 0,Hotel_Name,Negative_Review,Positive_Review,Reviewer_Score,Reviews_Clean,Score,Spell_Checked,Polarity
0,St James Court A Taj Hotel London,No Negative,the location was perfect,9.6,no negative the location was perfect,1,no negative the location was perfect,"{'neg': 0.0, 'neu': 0.374, 'pos': 0.626, 'comp..."
1,H10 Metropolitan 4 Sup,Nothing,Everything was top notch staff were impeccable,10.0,nothing everything was top notch staff were ...,1,nothing everything was top notch staff were ...,"{'neg': 0.185, 'neu': 0.815, 'pos': 0.0, 'comp..."


In [53]:
# Drop the columns no longer needed and append one column per polarity key
# (neg / neu / pos / compound) next to the original 'Polarity' dict column.
# The score columns are built in a single pass from the list of dicts rather
# than with Series.apply(pd.Series), which constructs one Series per row and
# is very slow on large frames. Passing index=df.index keeps the rows aligned
# for the column-wise concat.
df = pd.concat(
    [
        df.drop(columns=['Hotel_Name', 'Reviews_Clean']),
        pd.DataFrame(df['Polarity'].tolist(), index=df.index),
    ],
    axis=1,
)

In [55]:
# Show the first ten rows to check the expanded neg / neu / pos / compound columns.
df.head(n=10)

Unnamed: 0,Negative_Review,Positive_Review,Reviewer_Score,Score,Spell_Checked,Polarity,neg,neu,pos,compound
0,No Negative,the location was perfect,9.6,1,no negative the location was perfect,"{'neg': 0.0, 'neu': 0.374, 'pos': 0.626, 'comp...",0.0,0.374,0.626,0.7716
1,Nothing,Everything was top notch staff were impeccable,10.0,1,nothing everything was top notch staff were ...,"{'neg': 0.185, 'neu': 0.815, 'pos': 0.0, 'comp...",0.185,0.815,0.0,-0.1511
2,Prices are elevated,Location was perfect if you want to go to the...,9.2,1,prices are elevated location was perfect if ...,"{'neg': 0.0, 'neu': 0.688, 'pos': 0.312, 'comp...",0.0,0.688,0.312,0.6124
3,No Negative,Great location right in the inner stadt close...,9.6,1,no negative great location right in the inner ...,"{'neg': 0.078, 'neu': 0.638, 'pos': 0.284, 'co...",0.078,0.638,0.284,0.8273
4,There was nothing that I could say I was not ...,Lovely room very comfortable cleanliness exce...,10.0,1,there was nothing that i could say i was not ...,"{'neg': 0.118, 'neu': 0.487, 'pos': 0.394, 'co...",0.118,0.487,0.394,0.9346
5,there are currently construction works outsid...,the location was very central,8.3,1,there are currently construction works outsid...,"{'neg': 0.122, 'neu': 0.878, 'pos': 0.0, 'comp...",0.122,0.878,0.0,-0.3612
6,No face washers in rooms Tissue box not refil...,Rooms were clean,7.9,1,no face rashers in rooms tissue box not fille...,"{'neg': 0.245, 'neu': 0.562, 'pos': 0.194, 'co...",0.245,0.562,0.194,-0.0222
7,No Negative,Great staff comfy beds,10.0,1,no negative great staff come beds,"{'neg': 0.32, 'neu': 0.389, 'pos': 0.291, 'com...",0.32,0.389,0.291,-0.0762
8,Hotel is situated in a very loud street and e...,Excellent location staff very friendly and he...,9.2,1,hotel is situated in a very loud street and e...,"{'neg': 0.068, 'neu': 0.768, 'pos': 0.164, 'co...",0.068,0.768,0.164,0.8253
9,The decor we found it too minimalist for our ...,Loved the bed and the size of the room,9.2,1,the door we found it too minimalist for our t...,"{'neg': 0.0, 'neu': 0.822, 'pos': 0.178, 'comp...",0.0,0.822,0.178,0.5994


In [68]:
# Subsets where one side of the review holds only the literal text
# 'No Negative' / 'No Positive' (apparently the dataset's filler for an empty
# field -- confirm against the data source).
no_negative = df.loc[df['Negative_Review'].eq('No Negative')]
no_positive = df.loc[df['Positive_Review'].eq('No Positive')]

In [69]:
# Per-`Score` averages of the numeric columns (reviewer score and the
# neg / neu / pos / compound values) for the 'No Negative' subset.
# numeric_only=True is needed on pandas >= 2.0: the text and dict columns are
# no longer dropped silently there, and a bare .mean() raises TypeError.
no_negative.groupby('Score').mean(numeric_only=True)

Unnamed: 0_level_0,Reviewer_Score,neg,neu,pos,compound
Score,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,4.575,0.089007,0.583637,0.327327,0.311574
1,9.38974,0.0619,0.510314,0.427782,0.720115


In [67]:
# Full summary statistics (count, mean, std, quartiles, min/max) per `Score`
# group for the 'No Negative' subset.
no_negative.groupby(by='Score').describe()

Unnamed: 0_level_0,Reviewer_Score,Reviewer_Score,Reviewer_Score,Reviewer_Score,Reviewer_Score,Reviewer_Score,Reviewer_Score,Reviewer_Score,neg,neg,neg,neg,neg,neg,neg,neg,neu,neu,neu,neu,neu,neu,neu,neu,pos,pos,pos,pos,pos,pos,pos,pos,compound,compound,compound,compound,compound,compound,compound,compound
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
Score,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2
0,284.0,4.575,1.067629,2.5,3.8,5.0,5.4,5.8,284.0,0.089007,0.119254,0.0,0.0,0.0,0.1525,0.47,284.0,0.583637,0.191975,0.143,0.403,0.603,0.751,0.927,284.0,0.327327,0.197474,0.022,0.14975,0.2805,0.5,0.83,284.0,0.311574,0.431542,-0.9453,0.0951,0.4585,0.5975,0.974
1,3002.0,9.38974,0.772413,6.3,9.2,9.6,10.0,10.0,3002.0,0.0619,0.09755,0.0,0.0,0.0,0.104,0.462,3002.0,0.510314,0.158532,0.074,0.395,0.509,0.629,0.921,3002.0,0.427782,0.143089,0.05,0.322,0.428,0.531,0.809,3002.0,0.720115,0.283328,-0.8112,0.5994,0.82435,0.9231,0.9975


In [70]:
# Per-`Score` averages of the numeric columns (reviewer score and the
# neg / neu / pos / compound values) for the 'No Positive' subset.
# numeric_only=True is needed on pandas >= 2.0: the text and dict columns are
# no longer dropped silently there, and a bare .mean() raises TypeError.
no_positive.groupby('Score').mean(numeric_only=True)

Unnamed: 0_level_0,Reviewer_Score,neg,neu,pos,compound
Score,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,4.587668,0.18459,0.676807,0.138599,-0.224394
1,7.988995,0.169912,0.623845,0.206236,0.014253


In [73]:
# Full summary statistics (count, mean, std, quartiles, min/max) per `Score`
# group for the 'No Positive' subset.
no_positive.groupby(by='Score').describe()

Unnamed: 0_level_0,Reviewer_Score,Reviewer_Score,Reviewer_Score,Reviewer_Score,Reviewer_Score,Reviewer_Score,Reviewer_Score,Reviewer_Score,neg,neg,neg,neg,neg,neg,neg,neg,neu,neu,neu,neu,neu,neu,neu,neu,pos,pos,pos,pos,pos,pos,pos,pos,compound,compound,compound,compound,compound,compound,compound,compound
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
Score,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2
0,2376.0,4.587668,0.981704,2.5,3.8,4.6,5.4,5.8,2376.0,0.18459,0.144827,0.0,0.089,0.158,0.258,0.838,2376.0,0.676807,0.174902,0.0,0.584,0.717,0.807,0.948,2376.0,0.138599,0.165003,0.0,0.0,0.087,0.191,0.872,2376.0,-0.224394,0.561668,-0.989,-0.72165,-0.4449,0.3313,0.996
1,627.0,7.988995,1.113534,6.0,7.1,7.9,8.8,10.0,627.0,0.169912,0.160776,0.0,0.0,0.147,0.2535,0.837,627.0,0.623845,0.183416,0.0,0.526,0.66,0.7665,0.947,627.0,0.206236,0.203347,0.0,0.0,0.154,0.34,0.906,627.0,0.014253,0.550464,-0.9252,-0.4449,0.1037,0.5574,0.9836


In [75]:
# Rows where BOTH review fields hold only their 'No Positive' / 'No Negative' text.
no_pos_neg = df.loc[
    df['Positive_Review'].eq('No Positive') & df['Negative_Review'].eq('No Negative')
]

In [77]:
# (rows, columns) of the subset whose positive and negative fields are both
# 'No Positive' / 'No Negative'.
no_pos_neg.shape

(5, 10)