In [1]:
# Import Libraries
import pandas as pd
import pickle as pk
import numpy as np
from collections import defaultdict
import time

In [2]:
# Import processed sample data from folder.
# The context manager closes the file even if unpickling raises.
# NOTE(review): pickle.load can execute arbitrary code while loading; only
# point this at a pickle file that comes from a trusted source.
with open("Stored Data/sample_review_w_aspects.pickle", "rb") as infile:
    data = pk.load(infile)

data.head(3)

Unnamed: 0,Hotel,Reviews,Aspect_Sentiment
0,Hotel Arena,Only the park outside of the hotel was beauti...,"{'park': [0.75, 0]}"
1,Hotel Arena,No real complaints the hotel was great great ...,"{'spot': [0.375, 0], 'check': [0.875, -0.5], '..."
2,Hotel Arena,Location was good and staff were ok It is cut...,"{'hotel': [0.5, 0], 'staff': [0.5, 0], 'locati..."


In [3]:
# Merge two dictionaries into one, summing the values of matching keys
def merge_dictionies(dict1, dict2):
    """Combine two dictionaries of two-element values into a single one.

    Each value is indexed at positions 0 and 1. Keys present in both inputs
    get the element-wise sum of their values; keys present in only one input
    are carried over (added onto a fresh ``[0, 0]``).

    Args:
        dict1: First mapping of key -> two-element sequence.
        dict2: Second mapping of key -> two-element sequence.

    Returns:
        A ``defaultdict(list)`` mapping every key seen to a new
        ``[sum_of_index_0, sum_of_index_1]`` list. Keys appear in the order
        they are first encountered (``dict1`` first, then ``dict2``). The
        input dictionaries are not modified.
    """
    combined = defaultdict(list)

    for source in (dict1, dict2):
        for aspect, pair in source.items():
            # Start every unseen key from a fresh accumulator list
            if aspect not in combined:
                combined[aspect] = [0, 0]
            totals = combined[aspect]
            totals[0] += pair[0]
            totals[1] += pair[1]

    return combined

# merge_dictionies({"a":[5,4]},{"a":[5,4]})

In [4]:
# Convert pandas DataFrame to a Python list of rows, ordered by the first two
# columns (hotel name, then review text per the frame shown above).
# Sorting on whole rows would fall through to comparing the aspect
# dictionaries whenever two rows tie on both of those columns, and dicts do
# not support "<" (TypeError). Restricting the key to the first two columns
# yields the same order and keeps tied rows in their original sequence.
array = sorted(data.values.tolist(), key=lambda row: row[:2])

# Get all the hotel (unique) names; np.unique also returns them sorted,
# which is the order unify_hotels expects when it walks `array`.
hotels = data['Hotel'].tolist()
hotels = np.unique(hotels)

hotels

array(['Apex Temple Court Hotel', 'Hotel Arena', 'K K Hotel George',
       'The Park Grand London Paddington', 'The Principal London'],
      dtype='<U32')

In [5]:
def unify_hotels(hotels, cluster_data):
    """Merge the per-review aspect dictionaries of each hotel into one.

    ``cluster_data`` is walked once, front to back, in lockstep with
    ``hotels``: consecutive rows whose first element equals the current hotel
    are folded together with ``merge_dictionies``. Both inputs therefore have
    to be ordered the same way by hotel name.

    Args:
        hotels: Iterable of hotel names, in the same order in which they
            appear in ``cluster_data``.
        cluster_data: Sequence of rows where ``row[0]`` is the hotel name and
            ``row[2]`` is that row's aspect dictionary.

    Returns:
        A list with one ``[name, merged_aspects, count]`` entry per hotel.
        ``count`` is the running number of rows consumed so far (cumulative
        across hotels, not per hotel). A hotel with no matching rows gets an
        empty dictionary and the unchanged running count.
    """
    total_rows = len(cluster_data)
    count = 0
    hotel_single = []

    for hotel in hotels:
        merged = {}
        name = hotel

        # Check the bound before indexing so the final row is consumed like
        # any other row and an empty cluster_data is handled without error.
        while count < total_rows and cluster_data[count][0] == hotel:
            # Prefer the name as stored in the row so the result keeps the
            # same value type the rows carry.
            name = cluster_data[count][0]
            merged = merge_dictionies(merged, cluster_data[count][2])
            count += 1

        hotel_single.append([name, merged, count])

    return hotel_single

# Collapse the sorted review rows into one merged aspect dictionary per hotel
hotel_info = unify_hotels(hotels=hotels, cluster_data=array)
hotel_info

[['Apex Temple Court Hotel',
  defaultdict(list,
              {'access': [0, 0],
               'acknowledgement': [0, -0.625],
               'adapter': [0, -0.625],
               'advance': [1.0, -0.625],
               'afternoon': [0, 0],
               'age': [1.875, -0.875],
               'air': [1.125, -0.125],
               'aldwych theatre': [0, 0],
               'alex hotel': [0, 0],
               'ambience': [0.5, -0.5],
               'anything': [0, 0],
               'apex': [3.625, 0],
               'apex duck would': [0, 0],
               'apex hotel': [1.75, 0],
               'area': [3.5, -0.25],
               'atmosphere': [1.0, -2.0],
               'aways': [0, -0.25],
               'backdrop': [1.25, 0],
               'bacon': [1.0, -1.5],
               'badge': [0, 0],
               'balcony': [0.5, -1.5],
               'band': [0.5, 0],
               'bar': [0.125, -2.625],
               'bar area': [1.0, 0],
               'bar room': [2.5, -0.

In [6]:
def unify_aspects(hotels_w_aspects):
    """Build, for every aspect, a ranking of the hotels that mention it.

    Only the first element of each aspect value (``value[0]``) is used as the
    score; the second element is ignored here.

    Args:
        hotels_w_aspects: Iterable of entries where ``entry[0]`` is the hotel
            name and ``entry[1]`` is a mapping of aspect -> value sequence.

    Returns:
        A dict mapping each aspect to a list of ``[score, hotel_name]`` pairs
        sorted in descending order (by score, then by hotel name). Aspects
        appear in the order they are first encountered.
    """
    dict_aspect = {}

    # Collect every [score, hotel] pair first ...
    for hotel in hotels_w_aspects:
        hotel_name = hotel[0]
        for key, value in hotel[1].items():
            dict_aspect.setdefault(key, []).append([value[0], hotel_name])

    # ... then rank each aspect once, instead of re-sorting the whole list
    # after every single insertion.
    for ranking in dict_aspect.values():
        ranking.sort(reverse=True)

    return dict_aspect


# Time the aspect aggregation. perf_counter is a monotonic, high-resolution
# clock, so the elapsed value is not affected by system clock adjustments.
start_time = time.perf_counter()

aspects_info = unify_aspects(hotel_info)

print("--- %s seconds ---" % (time.perf_counter() - start_time))

aspects_info

--- 0.0059239864349365234 seconds ---


{'bath quiet': [[0.5, 'Apex Temple Court Hotel']],
 'staff walking': [[1.0, 'Apex Temple Court Hotel']],
 'line': [[3.125, 'The Principal London'],
  [1.25, 'K K Hotel George'],
  [0, 'Hotel Arena']],
 'card': [[0.875, 'Apex Temple Court Hotel'],
  [0.375, 'The Principal London'],
  [0.375, 'Hotel Arena'],
  [0, 'The Park Grand London Paddington']],
 'drink etc': [[1.5, 'K K Hotel George']],
 'storage': [[0.625, 'K K Hotel George']],
 'neck room service': [[1.5, 'Hotel Arena']],
 'elevator': [[1.125, 'The Principal London']],
 'tea coffee': [[0, 'Hotel Arena']],
 'door staff': [[2.0, 'The Principal London']],
 'monastery building': [[1.5, 'Hotel Arena']],
 'someone drilling concrete': [[1.125, 'Hotel Arena']],
 'invitation': [[0, 'Apex Temple Court Hotel']],
 'centre': [[1.25, 'Hotel Arena'],
  [1.125, 'Apex Temple Court Hotel'],
  [0.25, 'The Park Grand London Paddington']],
 'bacon': [[2.125, 'The Principal London'],
  [1.0, 'Apex Temple Court Hotel'],
  [0.375, 'K K Hotel George'],


In [7]:
# Store the hotel data.
# The context manager flushes and closes the file even if pickling raises.
with open("Stored Data/sample_hotel_info.pickle", "wb") as outfile:
    pk.dump(hotel_info, outfile)

# Store the aspect data
with open("Stored Data/sample_aspect_info.pickle", "wb") as outfile:
    pk.dump(aspects_info, outfile)