In [2]:
# Import Libraries
import pandas as pd
import pickle as pk
import numpy as np
from collections import defaultdict

In [3]:
# Import processed sample data from folder.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# only load files produced by a trusted step of this pipeline.
# The context manager closes the file even if unpickling fails.
with open("Stored Data/sample_review_w_aspects.pickle", "rb") as infile:
    data = pk.load(infile)

data.head(3)

Unnamed: 0,Hotel,Reviews,Aspect_Sentiment
0,Hotel Arena,Only the park outside of the hotel was beauti...,"{'park': [0.75, 0]}"
1,Hotel Arena,No real complaints the hotel was great great ...,"{'deposit': [2.875, -1.375], 'payment': [2.875..."
2,Hotel Arena,Location was good and staff were ok It is cut...,"{'hotel': [0.5, 0], 'location': [0.5, 0], 'bre..."


In [4]:
# Merge two dictionaries into one, summing the values of keys they share
def merge_dictionies(dict1, dict2):
    """Combine two dictionaries whose values are two-element numeric pairs.

    For every key found in either input, the result holds a fresh
    ``[first, second]`` list containing the element-wise sum of that key's
    pairs across both inputs. Neither input is modified.

    Parameters
    ----------
    dict1, dict2 : dict
        Mappings of key -> indexable value with at least two numeric items.

    Returns
    -------
    collections.defaultdict
        A ``defaultdict(list)`` mapping each key to its summed pair.
    """
    combined = defaultdict(list)

    for source in (dict1, dict2):
        for aspect in source:
            pair = source[aspect]
            # Seed unseen keys with a new zero pair so the inputs' lists are
            # never shared with (or mutated through) the result.
            if aspect not in combined:
                combined[aspect] = [0, 0]
            totals = combined[aspect]
            totals[0] += pair[0]
            totals[1] += pair[1]

    return combined

# merge_dictionies({"a":[5,4]},{"a":[5,4]})

In [5]:
# Convert pandas DataFrame to a python list of rows, ordered by the first two
# columns (hotel name, then review text, going by the preview of `data`).
# Sorting on whole rows would fall through to comparing the aspect
# dictionaries whenever two rows share the same hotel and review text, and
# dicts do not support "<", so that raises TypeError. Restricting the key to
# the first two columns avoids that while keeping the same order otherwise.
array = sorted(data.values.tolist(), key=lambda row: row[:2])

# Get all the hotel(unique) names; np.unique also returns them sorted, which
# matches the hotel ordering of `array`.
hotels = np.unique(data['Hotel'].tolist())

hotels

array(['Apex Temple Court Hotel', 'Hotel Arena', 'K K Hotel George',
       'The Park Grand London Paddington', 'The Principal London'],
      dtype='<U32')

In [6]:
def unify_hotels(hotels, cluster_data):
    """Merge the aspect dictionaries of all rows belonging to each hotel.

    ``cluster_data`` is walked once with a single cursor, so both inputs must
    be ordered consistently: the rows of one hotel are contiguous and hotels
    appear in the same order as in ``hotels``.

    Parameters
    ----------
    hotels : iterable
        Hotel names, in the order their rows appear in ``cluster_data``.
    cluster_data : sequence
        Rows indexable as ``row[0]`` (hotel name) and ``row[2]`` (aspect
        dictionary accepted by ``merge_dictionies``).

    Returns
    -------
    list
        One ``[hotel_name, merged_aspects, count]`` entry per hotel.
        ``count`` is the running number of rows consumed so far (cumulative
        across hotels, not a per-hotel total). A hotel without any matching
        row gets an empty dict and leaves the cursor where it was.
    """
    total_rows = len(cluster_data)
    count = 0
    hotel_single = []

    for hotel in hotels:
        first_row = count
        merged = {}

        # Checking the bound in the loop condition lets the final row be
        # consumed like any other one. The previous break-on-last-row logic
        # left the cursor one short, which mislabelled a last hotel that had a
        # single row and under-reported the final count by one.
        while count < total_rows and cluster_data[count][0] == hotel:
            merged = merge_dictionies(merged, cluster_data[count][2])
            count += 1

        # Keep the name object stored in the data rows when there is one;
        # fall back to the requested name for hotels with no rows.
        name = cluster_data[first_row][0] if count > first_row else hotel
        hotel_single.append([name, merged, count])

    return hotel_single

# Merge the per-review aspect dictionaries into a single entry per hotel
hotel_info = unify_hotels(hotels, array)
# Bare last expression so the notebook renders the result
hotel_info

[['Apex Temple Court Hotel',
  defaultdict(list,
              {'a': [0.5, 0],
               'a la carte': [0, 0],
               'a lot': [3.25, -1.5],
               'access': [0, 0],
               'acknowledgement': [0, -1.25],
               'adapter': [0, -0.625],
               'advance': [1.5, -0.625],
               'afternoon': [0.875, -0.625],
               'age': [1.875, -0.875],
               'air': [1.625, -0.75],
               'aldwych theatre': [3.0, -1.25],
               'alex hotel couldn t': [0, 0],
               'ambience': [1.75, -0.5],
               'anything': [0.875, 0],
               'apart': [1.0, -0.625],
               'apex': [3.25, -1.125],
               'apex duck': [7.75, -1.5],
               'apex duck would': [2.625, 0],
               'apex hotel': [1.75, 0],
               'area': [5.375, -0.75],
               'atmosphere': [5.25, -2.125],
               'aways': [0, -0.25],
               'backdrop': [1.25, 0],
               'bacon': [0.

In [7]:
def unify_aspects(hotels_w_aspects):
    """Regroup per-hotel aspect values into per-aspect hotel rankings.

    Parameters
    ----------
    hotels_w_aspects : iterable
        Entries indexable as ``entry[0]`` (hotel name) and ``entry[1]``
        (dict of aspect -> indexable value). Only the first item of each
        value is used; any further items are ignored.

    Returns
    -------
    dict
        Maps each aspect to a list of ``[value, hotel_name]`` pairs sorted in
        descending order (by value, ties broken by hotel name).
    """
    dict_aspect = {}

    # Collect every [value, hotel] pair first ...
    for hotel in hotels_w_aspects:
        hotel_name = hotel[0]
        for aspect, value in hotel[1].items():
            dict_aspect.setdefault(aspect, []).append([value[0], hotel_name])

    # ... then order each ranking once, instead of re-sorting the whole list
    # after every single insertion.
    for ranking in dict_aspect.values():
        ranking.sort(reverse=True)

    return dict_aspect

# Regroup the per-hotel aspect values by aspect name
aspects_info = unify_aspects(hotel_info)
# Bare last expression so the notebook renders the result
aspects_info

{'inquiry': [[2.375, 'K K Hotel George']],
 'wardrobe didn t mark sense': [[5.875, 'The Park Grand London Paddington']],
 'weather': [[1.25, 'Apex Temple Court Hotel']],
 'anything': [[4.125, 'Hotel Arena'],
  [1.625, 'The Park Grand London Paddington'],
  [0.875, 'Apex Temple Court Hotel']],
 'bar variety': [[0, 'Apex Temple Court Hotel']],
 'night s sleep': [[6.75, 'Apex Temple Court Hotel']],
 'bathtub': [[0, 'Hotel Arena']],
 'covent garden': [[3.0, 'Apex Temple Court Hotel']],
 'covent garden soho': [[0, 'Apex Temple Court Hotel']],
 'excellent': [[3.625, 'K K Hotel George'],
  [1.625, 'Apex Temple Court Hotel'],
  [1.5, 'The Park Grand London Paddington']],
 'room hidden courtyard view': [[5.0, 'Apex Temple Court Hotel']],
 'selection left': [[1.0, 'The Principal London']],
 'bath tap': [[2.25, 'The Principal London']],
 'microwave': [[0, 'The Park Grand London Paddington']],
 'label club lounge': [[5.5, 'The Park Grand London Paddington']],
 'safety requirement': [[1.0, 'K K Hot

In [9]:
# Store the hotel data (the context manager flushes and closes the file even
# if pickling fails part-way through)
with open("Stored Data/sample_hotel_info.pickle", "wb") as outfile:
    pk.dump(hotel_info, outfile)

# Store the aspect data
with open("Stored Data/sample_aspect_info.pickle", "wb") as outfile:
    pk.dump(aspects_info, outfile)