In [1]:
import numpy as np
import pandas as pd
import os

In [2]:
reviews_by_restaurant = []
restaurant_names = []
for filename in os.listdir('reviews'):
    df = pd.read_csv(f'reviews/{filename}')
    combined = ' '.join(df['review'].to_list())
    reviews_by_restaurant.append(combined)
    restaurant_names.append(filename[:-4])
print(len(reviews_by_restaurant))

438


In [3]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer

# Remove stopwords
stop_words = set(stopwords.words("english"))
punctuation = [',', '.', ':', ';', '!', '?', '(', ')', '[', ']', '{', '}', '’']
for p in punctuation:
    stop_words.add(p)
stop_words.add('food')
stop_words.add('would')
stop_words.add('restaurant')
stop_words.add('great')
stop_words.add('good')
stop_words.add('us')

processed_texts = []
for text in reviews_by_restaurant:
    tokens = word_tokenize(text.lower())
    filtered_tokens = [token for token in tokens if not (token in stop_words)]
    processed_text = " ".join(filtered_tokens)
    processed_texts.append(processed_text)

# Convert texts to tfidf
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(processed_texts)

keywords = {}
# Get tokens with highest tfidf values for each text
for i, text in enumerate(processed_texts):
    feature_index = tfidf_matrix[i,:].nonzero()[1]
    tfidf_scores = zip(feature_index, [tfidf_matrix[i, x] for x in feature_index])
    # sorted_tfidf_scores = sorted(tfidf_scores, key=lambda x: x[1], reverse=True)
    # top_tfidf_tokens = [vectorizer.get_feature_names()[index] for index, score in sorted_tfidf_scores[:int(len(sorted_tfidf_scores)*0.01)]]
    top_tfidf_tokens = [vectorizer.get_feature_names()[x[0]] for x in tfidf_scores if x[1] >= 0.12]
    tokenized_name = word_tokenize(restaurant_names[i].lower())
    top_tfidf_tokens = [token for token in top_tfidf_tokens if token not in tokenized_name]
    print(f"{restaurant_names[i]}: {top_tfidf_tokens}")
    keywords[restaurant_names[i]] = top_tfidf_tokens



10 Scotts: ['scones', 'staff', 'oysters', 'buffet', 'service', 'scott', 'hyatt', 'tea', 'high']
15 Stamford By Alvin Leung: ['michael', 'laksa', 'kempinski', 'kaya', 'service']
21 on Rajah: ['syahmi', 'nada', 'azlan', 'nan', 'event', 'buffet', 'service']
25 Degrees: ['dines', 'rajesh', 'raj', 'jolene', 'arif', 'dinesh', 'gina', 'burgers', 'burger', 'shankri', 'service']
665°F: ['xuan', 'michelle', 'remon', 'lucas', 'dana', '665', 'colin', '665f', 'steak', 'view', 'service']
8 Korean BBQ: ['pancake', 'stew', 'kimchi', 'pork', 'meat', 'staff', 'service']
Acqua e Farina: ['roberto', 'antonio', 'pizza', 'italian', 'pasta', 'service']
Akira Back: ['evelyn', 'mathan', 'hyun', 'jenny', 'alisa', 'pizza', 'tuna', 'rock', 'service']
Al Borgo: ['pizzas', 'pizza', 'italian', 'pasta', 'service']
Al Forno: ['alforno', 'pizzas', 'pizza', 'italian', 'pasta', 'service']
Alati Divine Greek Cuisine: ['greece', 'dips', 'pita', 'octopus', 'service']
Alley on 25: ['jonas', 'horman', 'caleb', 'andaz', 'servi

Din Tai Fung (RWS Branch): ['dtf', 'bao', 'xiao', 'dumplings', 'dumpling', 'sentosa', 'sum', 'dim', 'rice']
Din Tai Fung, Suntec City: ['dtf', 'bao', 'xiao', 'dumplings', 'dim', 'pork']
Ding Dong: ['scotch', 'vietnamese', 'curry', 'asian', 'place', 'service']
Dolce Vita: ['ck', 'hucksen', 'joshua', 'service']
Don Quijote Spanish Restaurant: ['sangria', 'paella', 'tapas', 'service']
Dusk Restaurant & Bar: ['ajaib', 'david', 'faber', 'cable', 'sunset', 'view', 'staff', 'service']
Earle Swensen's - VivoCity: ['swensens', 'view', 'spread', 'nice', 'bar', 'salad', 'buffet', 'service', 'ambience']
East Coast Lagoon Food Village: ['haron', 'stingray', 'stall', 'beach', 'stalls', 'hawker', 'satay', 'place']
Edge Food Theatre: ['janice', 'pacific', 'sonia', 'pan', 'seafood', 'staff', 'buffet', 'service']
elemen: ['millenia', 'meal', 'staff', 'served', 'set', 'service', 'time', 'course']
Element at Amara Singapore Hotel: ['rav', 'joanne', 'sharon', 'buffet', 'service']
Elixir Boutique Roasters: 

LeVeL33: ['mhel', '33', 'bay', 'terrace', 'views', 'beer', 'view', 'service']
Lighthouse Bistro & Bar: ['union', 'fisherman', 'pasta', 'service']
Lime House Caribbean: ['rums', 'jarek', 'jamaican', 'limehouse', 'jerk', 'doubles', 'goat', 'rum', 'curry', 'cocktails']
Lime Restaurant: ['eileen', 'laksa', 'staff', 'buffet', 'service']
Limoncello Pizza and Grill: ['italian', 'river', 'pasta', 'service']
Little Island Brewing Co @ South Beach: ['brennan', 'lab', 'alchemist', 'infused', 'stout', 'craft', 'beers', 'beer', 'place']
Lobby Lounge at Conrad Centennial Singapore: ['lemuel', 'jeanette', 'jinnie', 'kalai', 'theresa', 'ian', 'linda', 'krish', 'service', 'tea']
Lola's Cafe: ['kovan', 'paprika', 'wings', 'service']
Lolla: ['urchin', 'pudding']
Long Beach Seafood Restaurant: ['dempsey', 'chili', 'pepper', 'chilli', 'crab', 'black']
LONG BEACH UDMC: ['razor', 'crabs', 'pepper', 'chilli', 'crab', 'black', 'seafood', 'service']
Loulou - French Cuisine & Wine Bar: ['raymond', 'jeffrey', 'ra

RedDot Brewhouse: ['nishant', 'monster', 'river', 'beers', 'quay', 'boat', 'green', 'beer', 'place', 'service']
Republic: ['sandrine', 'wahida', 'liz', 'chihuly', 'rudi', 'dunstan', 'jenny', 'gina', 'lounge', 'bar', 'service', 'tea']
Riders Cafe: ['stables', 'horses', 'rosti', 'balcony', 'nice', 'service']
Ristorante Da Valentino: ['italian', 'pasta', 'service']
Rosso Vino: ['eva', 'raman', 'sandy', 'service']
Rubato Italian Kitchen & Wine Bar: ['pizza', 'tiramisu', 'pasta', 'service']
Ruth's Chris Steak House: ['ribeye', 'steaks', 'service']
Sabai Fine Thai on the Bay: ['curry', 'service']
Salt tapas & bar: ['raffles', 'nice', 'service']
Samy's Curry: ['mysore', 'dempsey', 'leaf', 'mutton', 'masala', 'indian', 'banana', 'rice', 'fish', 'head']
Saveur (Purvis): ['mignon', 'confit', 'french', 'gras', 'foie', 'duck', 'service']
Sawadee Thai Cuisine: ['tom', 'rice', 'curry', 'service']
Seasonal Tastes: ['guven', 'westin', 'lobsters', 'view', 'seafood', 'spread', 'staff', 'buffet', 'servic

Triple Three: ['william', 'sashimi', 'seafood', 'buffet', 'service']
Tsukiji Fish Market: ['chirashi', 'ramen', 'jack', 'tempura', 'sashimi', 'sushi', 'view', 'salmon', 'fresh', 'oysters', 'ordered', 'service']
TWG Tea at ION Orchard: ['scones', 'teas']
TWG Tea Garden at Marina Bay Sands: ['scones', 'teas']
Two Chefs Bar Mexican & Italian: ['cove', 'lea', 'quayside', 'fajitas', 'ash', 'vicky', 'sentosa', 'margarita', 'pizza', 'staff', 'service']
Uncle Leong Seafood: ['sand', 'crabs', 'chilli', 'crab', 'shimmering', 'service']
Uya Japanese Unagi Restaurant: ['hitsumabushi', 'eel']
Vatan Se: ['paneer', 'tikka', 'indian', 'service']
Vatos Urban Tacos: ['galbi', 'mexican', 'taco', 'service']
Verre Modern Bistro & Wine Bar: ['river', 'service']
Vespetta Italian Restaurant: ['pizza', 'river', 'quay', 'boat', 'wine', 'service']
Violet Oon Singapore ION: ['peranakan', 'pie', 'rendang', 'service', 'tea']
Violet Oon Singapore: ['syarif', 'prata', 'peranakan', 'jewel', 'rendang', 'staff', 'servic

In [4]:
from nltk.collocations import BigramAssocMeasures, BigramCollocationFinder

def get_bigrams(tokens, score_fn=BigramAssocMeasures.raw_freq, n=200):
    bigram_finder = BigramCollocationFinder.from_words(tokens)
    bigrams = bigram_finder.nbest(score_fn, 5)
    return bigrams

# Get the bigrams
for i, restaurant_reviews in enumerate(processed_texts):
    bigrams = get_bigrams(word_tokenize(restaurant_reviews))
    filtered_bigrams = keywords[restaurant_names[i]]
    for bigram in bigrams:
        if bigram[0] in keywords[restaurant_names[i]]:
            filtered_bigrams.remove(bigram[0])
            if not f'{bigram[0]} {bigram[1]}' in filtered_bigrams:
                filtered_bigrams.append(f'{bigram[0]} {bigram[1]}')
            continue
        if bigram[1] in keywords[restaurant_names[i]]:
            filtered_bigrams.remove(bigram[1])
            if not f'{bigram[0]} {bigram[1]}' in filtered_bigrams:
                filtered_bigrams.append(f'{bigram[0]} {bigram[1]}')
    print(f'{restaurant_names[i]}: {filtered_bigrams}')
    keywords[restaurant_names[i]] = filtered_bigrams

10 Scotts: ['scones', 'staff', 'oysters', 'buffet', 'service', 'high tea', 'afternoon tea', 'grand hyatt', '10 scott']
15 Stamford By Alvin Leung: ['michael', 'laksa', 'kempinski', 'kaya', 'excellent service']
21 on Rajah: ['syahmi', 'nada', 'azlan', 'nan', 'event', 'service', 'buffet spread']
25 Degrees: ['dines', 'jolene', 'arif', 'dinesh', 'gina', 'burgers', 'burger', 'shankri', 'raj gina', 'service raj', 'service rajesh']
665°F: ['michelle', 'remon', 'lucas', 'dana', '665', 'colin', '665f', 'steak', 'view', 'excellent service', 'jia xuan']
8 Korean BBQ: ['pancake', 'kimchi', 'meat', 'staff', 'pork belly', 'service staff', 'army stew']
Acqua e Farina: ['roberto', 'antonio', 'pizza', 'pasta', 'service', 'authentic italian']
Akira Back: ['evelyn', 'mathan', 'hyun', 'jenny', 'alisa', 'pizza', 'rock', 'service', 'tuna pizza']
Al Borgo: ['pizzas', 'pizza', 'pasta', 'service', 'authentic italian']
Al Forno: ['alforno', 'pizzas', 'italian', 'pasta', 'service', 'best pizza']
Alati Divine Gr

Dusk Restaurant & Bar: ['ajaib', 'david', 'faber', 'sunset', 'staff', 'excellent service', 'cable car', 'nice view']
Earle Swensen's - VivoCity: ['swensens', 'view', 'spread', 'bar', 'service', 'ambience', 'salad bar', 'salad buffet', 'nice ambience']
East Coast Lagoon Food Village: ['stingray', 'stall', 'beach', 'stalls', 'hawker', 'satay', 'place', 'haron satay']
Edge Food Theatre: ['janice', 'pacific', 'sonia', 'seafood', 'staff', 'buffet', 'pan pacific', 'service staff']
elemen: ['millenia', 'meal', 'staff', 'served', 'time', 'set meal', 'main course', 'service staff']
Element at Amara Singapore Hotel: ['rav', 'joanne', 'sharon', 'buffet', 'excellent service']
Elixir Boutique Roasters: ['truffle', 'toast', 'prosciutto', 'brunch', 'works', 'really', 'french toast', 'scrambled eggs', 'breakfast works', 'eggs prosciutto']
Ellenborough Market Cafe: ['harry', 'pengat', 'peranakan', 'spread', 'buffet', 'durian pengat', 'service staff']
Empress: ['polaroid', 'service staff']
En Sushi: ['c

Lawry's The Prime Rib: ['lawrys', 'shiva', 'excellent service']
Le Bistrot du Sommelier: ['onglet', 'rillettes', 'steak', 'service', 'wine list', 'authentic french']
Le Noir Bar & Lounge: ['swati', 'jeremy', 'band', 'dee', 'bands', 'view', 'friendly', 'place', 'drinks', 'service', 'live band', 'live music', 'staff friendly']
Le Petit Chef - Singapore: ['iggie', 'cj', 'animation', 'interactive', 'entertaining', 'host', 'dining experience', 'grand hyatt']
Leo Cafe & Restaurant: ['uncle leo']
Les Bouchons Ann Siang: ['zet', 'steak', 'fries', 'steaks', 'mark', 'service', 'onion soup', 'french onion']
Les Bouchons Robertson Quay: ['hafiz', 'steak', 'steaks', 'wine', 'service', 'medium rare', 'flow fries']
LeVeL33: ['mhel', 'terrace', 'views', 'beer', 'level 33', 'marina bay', 'view marina', 'service excellent']
Lighthouse Bistro & Bar: ['union', 'fisherman', 'service', 'lobster pasta']
Lime House Caribbean: ['rums', 'jarek', 'jamaican', 'limehouse', 'doubles', 'rum', 'cocktails', 'jerk chic

Punjab Grill: ['ramesh', 'paan', 'tikka', 'indian', 'service', 'chef javed']
Quentin's Eurasian Restaurant: ['bostador', 'debal', 'devil', 'oxtail', 'sugee cake', 'live band', "'s curry"]
Ramen Nagi: ['tonkotsu', 'broth', 'king ramen']
RAS The Essence of India: ['ajeesh', 'river', 'quay', 'clarke quay', 'service excellent', 'indian cuisine']
Red Eye Smokehouse: ['bbq', 'meats', 'brisket', 'meat', 'place', 'beef brisket', 'pork ribs', 'short ribs']
Red House Seafood (Grand Copthorne): ['robertson', 'crabs', 'quay', 'singapore', 'service', 'chilli crab', 'pepper crab', 'chili crab']
Red Sparrow: ['bo', 'pho', 'rolls', 'beef pho', 'authentic vietnamese']
RedDot BrewHouse Dempsey: ['beers', 'beer', 'christmas', 'place', 'red dot', 'staff friendly', 'service staff']
RedDot Brewhouse: ['nishant', 'monster', 'river', 'beers', 'quay', 'place', 'service', 'boat quay', 'green beer', 'craft beer']
Republic: ['sandrine', 'wahida', 'liz', 'rudi', 'dunstan', 'jenny', 'gina', 'lounge', 'high tea', 'c

The Lighthouse: ['raffy', 'giri', 'khalid', 'fullerton', 'views', 'jay', 'bar', 'rooftop bar', 'marina bay', 'view marina', 'bay sands']
The Line: ['ameen', 'shirley', 'rachel', 'jaya', 'ruby', 'shangri', 'spread', 'rhea', 'buffet', 'service staff']
The Lobby Lounge at InterContinental Singapore: ['hayden', 'haris', 'tung', 'praveen', 'hani', 'high tea', 'excellent service', 'service staff']
The Lobby Lounge: ['yssa', 'camille', 'belinda', 'alyssa', 'katrina', 'wendy', 'angel', 'shangri', 'staff', 'high tea', 'service staff']
The Marmalade Pantry at Downtown: ['solemnisation', 'service', 'kannan team', 'oasia hotel']
The Marmalade Pantry at Novena: ['syam', 'charmaine', 'nice', 'staff', 'service staff', 'high tea']
The Mind Cafe: ['games', 'game', 'fun', 'free flow', 'flow drinks', 'drinks snacks', 'board games']
The Orchard Cafè: ['soh', 'johnny', 'staff', 'tina soh', 'orchard cafe', 'excellent service', 'buffet lunch']
The Peranakan: ['keluak', 'buah keluak']
The Populus Coffee and 

In [5]:
import string

def preprocess_sentence(sentence):
    new_sentence = ' '.join(sentence.splitlines())
    return new_sentence.lower().translate(str.maketrans('', '', string.punctuation))

In [6]:
import string

review_dict = {}
for filename in os.listdir('reviews'):
    reviews = pd.read_csv('reviews/' + filename)['review'].to_list()
    inputs = []
    for review in reviews:
        sentences = nltk.sent_tokenize(review)
        for sent in sentences:
            found_keywords = []
            for keyword in keywords[filename[:-4]]:
                if keyword in sent.lower().strip():
                    found_keywords.append(keyword)
            if found_keywords:
                inputs.append({'aspects': found_keywords, 'sentence': preprocess_sentence(sent)})
    review_dict[filename[:-4]] = inputs

In [9]:
from sgnlp.models.sentic_gcn import(
    SenticGCNBertConfig,
    SenticGCNBertModel,
    SenticGCNBertEmbeddingConfig,
    SenticGCNBertEmbeddingModel,
    SenticGCNBertTokenizer,
    SenticGCNBertPreprocessor,
    SenticGCNBertPostprocessor
)

tokenizer = SenticGCNBertTokenizer.from_pretrained("bert-base-uncased")

config = SenticGCNBertConfig.from_pretrained(
    "https://storage.googleapis.com/sgnlp/models/sentic_gcn/senticgcn_bert/config.json"
)

model = SenticGCNBertModel.from_pretrained(
    "https://storage.googleapis.com/sgnlp/models/sentic_gcn/senticgcn_bert/pytorch_model.bin",
    config=config
)

embed_config = SenticGCNBertEmbeddingConfig.from_pretrained("bert-base-uncased")

embed_model = SenticGCNBertEmbeddingModel.from_pretrained("bert-base-uncased",
    config=embed_config
)

preprocessor = SenticGCNBertPreprocessor(
    tokenizer=tokenizer, embedding_model=embed_model,
    senticnet="https://storage.googleapis.com/sgnlp/models/sentic_gcn/senticnet.pickle",
    device="cpu")

postprocessor = SenticGCNBertPostprocessor()

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /bert-base-uncased/resolve/main/vocab.txt HTTP/1.1" 200 0
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'BertTokenizer'. 
The class this function is called from is 'SenticGCNBertTokenizer'.
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): storage.googleapis.com:443
DEBUG:urllib3.connectionpool:https://storage.googleapis.com:443 "GET /sgnlp/models/sentic_gcn/senticgcn_bert/config.json HTTP/1.1" 200 343


Downloading:   0%|          | 0.00/343 [00:00<?, ?B/s]

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): storage.googleapis.com:443
DEBUG:urllib3.connectionpool:https://storage.googleapis.com:443 "GET /sgnlp/models/sentic_gcn/senticgcn_bert/pytorch_model.bin HTTP/1.1" 200 7098703


Downloading:   0%|          | 0.00/7.10M [00:00<?, ?B/s]

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /bert-base-uncased/resolve/main/config.json HTTP/1.1" 200 0
Some weights of the model checkpoint at bert-base-uncased were not used when initializing SenticGCNBertEmbeddingModel: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing SenticGCNBertEmbeddingModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SenticGCNBertEmbeddingModel from the checkpoint of a model that you expect to be exactly ide

In [12]:
def summarize_outputs(outputs):
    appearance_counts = {}
    sentiments = {}
    for output in outputs:
        for i, aspect in enumerate(output['aspects']):
            aspect_term = [output['sentence'][a] for a in aspect]
            if len(aspect_term) > 1:
                aspect_term = ' '.join(aspect_term)
            else:
                aspect_term = aspect_term[0]
            if aspect_term in appearance_counts:
                appearance_counts[aspect_term] += 1
                sentiments[aspect_term] += output['labels'][i]
            else:
                appearance_counts[aspect_term] = 1
                sentiments[aspect_term] = output['labels'][i]
    result = {}
    for key in appearance_counts.keys():
        result[key] = 100 * ((sentiments[key] / appearance_counts[key]) / 2 + 0.5) # Normalize to 0-100
    return result

In [13]:
output_dictionary = {}
for restaurant_name in review_dict.keys():
    outputs = []
    for inp in review_dict[restaurant_name]:
        try:
            processed_inputs, processed_indices = preprocessor(np.array([inp]))
            raw_outputs = model(processed_indices)
            post_outputs = postprocessor(processed_inputs=processed_inputs, model_outputs=raw_outputs)
            outputs.append(post_outputs[0])
        except:
            pass
    output_dictionary[restaurant_name] = summarize_outputs(outputs)
    print(f'{restaurant_name}: {output_dictionary[restaurant_name]}')

10 Scotts: {'high tea': 81.08108108108108, 'service': 79.8507462686567, 'buffet': 68.75, '10 scott': 0.0, 'afternoon tea': 80.76923076923077, 'grand hyatt': 85.0, 'oysters': 70.45454545454545, 'staff': 72.61904761904762, 'scones': 62.5}
15 Stamford By Alvin Leung: {'kempinski': 68.18181818181819, 'laksa': 75.0, 'excellent service': 83.33333333333333, 'kaya': 81.81818181818181, 'michael': 88.88888888888889}
21 on Rajah: {'nan': 92.5925925925926, 'service': 90.0, 'event': 83.33333333333333, 'azlan': 84.61538461538461, 'nada': 81.81818181818181, 'buffet spread': 83.33333333333333, 'syahmi': 78.57142857142857}
25 Degrees: {'shankri': 90.90909090909092, 'burger': 82.6086956521739, 'burgers': 96.875, 'gina': 94.44444444444444, 'arif': 93.75, 'dinesh': 90.0, 'jolene': 85.0, 'dines': 95.0}
665°F: {'steak': 64.28571428571428, 'view': 87.5, '665f': 87.5, 'colin': 75.0, 'dana': 88.46153846153845, '665': 76.66666666666666, 'lucas': 95.45454545454545, 'remon': 72.72727272727273, 'michelle': 89.2857

Blue Jasmine: {'rice': 93.10344827586206, 'service': 81.70731707317074, 'sticky rice': 96.15384615384616, 'tom yum': 83.33333333333333, 'pad thai': 92.85714285714286, 'justin': 50.0, 'bento': 77.77777777777779, 'hisham': 72.22222222222221}
Bochinche: {'steak': 76.66666666666666, 'argentinian': 79.16666666666667, 'empanadas': 76.47058823529412, 'service staff': 42.85714285714286, 'steaks': 75.0}
Boon Tong Kee at River Valley: {'rice': 81.73076923076923, 'chicken rice': 85.65217391304347}
Borscht.sg: {'russian cuisine': 85.71428571428572, 'dumplings': 80.18018018018019, 'borscht': 71.21212121212122, 'dumpling': 70.45454545454545, 'chebureki': 73.68421052631578, 'combo platter': 81.42857142857143, 'medovik': 78.57142857142857, 'ru': 50.0}
Botanico at The Garage: {'service': 79.66101694915254, 'gardens': 82.14285714285714, 'botanic gardens': 83.33333333333333, 'emz': 100.0, 'ondeh': 68.75, 'amelina': 100.0}
Brasserie Gavroche: {'steak': 83.92857142857143, 'duck confit': 75.0, 'french brass

Coucou Hotpot I12 Katong: {'nice': 93.58974358974359, 'service': 96.59090909090908, 'mala soup': 100.0, 'bubble tea': 71.42857142857143, 'base': 97.05882352941177, 'soup base': 90.625, 'taiwanese': 83.33333333333333, 'steamboat': 100.0, 'lao': 100.0, 'lu': 90.0, 'rou fan': 91.66666666666667}
Crossings Cafe: {'enterprise': 75.0, 'social enterprise': 86.36363636363636, 'profits': 76.66666666666666, 'catholic': 62.5, 'disadvantaged': 75.0}
Crossroads Buffet: {'cafe': 76.66666666666666, 'service staff': 75.0, 'marriott cafe': 70.58823529411764, 'vic': 86.11111111111111, 'breakfast': 84.00000000000001, 'mogan': 100.0}
Crossroads: {'krishnan': 70.0, 'nathan': 75.0, 'venky': 78.57142857142857, 'mr bala': 83.33333333333333, 'jeff': 85.71428571428572}
Dabbawalla: {'naan': 96.875, 'service': 85.22727272727273, 'indian': 84.31372549019608, 'vindaloo': 90.90909090909092, 'quay': 56.81818181818181, 'robertson quay': 63.63636363636363, 'occ': 69.23076923076923, 'dabbawala': 62.5, 'curry culture': 70

fyr: {'tomahawk': 77.77777777777779, 'ribeye': 62.5, 'steak': 72.72727272727273, 'staff': 74.19354838709677, 'time': 67.85714285714286, 'pistachio': 73.07692307692308, 'iberico pork': 66.66666666666666, 'service staff': 100.0, 'entertainer': 50.0}
G7 Sin Ma Live Seafood Restaurant: {'frog porridge': 98.48484848484848, 'frog leg': 68.75, 'leg porridge': 85.0}
Gayatri Restaurant: {'indian': 81.57894736842105, 'mutton': 82.6086956521739, 'mysore': 100.0, 'rice': 57.407407407407405, 'briyani': 72.72727272727273, 'papadam': 31.25, 'naan': 95.0, 'chicken': 81.4814814814815, 'butter chicken': 86.36363636363636, 'excellent service': 100.0, 'dum': 85.71428571428572, 'fish head': 75.0}
Ginett Restaurant & Wine Bar: {'shamil': 88.88888888888889, 'zee': 93.5483870967742, 'fhenezia': 100.0, 'afiq': 100.0}
Ginger: {'buffet': 77.14285714285714, 'service': 87.3015873015873, 'friendly': 91.66666666666667, 'andri': 87.5}
Golden Peony at Conrad Centennial Singapore: {'vanessa': 84.21052631578947, 'excell

Komala's Restaurant: {'indian': 78.87323943661973, 'south indian': 89.1891891891892, 'little india': 35.0, 'place': 58.333333333333336, 'dosa': 61.904761904761905, 'dosas': 77.77777777777779, 'thali': 83.33333333333333, 'masala': 57.14285714285714, 'komalas': 83.33333333333333, 'self service': 81.81818181818181}
Koon Bak Kut Teh Cafe: {'soup': 84.48275862068965, 'dart': 80.0, 'nice place': 100.0, 'bee hoon': 100.0}
Kucina Italian Restaurant: {'really': 89.23076923076923, 'pizza': 79.8507462686567, 'pasta': 92.85714285714286, 'kinex': 90.90909090909092, 'gnocchi': 90.38461538461539, 'truffle': 79.16666666666667, 'la barca': 86.66666666666667, 'e tartufo': 72.72727272727273, 'tortellini': 70.0, 'antipasto': 94.44444444444444}
Kuriya Dining: {'sashimi': 93.93939393939394, 'fresh': 96.42857142857143, 'service': 90.67796610169492, 'sushi': 90.90909090909092, 'japanese restaurants': 85.71428571428572}
L'Angelus: {'service': 91.12903225806453, 'dinner': 70.51282051282051, 'escargots': 90.9090

Merci Marcel Tiong Bahru: {'service': 96.22641509433963, 'ravioli': 100.0, 'love': 97.36842105263158, 'come back': 83.33333333333333, 'melody': 90.90909090909092, 'felicia': 89.47368421052632, 'gurprit': 70.0, 'shuang': 81.25, 'mugilan': 83.33333333333333, 'danni': 100.0}
mezza9: {'hyatt': 76.92307692307692, 'excellent service': 87.5, 'loges': 90.90909090909092, 'sam': 100.0}
Mikuni: {'elaine': 95.83333333333333, 'excellent service': 100.0, 'samantha': 100.0, 'abegail': 82.14285714285714, 'tempura': 89.28571428571428}
Min Jiang Goodwood Park Hotel: {'dishes': 63.1578947368421, 'duck': 70.2127659574468, 'service': 67.1875, 'peking duck': 74.13793103448276, 'chinese': 80.3030303030303, 'sum': 75.0, 'dim sum': 85.29411764705883}
Mitsuba Japanese Restaurant: {'service': 80.85106382978724, 'fresh': 85.71428571428572, 'premium buffet': 100.0, 'thick fresh': 100.0}
Mondo Mio: {'pizza': 75.53191489361701, 'italian': 85.71428571428572}
MONTI: {'view': 89.53488372093024, 'excellent service': 100

Pete's Place: {'pizza': 69.11764705882352, 'service': 85.71428571428572, 'tiramisu': 63.33333333333333, 'italian': 87.93103448275862, 'hyatt': 81.81818181818181, 'buffet': 65.21739130434783, 'salad bar': 69.23076923076923}
Pietrasanta: {'service': 84.00000000000001, 'authentic italian': 100.0, 'pizza': 67.74193548387098, 'pasta': 91.25, 'burrata': 82.85714285714285}
Pince and Pints (Duxton): {'roll': 72.34042553191489, 'lobster roll': 76.19047619047619, 'lobsters': 78.0}
PIPES by HATTENDO: {'latte': 95.23809523809523, 'matcha latte': 78.57142857142857, 'well': 91.37931034482759, 'really': 94.15584415584415, 'aburiya': 87.5, 'wagyu beef': 88.63636363636364, 'sando': 57.89473684210527, 'buns': 93.5483870967742, 'cream buns': 88.88888888888889, 'toast': 84.61538461538461, 'burger': 82.35294117647058, 'bun': 82.6923076923077}
Pistachio Middle Eastern & Mediterranean Grill: {'service': 85.57692307692308, 'hummus': 75.0, 'kebab': 50.0, 'pita bread': 96.66666666666667, 'khaled': 82.8125, 'che

Shabestan Finest Persian Cuisine: {'hummus': 90.0, 'middle eastern': 81.81818181818181, 'kebab': 79.41176470588236, 'service staff': 100.0, 'singapore river': 100.0, 'iranian': 62.5}
Shahi Maharani North Indian Restaurant: {'service': 86.44067796610169, 'buffet': 82.6086956521739, 'live music': 58.333333333333336}
Shake Farm: {'place': 94.33962264150944, 'protein': 95.45454545454545, 'healthy options': 75.0, 'shakes': 91.66666666666667, 'mia': 88.88888888888889}
Shangri-la's Rasa Sentosa Resort Restaurants: {'hotel': 84.67741935483872, 'staff': 68.6046511627907, 'breakfast': 68.6046511627907, 'room': 68.05555555555556, 'pool': 80.3030303030303, 'buffet breakfast': 64.28571428571428, 'beach': 58.69565217391304, 'shangri': 100.0, 'kids club': 83.33333333333333}
Shashlik: {'shaslik': 78.57142857142857, 'soup': 75.0, 'borsch soup': 58.333333333333336, 'russian': 50.0, 'oxtail stew': 50.0, 'alaska': 71.42857142857143, 'baked alaska': 66.66666666666666, 'service staff': 60.0}
Shinzo Japanese

The Boiler (Esplanade): {'free lobster': 75.0, 'crab': 70.0, 'birthday': 75.0, 'bag': 65.38461538461539, 'staff': 75.80645161290323, 'service': 77.08333333333333, 'seafood bag': 93.75}
The Boiler (Howard Road): {'service': 78.88888888888889, 'bag': 73.07692307692308, 'crab': 66.66666666666666, 'sauce': 72.22222222222221, 'bombdiggity': 50.0, 'free lobster': 80.0, 'birthday cake': 85.71428571428572, 'seafood bag': 28.57142857142857, 'garlic butter': 100.0}
The Carvery: {'beef': 78.57142857142857, 'spread': 82.14285714285714, 'chope': 70.58823529411764, 'service staff': 100.0, 'buffet spread': 83.33333333333333, 'roast beef': 77.77777777777779, 'roasts': 88.88888888888889}
The Chop House Vivo City: {'steak': 68.6046511627907, 'staff': 77.41935483870968, 'service': 68.0, 'burger': 60.0, 'steaks': 54.54545454545454}
The Clifford Pier: {'fullerton': 76.66666666666666, 'marina bay': 80.76923076923077, 'service': 80.0, 'staff': 55.319148936170215}
The Coconut Club: {'lemak': 75.23809523809524

TWG Tea at ION Orchard: {'teas': 84.78260869565217, 'scones': 70.0}
TWG Tea Garden at Marina Bay Sands: {'teas': 91.30434782608697, 'scones': 85.0}
Two Chefs Bar Mexican & Italian: {'excellent service': 100.0, 'ash': 85.71428571428572, 'friendly staff': 92.85714285714286, 'quayside': 75.0, 'fajitas': 87.5, 'lea': 93.75, 'pizza': 78.26086956521738, 'vicky': 92.85714285714286, 'cove': 78.57142857142857, 'sentosa cove': 75.0, 'margarita': 81.81818181818181}
Uncle Leong Seafood: {'service': 87.5, 'crab': 72.82608695652173, 'crabs': 78.94736842105263, 'chilli crab': 58.333333333333336, 'shimmering': 83.33333333333333, 'sand crab': 100.0}
Uya Japanese Unagi Restaurant: {'hitsumabushi': 72.72727272727273, 'eel': 72.58064516129032}
Vatan Se: {'paneer': 97.22222222222221, 'service': 98.33333333333334, 'tikka': 81.81818181818181}
Vatos Urban Tacos: {'galbi': 83.33333333333333, 'service': 63.46153846153846, 'mexican': 77.08333333333333, 'taco': 60.71428571428571}
Verre Modern Bistro & Wine Bar: {

In [14]:
import json

# Export found sentiment scores
with open("sentiments.json", "w") as outfile:
    json.dump(output_dictionary, outfile)

In [17]:
import pickle
# Export TFIDF Vectorizer

out = open('tfidf_vectorizer.pkl', 'wb')
pickle.dump(vectorizer, out)

In [19]:
new_vectorizer = pickle.load(open('tfidf_vectorizer.pkl', 'rb'))

In [20]:
print(vectorizer == new_vectorizer)

False


In [22]:
print(tfidf_matrix.shape)

(438, 36229)


In [23]:
out.close()