In [36]:
import numpy as np
import pandas as pd
from pprint import pprint

from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import opinion_lexicon
from nltk.sentiment import SentimentIntensityAnalyzer

from textblob import TextBlob
import spacy
from spacy import displacy
from spacy.matcher import PhraseMatcher
from difflib import SequenceMatcher
from difflib import get_close_matches

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

import json
import pickle

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
with open('data/reviews_clean.json', 'rb') as f:
    data_full = json.load(f)

In [4]:
data = data_full[1115:1121]

In [9]:
data

[{'phone': '+16464814044',
  'reviews': ["Wow! Had never heard of this place before but happened to walk by and the pretty decor and petit fours lured me in. I purchased one each of the three petit four flavors they had available. Got home and was absolutely blown away at how moist and flavorful (yet light!) they each were. Loved all three flavors (vanilla, rose, lavender). \xa0In terms of consistency and taste they reminded me of some of the better cakes I had while in Sweden (sigh, if anyone knows where I can get a good princesstorte in nyc, lmk!). Really tempted to go back and pick up more... \xa0Beautiful packaging too, would make great gifts. They are a bit pricey for the size but they're really delicious and stand out amongst other sweets options in the city. Great customer service, too. Highly recommend!",
   "Great service. Good coffee. Amazing pastries. You can't go wrong.\n\nI ordered a cappuccino. My sister is gawking over the pastries and awaiting her latte and croissant.\n

In [14]:
review = data[1]['reviews']
menu = data[1]['menu_data'][0]['menu']

In [28]:
review_sents = [sent_tokenize(r) for r in review]
pprint(review_sents)

[['I am discerning when it comes to five star reviews.',
  "Oh, 4-star reviews, I'll hand those out with reckless abandon.",
  'Ha!',
  'However, a trip to Kirsh left me feeling reeeeally really good.',
  'The Beau and I went for brunch on a Saturday afternoon around 1 p.m.; props '
  'to the new Yelp feature I was unaware of, in which the host put our name '
  'into their system and I got a text link telling me our approximate wait '
  'time AND the number of parties in front of us!',
  'After a scant 10 minute wait, we were seating in their bright - though a '
  'tad tight - dining area.',
  'Thrillist recently toted Kirsh as having the best French Toast on the Upper '
  'West Side, and that is a very fair assessment!',
  'There are a variety of options - both sweet and savory - and he went for '
  'the sweet Cinnamon Pear version, complete with mascarpone cheese on the '
  'side.',
  'This truly epitomized excellent French toast: the bread was thick, but '
  'airy.',
  'The flavor w

In [33]:
review_words = [word_tokenize(r) for r in review]
pprint(review_words)

[['I',
  'am',
  'discerning',
  'when',
  'it',
  'comes',
  'to',
  'five',
  'star',
  'reviews',
  '.',
  'Oh',
  ',',
  '4-star',
  'reviews',
  ',',
  'I',
  "'ll",
  'hand',
  'those',
  'out',
  'with',
  'reckless',
  'abandon',
  '.',
  'Ha',
  '!',
  'However',
  ',',
  'a',
  'trip',
  'to',
  'Kirsh',
  'left',
  'me',
  'feeling',
  'reeeeally',
  'really',
  'good',
  '.',
  'The',
  'Beau',
  'and',
  'I',
  'went',
  'for',
  'brunch',
  'on',
  'a',
  'Saturday',
  'afternoon',
  'around',
  '1',
  'p.m.',
  ';',
  'props',
  'to',
  'the',
  'new',
  'Yelp',
  'feature',
  'I',
  'was',
  'unaware',
  'of',
  ',',
  'in',
  'which',
  'the',
  'host',
  'put',
  'our',
  'name',
  'into',
  'their',
  'system',
  'and',
  'I',
  'got',
  'a',
  'text',
  'link',
  'telling',
  'me',
  'our',
  'approximate',
  'wait',
  'time',
  'AND',
  'the',
  'number',
  'of',
  'parties',
  'in',
  'front',
  'of',
  'us',
  '!',
  'After',
  'a',
  'scant',
  '10',
  'minute',

  'is',
  'a',
  'to-go',
  'bakery',
  'up',
  'at',
  'the',
  'front',
  ',',
  'but',
  'the',
  'dining',
  'area',
  'is',
  'to',
  'your',
  'right',
  'when',
  'you',
  'enter',
  '.',
  'A',
  'great',
  'spot',
  'for',
  'breakfast',
  ',',
  'I',
  "'d",
  'love',
  'to',
  'come',
  'back',
  'and',
  'try',
  'out',
  'their',
  'French',
  'Toasts',
  'and',
  'the',
  'quiche',
  '.',
  '+housemade',
  'gravy',
  '!',
  '!',
  '!',
  '+buttered',
  'toast',
  '+great',
  ',',
  'simple',
  'breakfast',
  'plates',
  '+avoid',
  'the',
  'home',
  'fries',
  '!',
  '+to-go',
  'bakery',
  'available',
  '+dining',
  'area',
  'to',
  'the',
  'right',
  'of',
  'the',
  'restaurant'],
 ['Came',
  'here',
  'for',
  'breakfast',
  'and',
  'had',
  'a',
  'nice',
  'meal',
  '!',
  'The',
  'place',
  'is',
  'very',
  'clean',
  'and',
  'tidy',
  '.',
  'We',
  'were',
  'seated',
  'quickly',
  'and',
  'the',
  'food',
  'was',
  'served',
  'in',
  'a',
  'good',
 

In [48]:
flat_sents = [r for review in review_sents for r in review]

# Sentiment Analysis

#### nltk.corpus.opinion_lexicon

In [79]:
negative_lex = opinion_lexicon.negative()

In [82]:
positive_lex = opinion_lexicon.positive()

#### nltk.sentiment.SentimentIntensityAnalyzer

In [134]:
sid = SentimentIntensityAnalyzer()

In [136]:
scores = [intensity_analyzer.polarity_scores(s) for s in flat_sents]

In [137]:
scores

[{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0},
 {'neg': 0.354, 'neu': 0.443, 'pos': 0.203, 'compound': -0.34},
 {'neg': 0.0, 'neu': 0.0, 'pos': 1.0, 'compound': 0.4003},
 {'neg': 0.0, 'neu': 0.63, 'pos': 0.37, 'compound': 0.5709},
 {'neg': 0.035, 'neu': 0.883, 'pos': 0.082, 'compound': 0.3595},
 {'neg': 0.0, 'neu': 0.838, 'pos': 0.162, 'compound': 0.4404},
 {'neg': 0.0, 'neu': 0.728, 'pos': 0.272, 'compound': 0.7955},
 {'neg': 0.0, 'neu': 0.793, 'pos': 0.207, 'compound': 0.7184},
 {'neg': 0.0, 'neu': 0.699, 'pos': 0.301, 'compound': 0.5106},
 {'neg': 0.193, 'neu': 0.7, 'pos': 0.107, 'compound': -0.2755},
 {'neg': 0.0, 'neu': 0.859, 'pos': 0.141, 'compound': 0.4215},
 {'neg': 0.14, 'neu': 0.696, 'pos': 0.164, 'compound': 0.2467},
 {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0},
 {'neg': 0.146, 'neu': 0.787, 'pos': 0.067, 'compound': -0.4019},
 {'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0},
 {'neg': 0.0, 'neu': 0.829, 'pos': 0.171, 'compound': 0.5719},
 {'neg': 0.

In [140]:
compound = np.array([s['compound'] for s in scores])

In [141]:
ranked = list(np.argsort(compound)[::-1])

In [145]:
for r in ranked:
    print(compound[r], flat_sents[r])

0.9761 The hubs had the Power Breakfast (which comes with 3 eggs, any style, potatoes, salad, bacon or sausage as your meat choice, and toast, in addition to a choice of juice with coffee or tea; I wasn't a fan of the home fries, as they had an off-taste to them, but everything else was flavored well and tasty; the hubs really enjoyed the sausage as his meat choice, as it was a thicker patty; juice was fresh, and loved the generous pour of the coffee); and, I had the Steak & Eggs (which came with gravy, garlic, and a side of roasted potatoes; the potatoes were the same as the home fries, which I wasn't a fan of; the steak was seasoned well; the highlight of the dish, though, was the housemade gravy!
0.9081 Clean, beautiful restaurant with good food and good iced coffee.
0.8834 Hence, which is also why I love:

- Westside's veggie market platter 
- Bento boxes of any kind 

The Healthy Breakfast comes with two slices of absolute GODLY toast, scrambled egg whites, half of an avocado, a l

#### TextBlob

In [154]:
polarities = []
for s in flat_sents:
    test = TextBlob(s)
    polarities.append(test.sentiment.polarity)

In [155]:
pol_ranked = list(np.argsort(polarities)[::-1])

In [159]:
for p in pol_ranked:
    print(polarities[p], flat_sents[p])

1.0 Service is definitely top-notch at Kirsh.
1.0 I ordered a strawberry shortcake for my mom's birthday and it was delicious!
1.0 DELICIOUS.
1.0 Kirsh is one of the best restaurants in Manhattan!
1.0 Perfect place for breakfast and diner.
1.0 Best chocolate brioche I've ever had.
0.85 This is a beautiful space for coffee and pastries.
0.8 They happily changed the prices for us, though.
0.7500000000000001 What an amazing find!
0.7500000000000001 The service is amazing!
0.75 Came here for breakfast and had a nice meal!
0.74 Also, the lights in the restaurant was very nice, good for taking pictures.
0.7 Service was good too.
0.7 I also got the ricotta pistachio cake which I loved too.
0.7 The pastries were soo good.
0.6541666666666667 Clean, beautiful restaurant with good food and good iced coffee.
0.6499999999999999 Which is what I should have done but did not because the bread was too damn good to eat on its own.
0.6000000000000001 It was a amazing.
0.6 Have to mentioned the service th

In [176]:
review

["I am discerning when it comes to five star reviews. Oh, 4-star reviews, I'll hand those out with reckless abandon. Ha! However, a trip to Kirsh left me feeling reeeeally really good.\n\nThe Beau and I went for brunch on a Saturday afternoon around 1 p.m.; props to the new Yelp feature I was unaware of, in which the host put our name into their system and I got a text link telling me our approximate wait time AND the number of parties in front of us! After a scant 10 minute wait, we were seating in their bright - though a tad tight - dining area.\n\nThrillist recently toted Kirsh as having the best French Toast on the Upper West Side, and that is a very fair assessment! There are a variety of options - both sweet and savory - and he went for the sweet Cinnamon Pear version, complete with mascarpone cheese on the side. This truly epitomized excellent French toast: the bread was thick, but airy. The flavor was buttery, fruity, and spicy without being overwhelmingly sweet. The mascarpone

In [186]:
test = TextBlob(review[2])

In [187]:
for sentence in test.sentences:
    print(sentence, sentence.sentiment.polarity)

I had Kirsch down as one of my top brunch places to try, in part because while I love Jacob's Pickles in theory, a part of me would prefer not to need a long afternoon nap to shake off the food coma. 0.1388888888888889
The brunch menu at Kirsch covers the full spectrum but to be honest, I was here for one thing: the Healthy Breakfast. 0.48333333333333334
I am a sucker for dishes that have a huge variety of different things on one plate and yes, I am that person that eats each item separately, saving what I think is the best for last. 0.22000000000000003
Hence, which is also why I love:

- Westside's veggie market platter 
- Bento boxes of any kind 

The Healthy Breakfast comes with two slices of absolute GODLY toast, scrambled egg whites, half of an avocado, a little cup of yogurt, and a heap of green salad. 0.17797619047619048
Here is how you luxuriate in this plate of food: douse the yogurt in honey. 0.0
Or you could also try dipping home fries in the yogurt. 0.0
Then slather the avo

# Named Entity Recognition

In [199]:
nlp = spacy.load('en')

In [196]:
review[0]

"I am discerning when it comes to five star reviews. Oh, 4-star reviews, I'll hand those out with reckless abandon. Ha! However, a trip to Kirsh left me feeling reeeeally really good.\n\nThe Beau and I went for brunch on a Saturday afternoon around 1 p.m.; props to the new Yelp feature I was unaware of, in which the host put our name into their system and I got a text link telling me our approximate wait time AND the number of parties in front of us! After a scant 10 minute wait, we were seating in their bright - though a tad tight - dining area.\n\nThrillist recently toted Kirsh as having the best French Toast on the Upper West Side, and that is a very fair assessment! There are a variety of options - both sweet and savory - and he went for the sweet Cinnamon Pear version, complete with mascarpone cheese on the side. This truly epitomized excellent French toast: the bread was thick, but airy. The flavor was buttery, fruity, and spicy without being overwhelmingly sweet. The mascarpone 

In [197]:
doc = nlp(review[0])

In [198]:
for ent in doc.ents:
    print(ent.text, ent.label_)

five CARDINAL
4-star CARDINAL
Kirsh PERSON
Beau PERSON
Saturday DATE
afternoon TIME
around 1 p.m. TIME
Yelp PRODUCT
10 minute TIME
Thrillist PERSON
Kirsh PERSON
French NORP
the Upper West Side LOC
Cinnamon Pear PRODUCT
French NORP
TOO ORG
BLT ORG
BLT ORG
Haaa PERSON
five CARDINAL
one CARDINAL
Kirsh PERSON
the day DATE
Kirsh PERSON


In [200]:
doc = nlp(review[0])
displacy.render(doc, style='ent', jupyter=True)

In [201]:
for chunk in doc.noun_chunks:
    print(chunk.text, chunk.label_, chunk.root.text)

I NP I
it NP it
five star reviews NP reviews
I NP I
reckless abandon NP abandon
a trip NP trip
Kirsh NP Kirsh
me NP me
The Beau NP Beau
I NP I
brunch NP brunch
a Saturday afternoon NP afternoon
1 p.m. NP p.m.
the new Yelp feature NP feature
I NP I
the host NP host
our name NP name
their system NP system
I NP I
a text link NP link
me NP me
our approximate wait time NP time
the number NP number
parties NP parties
front NP front
us NP us
a scant 10 minute wait NP wait
we NP we
a tad NP tad
Thrillist NP Thrillist
Kirsh NP Kirsh
the best French Toast NP Toast
the Upper West Side NP Side
a very fair assessment NP assessment
a variety NP variety
options NP options
he NP he
the sweet Cinnamon Pear version NP version
mascarpone cheese NP cheese
the side NP side
excellent French toast NP toast
the bread NP bread
airy NP airy
The flavor NP flavor
The mascarpone NP mascarpone
a nice alternative NP alternative
butter NP butter
the flavor NP flavor
the dish NP dish
I NP I
I NP I
myself NP myself
I N

In [202]:
for token in doc:
    print("{0}/{1} <--{2}-- {3}/{4}".format(
        token.text, token.tag_, token.dep_, token.head.text, token.head.tag_))

I/PRP <--nsubj-- discerning/VBG
am/VBP <--aux-- discerning/VBG
discerning/VBG <--ROOT-- discerning/VBG
when/WRB <--advmod-- comes/VBZ
it/PRP <--nsubj-- comes/VBZ
comes/VBZ <--advcl-- discerning/VBG
to/IN <--prep-- comes/VBZ
five/CD <--nummod-- reviews/NNS
star/NN <--compound-- reviews/NNS
reviews/NNS <--pobj-- to/IN
./. <--punct-- discerning/VBG
Oh/UH <--intj-- reviews/NNS
,/, <--punct-- reviews/NNS
4-star/NN <--nummod-- reviews/NNS
reviews/NNS <--npadvmod-- hand/VB
,/, <--punct-- hand/VB
I/PRP <--nsubj-- hand/VB
'll/MD <--aux-- hand/VB
hand/VB <--ROOT-- hand/VB
those/DT <--dobj-- hand/VB
out/RP <--prt-- hand/VB
with/IN <--prep-- hand/VB
reckless/JJ <--amod-- abandon/NN
abandon/NN <--pobj-- with/IN
./. <--punct-- hand/VB
Ha/UH <--ROOT-- Ha/UH
!/. <--punct-- Ha/UH
However/RB <--advmod-- left/VBD
,/, <--punct-- left/VBD
a/DT <--det-- trip/NN
trip/NN <--nsubj-- left/VBD
to/IN <--prep-- trip/NN
Kirsh/NNP <--pobj-- to/IN
left/VBD <--ROOT-- left/VBD
me/PRP <--dobj-- left/VBD
feeling/NN <--ad

In [203]:
displacy.render(doc, style='dep', jupyter=True, options={'distance': 90})

# Computing similarities

#### difflib.SequenceMatcher

In [247]:
match_sentence = review_sents[7][0].lower()
match_sentence

"i ordered a strawberry shortcake for my mom's birthday and it was delicious!"

In [260]:
ratios = []
for item in items:
    m = SequenceMatcher(None, item, match_sentence)
    ratios.append( m.ratio() )

In [266]:
ratio_rankings = np.argsort(np.array(ratios))[::-1]

In [268]:
for r in ratio_rankings:
    print(items[r], ratios[r])

Strawberry Shortcake Layered Cake 0.45871559633027525
Grilled Chicken Breast Sandwich 0.3364485981308411
Goat Cheese and Tomato Sandwich 0.3177570093457944
Spiced Jasmine Rice and Lentils 0.3177570093457944
Grilled Eggplant and Garlic Aioli 0.3119266055045872
Sauteed Wild Shrimp and Artichokes 0.2909090909090909
Sweet Ricota Cherry Danish 0.27450980392156865
Lox, Creme Fraiche and Spring Onion French Toast 0.27419354838709675
Mascarpone Cream and Mixed Berry Jam French Toast 0.272
Bread and Pastry Basket 0.26262626262626265
Black Forest Layer Cake 0.26262626262626265
Pear & Almond Tarte Tatin 0.25742574257425743
Shepherd Salad  and Chevre 0.2549019607843137
Mushroom and Zucchini Risotto 0.24761904761904763
Pesto and Chicken Pasta 0.24242424242424243
Bacon, Swiss and Egg French Toast 0.23853211009174313
Polenta and Farro 0.23655913978494625
Fresh Fruit Salad 0.23655913978494625
Raw Root Vegetables and Gorgonzola 0.23636363636363636
Large Creme Schnitt 0.23157894736842105
Potato Rosemary

In [274]:
review_sents[7][1]

'I have eaten at their cafe and love their egg dishes with bacon and toast.'

In [275]:
match_sentence_2 = review_sents[7][1]

In [276]:
ratios2 = []
for item in items:
    m = SequenceMatcher(None, item, match_sentence_2)
    ratios2.append( m.ratio() )

In [277]:
ratio2_rankings = np.argsort(np.array(ratios2))[::-1]

In [278]:
for r in ratio2_rankings:
    print(items[r], ratios2[r])

Lox, Creme Fraiche and Spring Onion French Toast 0.39344262295081966
Goat Cheese and Tomato Sandwich 0.34285714285714286
Raw Root Vegetables and Gorgonzola 0.3333333333333333
Quinoa and Tumeric Vegetable Stew 0.3177570093457944
Bacon, Swiss and Egg French Toast 0.3177570093457944
Pesto and Chicken Pasta 0.30927835051546393
Spiced Jasmine Rice and Lentils 0.3047619047619048
Mushroom and Zucchini Risotto 0.2912621359223301
Chocolate Orange Crinkle Cookie 0.2857142857142857
Sweet Ricota Cherry Danish 0.28
Sauteed Wild Shrimp and Artichokes 0.2777777777777778
Chocolate Peanut Butter Pie 0.27722772277227725
Mascarpone Cream and Mixed Berry Jam French Toast 0.2764227642276423
Cinnamon and Pear French Toast 0.2692307692307692
Wheat Germ Bread 0.26666666666666666
Chocolate Hazelnut Danish 0.26262626262626265
Steak and Eggs 0.25
Chocolate Brioche Bread 0.24742268041237114
Endive, Radicchio, Seasonal Fruit 0.24299065420560748
Pear & Almond Tarte Tatin 0.24242424242424243
Healthy Breakfast 0.2417

In [286]:
test = 'egg dishes with bacon and toast'

In [287]:
ratios3 = []
for item in items:
    m = SequenceMatcher(None, item, test)
    ratios3.append( m.ratio() )

In [288]:
ratio3_rankings = np.argsort(np.array(ratios3))[::-1]

for r in ratio3_rankings:
    print(items[r], ratios3[r])

Pesto and Chicken Pasta 0.4444444444444444
Cinnamon and Pear French Toast 0.4262295081967213
Bacon, Swiss and Egg French Toast 0.40625
Spicy Avocado Toast 0.4
Goat Cheese and Tomato Sandwich 0.3870967741935484
Bread and Pastry Basket 0.37037037037037035
Pistachio Biscotti 0.3673469387755102
Steak and Eggs 0.35555555555555557
Shepherd Salad  and Chevre 0.3508771929824561
Beef Stroganoff 0.34782608695652173
Pistachio Coffee Cake 0.34615384615384615
Grilled Eggplant and Garlic Aioli 0.34375
Full English Breakfast 0.33962264150943394
Raw Root Vegetables and Gorgonzola 0.3384615384615385
Mushroom and Zucchini Risotto 0.3333333333333333
Healthy Breakfast 0.3333333333333333
Polenta and Farro 0.3333333333333333
Lox, Creme Fraiche and Spring Onion French Toast 0.3291139240506329
Bread Basket 0.32558139534883723
Mascarpone Cream and Mixed Berry Jam French Toast 0.325
Spiced Jasmine Rice and Lentils 0.3225806451612903
Quinoa and Tumeric Vegetable Stew 0.3125
Mac and Gruyere 0.30434782608695654
Am

#### Cosine similarities tfidf vectorizer

In [312]:
vect = TfidfVectorizer(stop_words='english')
menu_counts = vect.fit_transform(items)

In [313]:
test

'egg dishes with bacon and toast'

In [314]:
test_mat = vect.transform([test])

In [315]:
print(vect.vocabulary_)

{'coffee': 36, 'cappuccino': 24, 'latte': 83, 'americano': 2, 'mocha': 96, 'herbal': 72, 'tea': 162, 'hot': 74, 'chocolate': 34, 'fruit': 61, 'smoothie': 147, 'frozen': 60, 'mint': 94, 'lemonade': 87, 'orange': 102, 'juice': 78, 'grapefruit': 66, 'bread': 16, 'pastry': 106, 'basket': 7, 'oatmeal': 99, 'eggs': 48, 'style': 157, 'healthy': 71, 'breakfast': 17, 'omelette': 100, 'english': 50, 'steak': 153, 'daily': 44, 'quiche': 119, 'lox': 89, 'creme': 42, 'fraiche': 55, 'spring': 152, 'onion': 101, 'french': 56, 'toast': 163, 'bacon': 5, 'swiss': 159, 'egg': 46, 'mascarpone': 91, 'cream': 41, 'mixed': 95, 'berry': 10, 'jam': 76, 'cinnamon': 35, 'pear': 108, 'banana': 6, 'caramel': 25, 'walnut': 169, 'home': 73, 'fries': 58, 'ham': 69, 'house': 75, 'sausage': 133, 'fresh': 57, 'salad': 130, 'organic': 103, 'yogurt': 174, 'wheat': 171, 'germ': 63, 'seeded': 140, 'sourdough': 149, 'potato': 114, 'rosemary': 129, 'brioche': 19, 'rolls': 127, 'wheatgerm': 172, 'flutes': 53, 'cheesesticks': 3

In [330]:
similarities = cosine_similarity(menu_counts, test_mat)

In [332]:
flat = [s for sim in similarities for s in sim]

In [336]:
ranked_cos = np.argsort(flat)[::-1]

In [337]:
for r in ranked_cos:
    print(flat[r], items[r])

0.7948639538998923 Bacon, Swiss and Egg French Toast
0.5926234123863302 Bacon
0.22426810194212127 Spicy Avocado Toast
0.21448419342354885 Cinnamon and Pear French Toast
0.17682463412427302 Banana Caramel Walnut French Toast
0.15028962845362379 Lox, Creme Fraiche and Spring Onion French Toast
0.14502466492418795 Mascarpone Cream and Mixed Berry Jam French Toast
0.0 Potato Rosemary Bread
0.0 House Sausage
0.0 House-Made Lox
0.0 Fresh Fruit Salad
0.0 Organic Yogurt
0.0 Wheat Germ Bread
0.0 Seeded Sourdough
0.0 Brioche Rolls
0.0 Chocolate Brioche Bread
0.0 Potato Rosemary Rolls
0.0 Wheatgerm Flutes
0.0 Swiss Cheesesticks
0.0 Plain Danish
0.0 Almond Danish
0.0 Chocolate Hazelnut Danish
0.0 Cinnamon-Frosted Danish
0.0 Sweet Ricota Cherry Danish
0.0 Savory Danish
0.0 Mini Kugelhopf Cake
0.0 Ham
0.0 Strawberry Shortcake Layered Cake
0.0 Home Fries
0.0 Black Forest Layer Cake
0.0 Cappuccino
0.0 Latte
0.0 Americano
0.0 Mocha
0.0 Herbal Tea
0.0 Hot Chocolate    
0.0 Fruit Smoothie
0.0 Frozen Mint

In [340]:
test2_mat = vect.transform([review_sents[7][1]])

similarities2 = cosine_similarity(menu_counts, test2_mat)

flat2 = [s for sim in similarities2 for s in sim]

ranked_cos2 = np.argsort(flat2)[::-1]

for r in ranked_cos2:
    print(flat2[r], items[r])

0.7948639538998923 Bacon, Swiss and Egg French Toast
0.5926234123863302 Bacon
0.22426810194212127 Spicy Avocado Toast
0.21448419342354885 Cinnamon and Pear French Toast
0.17682463412427302 Banana Caramel Walnut French Toast
0.15028962845362379 Lox, Creme Fraiche and Spring Onion French Toast
0.14502466492418795 Mascarpone Cream and Mixed Berry Jam French Toast
0.0 Potato Rosemary Bread
0.0 House Sausage
0.0 House-Made Lox
0.0 Fresh Fruit Salad
0.0 Organic Yogurt
0.0 Wheat Germ Bread
0.0 Seeded Sourdough
0.0 Brioche Rolls
0.0 Chocolate Brioche Bread
0.0 Potato Rosemary Rolls
0.0 Wheatgerm Flutes
0.0 Swiss Cheesesticks
0.0 Plain Danish
0.0 Almond Danish
0.0 Chocolate Hazelnut Danish
0.0 Cinnamon-Frosted Danish
0.0 Sweet Ricota Cherry Danish
0.0 Savory Danish
0.0 Mini Kugelhopf Cake
0.0 Ham
0.0 Strawberry Shortcake Layered Cake
0.0 Home Fries
0.0 Black Forest Layer Cake
0.0 Cappuccino
0.0 Latte
0.0 Americano
0.0 Mocha
0.0 Herbal Tea
0.0 Hot Chocolate    
0.0 Fruit Smoothie
0.0 Frozen Mint

In [342]:
test3_mat = vect.transform([review_sents[7][0]])

similarities3 = cosine_similarity(menu_counts, test3_mat)

flat3 = [s for sim in similarities3 for s in sim]

ranked_cos3 = np.argsort(flat3)[::-1]

for r in ranked_cos3:
    print(flat3[r], items[r])

0.745075896393277 Strawberry Shortcake Layered Cake
0.0 Banana Caramel Walnut French Toast
0.0 Bacon
0.0 Ham
0.0 House Sausage
0.0 House-Made Lox
0.0 Fresh Fruit Salad
0.0 Organic Yogurt
0.0 Wheat Germ Bread
0.0 Seeded Sourdough
0.0 Potato Rosemary Bread
0.0 Chocolate Brioche Bread
0.0 Brioche Rolls
0.0 Potato Rosemary Rolls
0.0 Wheatgerm Flutes
0.0 Swiss Cheesesticks
0.0 Plain Danish
0.0 Almond Danish
0.0 Chocolate Hazelnut Danish
0.0 Cinnamon-Frosted Danish
0.0 Sweet Ricota Cherry Danish
0.0 Savory Danish
0.0 Mini Kugelhopf Cake
0.0 Home Fries
0.0 Cinnamon and Pear French Toast
0.0 Black Forest Layer Cake
0.0 Mascarpone Cream and Mixed Berry Jam French Toast
0.0 Cappuccino
0.0 Latte
0.0 Americano
0.0 Mocha
0.0 Herbal Tea
0.0 Hot Chocolate    
0.0 Fruit Smoothie
0.0 Frozen Mint Lemonade 
0.0 Orange Juice
0.0 Lemonade
0.0 Grapefruit Juice
0.0 Bread and Pastry Basket
0.0 Oatmeal
0.0 2 Eggs Any Style
0.0 Healthy Breakfast
0.0 Omelette
0.0 Full English Breakfast
0.0 Steak and Eggs
0.0 Dai

#### Cosine Similarity on other reviews

In [361]:
review_sents = sent_tokenize(review[0])

In [363]:
review_mat = vect.transform(review_sents)
sim_mat = cosine_similarity(menu_counts, review_mat)

In [365]:
for i in range(sim_mat.shape[1]):
    print(review_sents[i])
    rankings = np.argsort(sim_mat[:,i])[::-1]
    for r in rankings[:3]:
        print(sim_mat[r,i], items[r])

I am discerning when it comes to five star reviews.
0.0 Strawberry Shortcake Layered Cake
0.0 Banana Caramel Walnut French Toast
0.0 Bacon
Oh, 4-star reviews, I'll hand those out with reckless abandon.
0.0 Strawberry Shortcake Layered Cake
0.0 Banana Caramel Walnut French Toast
0.0 Bacon
Ha!
0.0 Strawberry Shortcake Layered Cake
0.0 Banana Caramel Walnut French Toast
0.0 Bacon
However, a trip to Kirsh left me feeling reeeeally really good.
0.6455773222459829 Kirsh House Salad
0.0 Strawberry Shortcake Layered Cake
0.0 Brioche Rolls
The Beau and I went for brunch on a Saturday afternoon around 1 p.m.; props to the new Yelp feature I was unaware of, in which the host put our name into their system and I got a text link telling me our approximate wait time AND the number of parties in front of us!
0.0 Strawberry Shortcake Layered Cake
0.0 Banana Caramel Walnut French Toast
0.0 Bacon
After a scant 10 minute wait, we were seating in their bright - though a tad tight - dining area.
0.0 Strawb

In [367]:
review_sents = sent_tokenize(review[1])
review_mat = vect.transform(review_sents)
sim_mat = cosine_similarity(menu_counts, review_mat)
for i in range(sim_mat.shape[1]):
    print(review_sents[i])
    rankings = np.argsort(sim_mat[:,i])[::-1]
    for r in rankings[:3]:
        print(sim_mat[r,i], items[r])

Went here for lunch during a weekday so there was no wait at all.
0.0 Strawberry Shortcake Layered Cake
0.0 Banana Caramel Walnut French Toast
0.0 Bacon
My friend and I ordered the lox french toast and the quiche of the day to share.
0.4762544040922819 Lox, Creme Fraiche and Spring Onion French Toast
0.4115781688584351 Daily Quiche
0.3898422431275875 House-Made Lox
Food:
-Lox french toast: 4/5.
0.5856934159517467 Lox, Creme Fraiche and Spring Onion French Toast
0.47942451155883875 House-Made Lox
0.4753677460492079 Cinnamon and Pear French Toast
The french toast bread was definitely the star of the dish.
0.5146802908548123 Cinnamon and Pear French Toast
0.44166124631278236 Bacon, Swiss and Egg French Toast
0.42431170646528654 Banana Caramel Walnut French Toast
It was pretty light and refreshing with the creme fresh.
0.48973638237302075 Fresh Fruit Salad
0.34843882781580543 Large Creme Schnitt
0.34843882781580543 Small Creme Schnitt
Could do with more pieces of salmon, though.
0.73686488

In [371]:
test = TextBlob(review[0])

In [372]:
test.noun_phrases

WordList(['star reviews', 'oh', '4-star reviews', "'ll hand", 'reckless abandon', 'ha', 'kirsh', 'beau', 'yelp', 'text link', 'and', 'thrillist', 'kirsh', 'toast', 'upper', 'side', 'fair assessment', 'cinnamon pear', 'mascarpone cheese', 'nice alternative', "n't order", 'too', 'blte', 'blt', 'emmental cheese', 'blts', 'ever', 'life', 'blts', 'blt', 'sad sandwich', 'haaa', 'buttery roll', 'epic ranch dressing', 'damn', 'delicious', 'ha', 'accidents', 'kirsh', 'own lovely menu', 'kirsh', 'epic gem'])

In [375]:
test.tags

[('I', 'PRP'),
 ('am', 'VBP'),
 ('discerning', 'VBG'),
 ('when', 'WRB'),
 ('it', 'PRP'),
 ('comes', 'VBZ'),
 ('to', 'TO'),
 ('five', 'CD'),
 ('star', 'NN'),
 ('reviews', 'NNS'),
 ('Oh', 'UH'),
 ('4-star', 'JJ'),
 ('reviews', 'NNS'),
 ('I', 'PRP'),
 ("'ll", 'MD'),
 ('hand', 'NN'),
 ('those', 'DT'),
 ('out', 'IN'),
 ('with', 'IN'),
 ('reckless', 'JJ'),
 ('abandon', 'NN'),
 ('Ha', 'NN'),
 ('However', 'RB'),
 ('a', 'DT'),
 ('trip', 'NN'),
 ('to', 'TO'),
 ('Kirsh', 'NNP'),
 ('left', 'VBD'),
 ('me', 'PRP'),
 ('feeling', 'VBG'),
 ('reeeeally', 'RB'),
 ('really', 'RB'),
 ('good', 'JJ'),
 ('The', 'DT'),
 ('Beau', 'NNP'),
 ('and', 'CC'),
 ('I', 'PRP'),
 ('went', 'VBD'),
 ('for', 'IN'),
 ('brunch', 'NN'),
 ('on', 'IN'),
 ('a', 'DT'),
 ('Saturday', 'NNP'),
 ('afternoon', 'NN'),
 ('around', 'RB'),
 ('1', 'CD'),
 ('p.m.', 'NN'),
 ('props', 'NNS'),
 ('to', 'TO'),
 ('the', 'DT'),
 ('new', 'JJ'),
 ('Yelp', 'NNP'),
 ('feature', 'NN'),
 ('I', 'PRP'),
 ('was', 'VBD'),
 ('unaware', 'NN'),
 ('of', 'IN'),
 (

In [379]:
data[2]

{'phone': '+12128772298',
 'reviews': ['This is my second time I had lunch here and I have to say, the staff surprised me. She has very good memory. When I walked in, she already knew what I wanted. She said, "Do you want the same order as yesterday, the chicken broccoli with out broccoliI?" And what I said in response was "WOW, How do you know my order?" And she knows that the soda is not cold enough so she gave me a cup of ice immediately. In addition, the food is delicious too. Also I will try the tapioca tea next time.',
  'My friends order the delivery.Nothing was fine except for the delivery service.\n\nCan\'t stand this kind of "food" as a Chinese. \nThe only reason I give two stars instead of one is because I won\'t give one-star review unless the restaurant leads to a food poison',
  'I just walked in off the street because i was hungry. I ordered the cold sesame noodles. I sat down on a nearby bench to eat it. While exiting out the door to go outside one of the kitchen staff 

In [6]:
review = data[2]['reviews']
menu = data[2]['menu_data'][0]['menu']
review_sents = [sent_tokenize(r) for r in review]
flat_sents = [r for review in review_sents for r in review]
pprint(flat_sents)

['This is my second time I had lunch here and I have to say, the staff '
 'surprised me.',
 'She has very good memory.',
 'When I walked in, she already knew what I wanted.',
 'She said, "Do you want the same order as yesterday, the chicken broccoli '
 'with out broccoliI?"',
 'And what I said in response was "WOW, How do you know my order?"',
 'And she knows that the soda is not cold enough so she gave me a cup of ice '
 'immediately.',
 'In addition, the food is delicious too.',
 'Also I will try the tapioca tea next time.',
 'My friends order the delivery.Nothing was fine except for the delivery '
 'service.',
 'Can\'t stand this kind of "food" as a Chinese.',
 "The only reason I give two stars instead of one is because I won't give "
 'one-star review unless the restaurant leads to a food poison',
 'I just walked in off the street because i was hungry.',
 'I ordered the cold sesame noodles.',
 'I sat down on a nearby bench to eat it.',
 'While exiting out the door to go outside one

In [10]:
items = list(menu.keys())
vect = TfidfVectorizer(stop_words='english')
menu_mat = vect.fit_transform(items)

In [11]:
review_mat = vect.transform(flat_sents)
sim_mat = cosine_similarity(menu_mat, review_mat)
for i in range(sim_mat.shape[1]):
    print(flat_sents[i])
    rankings = np.argsort(sim_mat[:,i])[::-1]
    for r in rankings[:3]:
        print(sim_mat[r,i], items[r])

This is my second time I had lunch here and I have to say, the staff surprised me.
0.35113555865223306 2 Roll Combo Lunch Special
0.35113555865223306 3 Roll Combo Lunch Special
0.2815582807905024 811. 3 Delight Lunch Special
She has very good memory.
0.0 SS33. Crispy Frog with Cured Egg Yolk
0.0 414. Jumbo Shrimp with Asparagus Lunch Special
0.0 413. Jumbo Shrimp with Snow Peas and Mushrooms Lunch Special
When I walked in, she already knew what I wanted.
0.0 SS33. Crispy Frog with Cured Egg Yolk
0.0 414. Jumbo Shrimp with Asparagus Lunch Special
0.0 413. Jumbo Shrimp with Snow Peas and Mushrooms Lunch Special
She said, "Do you want the same order as yesterday, the chicken broccoli with out broccoliI?"
0.49823234684194856 410. Jumbo Shrimp with Broccoli Dinner
0.4893585311729002 809. Broccoli in Garlic Sauce Dinner
0.4843689802757186 410. Jumbo Shrimp with Broccoli Lunch Special
And what I said in response was "WOW, How do you know my order?"
0.0 SS33. Crispy Frog with Cured Egg Yolk
0.

In [29]:
nps = []

for i, sent in enumerate(flat_sents):
    tokenizer = TextBlob(sent)
    tags = tokenizer.noun_phrases    
    nps.append(''.join(t for t in list(tags)))
    
nps

['',
 'good memory',
 '',
 'chicken broccoli',
 'wow',
 '',
 '',
 'tapioca tea',
 'friends orderdelivery service',
 "can't stand",
 "wo n'tone-star reviewfood poison",
 '',
 'cold sesame noodles',
 'nearby bench',
 'kitchen staffblow snot rockets',
 '',
 '',
 '',
 '',
 '',
 'obvious spotsblue/ green',
 '',
 '',
 'sesame saucesoy saucesesame seeds thrown',
 'couldnt finish',
 '',
 'couple bites',
 'cold sesame noodles',
 'chinese place',
 'china',
 'sad excuse',
 'neverkitchen staffnose mucous',
 'local hood chinese spot',
 'shredded beefhot peppers',
 '',
 "n't order",
 '',
 'taro milk teai dont',
 'broadwayuws',
 'washigh end',
 'sorry',
 'eggplant dishegg roll',
 'egg roll',
 '',
 '',
 '',
 '',
 '',
 '$ 14+',
 'wo',
 '',
 '',
 'now.. wont',
 'zero stars',
 'horrible customer serviceollie',
 "'s treatmentfoe",
 'front countertypical behavior',
 'sowaitedwaited',
 '',
 'mouth agape',
 'best thingwalknever go back again',
 'rude',
 'crappy service',
 'take your business where you are ap

In [30]:
flat_sents

['This is my second time I had lunch here and I have to say, the staff surprised me.',
 'She has very good memory.',
 'When I walked in, she already knew what I wanted.',
 'She said, "Do you want the same order as yesterday, the chicken broccoli with out broccoliI?"',
 'And what I said in response was "WOW, How do you know my order?"',
 'And she knows that the soda is not cold enough so she gave me a cup of ice immediately.',
 'In addition, the food is delicious too.',
 'Also I will try the tapioca tea next time.',
 'My friends order the delivery.Nothing was fine except for the delivery service.',
 'Can\'t stand this kind of "food" as a Chinese.',
 "The only reason I give two stars instead of one is because I won't give one-star review unless the restaurant leads to a food poison",
 'I just walked in off the street because i was hungry.',
 'I ordered the cold sesame noodles.',
 'I sat down on a nearby bench to eat it.',
 'While exiting out the door to go outside one of the kitchen staf

In [31]:
review_mat = vect.transform(nps)
sim_mat = cosine_similarity(menu_mat, review_mat)
for i in range(sim_mat.shape[1]):
    print(flat_sents[i])
    rankings = np.argsort(sim_mat[:,i])[::-1]
    for r in rankings[:3]:
        print(sim_mat[r,i], items[r])

This is my second time I had lunch here and I have to say, the staff surprised me.
0.0 SS33. Crispy Frog with Cured Egg Yolk
0.0 414. Jumbo Shrimp with Asparagus Lunch Special
0.0 413. Jumbo Shrimp with Snow Peas and Mushrooms Lunch Special
She has very good memory.
0.0 SS33. Crispy Frog with Cured Egg Yolk
0.0 414. Jumbo Shrimp with Asparagus Lunch Special
0.0 413. Jumbo Shrimp with Snow Peas and Mushrooms Lunch Special
When I walked in, she already knew what I wanted.
0.0 SS33. Crispy Frog with Cured Egg Yolk
0.0 414. Jumbo Shrimp with Asparagus Lunch Special
0.0 413. Jumbo Shrimp with Snow Peas and Mushrooms Lunch Special
She said, "Do you want the same order as yesterday, the chicken broccoli with out broccoliI?"
0.49823234684194856 410. Jumbo Shrimp with Broccoli Dinner
0.4893585311729002 809. Broccoli in Garlic Sauce Dinner
0.4843689802757186 410. Jumbo Shrimp with Broccoli Lunch Special
And what I said in response was "WOW, How do you know my order?"
0.0 SS33. Crispy Frog with C

In [37]:
nps

['',
 'good memory',
 '',
 'chicken broccoli',
 'wow',
 '',
 '',
 'tapioca tea',
 'friends orderdelivery service',
 "can't stand",
 "wo n'tone-star reviewfood poison",
 '',
 'cold sesame noodles',
 'nearby bench',
 'kitchen staffblow snot rockets',
 '',
 '',
 '',
 '',
 '',
 'obvious spotsblue/ green',
 '',
 '',
 'sesame saucesoy saucesesame seeds thrown',
 'couldnt finish',
 '',
 'couple bites',
 'cold sesame noodles',
 'chinese place',
 'china',
 'sad excuse',
 'neverkitchen staffnose mucous',
 'local hood chinese spot',
 'shredded beefhot peppers',
 '',
 "n't order",
 '',
 'taro milk teai dont',
 'broadwayuws',
 'washigh end',
 'sorry',
 'eggplant dishegg roll',
 'egg roll',
 '',
 '',
 '',
 '',
 '',
 '$ 14+',
 'wo',
 '',
 '',
 'now.. wont',
 'zero stars',
 'horrible customer serviceollie',
 "'s treatmentfoe",
 'front countertypical behavior',
 'sowaitedwaited',
 '',
 'mouth agape',
 'best thingwalknever go back again',
 'rude',
 'crappy service',
 'take your business where you are ap

In [49]:
for i, n in enumerate(nps):
    print(flat_sents[i])
    print(get_close_matches(n, items, cutoff=0.5))

This is my second time I had lunch here and I have to say, the staff surprised me.
[]
She has very good memory.
[]
When I walked in, she already knew what I wanted.
[]
She said, "Do you want the same order as yesterday, the chicken broccoli with out broccoliI?"
[]
And what I said in response was "WOW, How do you know my order?"
[]
And she knows that the soda is not cold enough so she gave me a cup of ice immediately.
[]
In addition, the food is delicious too.
[]
Also I will try the tapioca tea next time.
[]
My friends order the delivery.Nothing was fine except for the delivery service.
[]
Can't stand this kind of "food" as a Chinese.
[]
The only reason I give two stars instead of one is because I won't give one-star review unless the restaurant leads to a food poison
[]
I just walked in off the street because i was hungry.
[]
I ordered the cold sesame noodles.
['A7. Sesame Noodles']
I sat down on a nearby bench to eat it.
[]
While exiting out the door to go outside one of the kitchen s

In [50]:
for s in flat_sents:
    print(s)
    print(get_close_matches(s, items, cutoff=0.5))

This is my second time I had lunch here and I have to say, the staff surprised me.
[]
She has very good memory.
[]
When I walked in, she already knew what I wanted.
[]
She said, "Do you want the same order as yesterday, the chicken broccoli with out broccoliI?"
[]
And what I said in response was "WOW, How do you know my order?"
[]
And she knows that the soda is not cold enough so she gave me a cup of ice immediately.
[]
In addition, the food is delicious too.
[]
Also I will try the tapioca tea next time.
[]
My friends order the delivery.Nothing was fine except for the delivery service.
[]
Can't stand this kind of "food" as a Chinese.
[]
The only reason I give two stars instead of one is because I won't give one-star review unless the restaurant leads to a food poison
[]
I just walked in off the street because i was hungry.
[]
I ordered the cold sesame noodles.
['A7. Sesame Noodles']
I sat down on a nearby bench to eat it.
[]
While exiting out the door to go outside one of the kitchen s

[]
I will never order from them again this time was the last straw.
[]
The other 2 times they left something out and was rude when I asked for the lost Item.
[]
Last time and the only time I called they forgot the order entirely.
[]
Food is good but i rather save my sanity than to deal with them.
[]
I can find it somewhere else.
[]
We ordered jumbo shrimp in curry sauce...meh...kinda flavorless.
[]
Shrimp shumai...they were ok. Then fried chicken dumplings...again meh!
[]
Worst part was it took an hour and 15 minutes and I live not even 10 minutes away!
[]
Called 3 times and they continually said he left 10 minutes ago.
[]
Unfortunately food was cold by the time we got it!
[]
I would recommend ordering from someone else!
[]
So many great restaurants in Nyc, why order food that's MEH!
[]
The service here is so horrendous, it's comical.
[]
The quality of food is the only redeeming quality (and I'm Chinese, so that is kinda saying something I guess) - the ONLY reason why this review is 2 

In [55]:
review = data[3]['reviews']
menu = data[3]['menu_data'][0]['menu']

review_sents = [sent_tokenize(r) for r in review]
flat_sents = [r for review in review_sents for r in review]

items = list(menu.keys())

pprint(flat_sents)

['Evening on the town after leaving treat house we decided to have some '
 'Italian food only because I was graving meatballs, this is definitely a cool '
 'little spot for brunch with friends.',
 'The service was nice and quick waitress super friendly and attentive.',
 'I ordered \xa0meatball sliders and truffle fries that was to die for.',
 'The meatball slides were perfect!!!!',
 ", they were only appetizers (I believe )but very filling omg I'll go back to "
 'order the exact same thing \n'
 '\n'
 'My husband ordered chicken pam but it doesnt come with pasta which he found '
 'disappointing.',
 'Overall I would return, nice atmosphere and love the neighborhood',
 'For a place by someone named "Nicky Meatballs" & voted "Best Meatballs in '
 'the Five Boroughs" you\'d think they\'d take some pride in their meatballs '
 'or their menu.',
 'Their menu boasts some interesting non-meatball sliders which are advertised '
 'in the menu to come "...on brioche rolls, with mixed greens".',
 'W

In [56]:
for s in flat_sents:
    print(s)
    print(get_close_matches(s, items, cutoff=0.5))

Evening on the town after leaving treat house we decided to have some Italian food only because I was graving meatballs, this is definitely a cool little spot for brunch with friends.
[]
The service was nice and quick waitress super friendly and attentive.
[]
I ordered  meatball sliders and truffle fries that was to die for.
[]
The meatball slides were perfect!!!!
[]
, they were only appetizers (I believe )but very filling omg I'll go back to order the exact same thing 

My husband ordered chicken pam but it doesnt come with pasta which he found disappointing.
[]
Overall I would return, nice atmosphere and love the neighborhood
[]
For a place by someone named "Nicky Meatballs" & voted "Best Meatballs in the Five Boroughs" you'd think they'd take some pride in their meatballs or their menu.
[]
Their menu boasts some interesting non-meatball sliders which are advertised in the menu to come "...on brioche rolls, with mixed greens".
[]
When I inquired about these my server informed me that

[]
Would come again.
[]


In [57]:
items

['Truffle Parmesan French Fries',
 'Mozzarella Fritti',
 'Zucchini Fritti',
 'Caprese',
 'Cozze a la Marinara',
 'Calamari Fritti',
 'Eggplant Rollatine',
 'Gamberi e Fungo',
 'Sicilian Platter',
 'Pasta Fagioli',
 'Straccietella',
 'Insalata Mista',
 'Insalata di Caesar',
 'Insalata Tre Colori',
 'Insalata Arugula',
 'Portobello Griglia',
 'Eggplant Slider',
 'Sausage and Broccoli Rabe Slider',
 'Eggplant Parmigiana Hero',
 'Chicken Parmigiana Hero',
 'Sausage, Peppers and Onions Hero',
 'Classic Eggplant Parmigiana',
 'Stolen Polpette',
 'Sauced Polpette',
 'Sliders Polpette',
 'Sandwich Polpette',
 'Classic Polpette',
 'Linguini Primavera',
 'Penne with Sausage and Broccoli Rabe',
 'Rigatoni with Escarole and White Beans',
 'Rigatoni Filetto di Pomodoro',
 'Gnocchi Marinara',
 'Ravioli a la Vodka',
 'Tortellini with Prosciutto and Peas',
 'Fettuccine Carbonara',
 'Lobster Ravioli',
 'Papardelle Bolognese',
 'Black Linguini',
 'Black Linguini with Salmon',
 'Linguini Vongole',
 'Ling

In [58]:
vect = TfidfVectorizer(stop_words='english')
menu_mat = vect.fit_transform(items)

review_mat = vect.transform(flat_sents)
sim_mat = cosine_similarity(menu_mat, review_mat)
for i in range(sim_mat.shape[1]):
    print(flat_sents[i])
    rankings = np.argsort(sim_mat[:,i])[::-1]
    for r in rankings[:3]:
        print(sim_mat[r,i], items[r])

Evening on the town after leaving treat house we decided to have some Italian food only because I was graving meatballs, this is definitely a cool little spot for brunch with friends.
0.0 Sweet Sausage and Lentils in Honey Rosemary Sauce
0.0 Sliders Polpette
0.0 Eggplant Parmigiana Hero
The service was nice and quick waitress super friendly and attentive.
0.0 Sweet Sausage and Lentils in Honey Rosemary Sauce
0.0 Sliders Polpette
0.0 Eggplant Parmigiana Hero
I ordered  meatball sliders and truffle fries that was to die for.
0.5773502691896257 Truffle Parmesan French Fries
0.4599235135253671 Sliders Polpette
0.0 Eggplant Parmigiana Hero
The meatball slides were perfect!!!!
0.0 Sweet Sausage and Lentils in Honey Rosemary Sauce
0.0 Sliders Polpette
0.0 Eggplant Parmigiana Hero
, they were only appetizers (I believe )but very filling omg I'll go back to order the exact same thing 

My husband ordered chicken pam but it doesnt come with pasta which he found disappointing.
0.5419125722303885 

In [60]:
nps = []

for i, sent in enumerate(flat_sents):
    tokenizer = TextBlob(sent)
    tags = tokenizer.noun_phrases    
    nps.append(' '.join(t for t in list(tags)))
    
nps

['evening italian food cool little spot',
 'quick waitress',
 'meatball sliders truffle fries',
 'meatball slides',
 'filling omg chicken pam',
 'overall nice atmosphere',
 'nicky meatballs meatballs boroughs',
 'menu boasts interesting non-meatball sliders brioche rolls',
 'old menu',
 'bag o',
 'sliders nicky famous sauced brioche rolls',
 'meatballs meatballs ... blah ...',
 '',
 'got',
 '',
 '',
 'meatballs',
 '',
 'tiny',
 'dinner rolls',
 'tiny',
 'dinner',
 'rolls',
 'brioche',
 '',
 '',
 'nope',
 'own damn balls',
 'not worth $',
 "ex 's aunt jersey",
 '',
 'ridiculous prices shitty meatballs tiny dinner rolls',
 '',
 'puttanesca sauce',
 'small side chicken milanese francese etc',
 'small portion prob costs',
 'ironically sausage side dish',
 'cozy nice servers',
 'eat-in place vs order',
 '',
 'bottomless brunch drinks lousy reviews time limit',
 'nuts',
 'best',
 'vodka sauce',
 'mouth water',
 'nyc must do',
 'favorite restaurant happy hour',
 '',
 'pre-theater dinner',
 ''

In [62]:
review_mat = vect.transform(nps)
sim_mat = cosine_similarity(menu_mat, review_mat)
for i in range(sim_mat.shape[1]):
    print(flat_sents[i])
    rankings = np.argsort(sim_mat[:,i])[::-1]
    for r in rankings[:3]:
        print(sim_mat[r,i], items[r])

Evening on the town after leaving treat house we decided to have some Italian food only because I was graving meatballs, this is definitely a cool little spot for brunch with friends.
0.0 Sweet Sausage and Lentils in Honey Rosemary Sauce
0.0 Sliders Polpette
0.0 Eggplant Parmigiana Hero
The service was nice and quick waitress super friendly and attentive.
0.0 Sweet Sausage and Lentils in Honey Rosemary Sauce
0.0 Sliders Polpette
0.0 Eggplant Parmigiana Hero
I ordered  meatball sliders and truffle fries that was to die for.
0.5773502691896257 Truffle Parmesan French Fries
0.4599235135253671 Sliders Polpette
0.0 Eggplant Parmigiana Hero
The meatball slides were perfect!!!!
0.0 Sweet Sausage and Lentils in Honey Rosemary Sauce
0.0 Sliders Polpette
0.0 Eggplant Parmigiana Hero
, they were only appetizers (I believe )but very filling omg I'll go back to order the exact same thing 

My husband ordered chicken pam but it doesnt come with pasta which he found disappointing.
0.6601481336498773 

### Testing Model script

In [63]:
reviews = data[4]['reviews']
menu = data[4]['menu_data'][0]['menu']

In [67]:
sentences = [sent_tokenize(r) for r in reviews]
sentences = [s for sent in sentences for s in sent]

In [70]:
nps = []
for sent in sentences:
    token = TextBlob(sent)
    tags = token.noun_phrases
    nps.append(' '.join(t for t in list(tags)))

In [71]:
nps

['',
 '',
 'chicken vindaloo',
 '',
 '',
 'decent lunch indian option fidi till 3pm express takeaway boxes $ 8- $',
 'chicken tikka masala box tiny salad lentil curry',
 'ctm',
 '',
 'good spices',
 'tandoori bit dry',
 'tandoori shrimp mild chili sauce spicy mint sauce',
 'tamarind yellow lentil curry',
 'future world trade center',
 '',
 '',
 'favorite location',
 'ruchi',
 'excellent awesome waiters',
 'ruchi good lunch spot lunch entrees',
 'seemed',
 'menu prices',
 'saag paneer',
 'paneer cubes',
 'willing ruchi saag paneer',
 'taste atmosphere presentation service | overall',
 'ordered paneer kadai indo fried rice order # grubhub',
 'delivery time pm pm delivery guy',
 'paneer oil large tasteless cucumbers indo-chinese fried',
 '',
 'ruchi high number innumerable number grubhub',
 'be responsible food-provider customer chooses',
 '',
 'ruchi dine-in lunch spot fidi northern indian entrees',
 '',
 'everyone',
 'chicken vindaloo',
 '',
 "ca n't spicy food",
 '',
 'chicken saag bit

In [76]:
items = list(menu.keys())
vect = TfidfVectorizer(stop_words='english')
menu_tfidf = vect.fit_transform(items)

In [79]:
menu_tfidf.shape

(169, 181)

In [80]:
len(items)

169

In [81]:
review_tfidf = vect.transform(nps)
sim_mat = cosine_similarity(menu_tfidf, review_tfidf)

In [82]:
sim_mat.shape

(169, 181)

In [88]:
matches = np.argwhere(sim_mat > 0.6)

In [94]:
polarities = np.zeros(len(items))
for i, sent_index in enumerate(matches[:,1]):
    tb = TextBlob(sentences[sent_index])
    polarities[matches[i][0]] += tb.sentiment.polarity

In [95]:
polarities

array([ 0.        ,  0.28571429,  0.        ,  0.2       ,  0.        ,
        0.2       ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        , -0.33333333,  0.        ,  0.        ,
        1.        ,  0.94027778,  0.        ,  0.        ,  0.        ,
       -0.70972222,  0.        ,  0.        , -0.15      ,  0.        ,
       -0.05972222,  0.        ,  0.        , -0.05972222,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
       -0.33333333, -0.33333333,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        , -0.80972222, -0.93333333,
       -0.45833333, -1.1       ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        , -0.75      ,  0.        ,
        0.        , -0.75      ,  0.        ,  0.        ,  0.        ,
       -0.125     , -1.13333333,  0.        ,  0.        ,  0.  

In [99]:
rankings = np.argsort(polarities)[::-1]

In [149]:
best = rankings[:3]

In [146]:
names = [items[b] for b in best]
names

['Shrimp Masala', 'Crab Curry', 'Kheer (Rice Pudding)']

In [135]:
rankings

array([ 77,  82, 167, 113,  15,  91, 116,  16, 160,  90, 155, 103, 105,
       135,   1, 157,  97,   3,   5, 111, 110, 108, 107,  93,  60,  57,
        56,  62,  63,  64,  59,  72,  67,  68,  69,  70,  71,  54,  73,
        74,  75,  76,  78,  79,  80,  81,  55, 168,  53,  26,  22,  21,
        19,  18,  17,  14,  13,  11,  10,   9,   8,   7,   6,   4,   2,
        24,  27,  52,  29,  47,  46,  45,  43,  42,  39,  38,  37,  36,
        35,  34,  33,  32,  31,  30,  44,  84,  83, 144, 142, 141, 140,
       139, 138, 137, 136, 134, 133, 132, 131, 130, 129,  85, 128, 143,
       145, 126, 146, 166, 165, 164, 163, 162, 161, 159, 158, 156, 154,
       151, 150, 149, 148, 147, 127,   0, 125,  98,  96, 102, 124, 101,
       112, 100,  95, 114,  99, 115,  94, 117, 118, 119, 120, 121, 122,
        92,  89,  88,  87,  86, 123, 104, 152, 106,  25,  28,  65,  23,
        41,  12,  40, 109,  50, 153,  20,  61,  58,  48,  49,  51,  66])

In [137]:
worst = rankings[-3:]

In [138]:
names = [items[w] for w in worst]

In [139]:
names

['Chicken Makhani (Butter)', 'Chicken Saag', 'Lamb Saag']

In [140]:
descriptions = [menu[n][0] for n in names]
prices = [menu[n][1] for n in names]

In [141]:
descriptions

['Boneless tandoori chicken pieces cooked in a rich tomato cream sauce.',
 'Marinated chicken cooked with fresh creamy spinach.',
 'Marinated lamb cooked with fresh creamy spinach.']

In [142]:
prices

[14.95, 14.95, 15.95]

In [144]:
reviews

["This place is such a hole in the wall! I walked right by it twice looking for it. \n\nI loved the food though, the chicken vindaloo was so flavorful, spicy but not so much that I couldn't enjoy it. \n\nThe inside of the restaurant is beautiful too!\n\nWould return.",
 'decent lunch indian option in the FiDi area; they offer 50% off for eating in till 3pm and express takeaway boxes for $8-$12.\n\nI ordered the chicken tikka masala box which came with a tiny salad (no dressing) and lentil curry. \xa0the CTM was a bit on the oily side and did not have any potatoes.',
 'The four stars is really for the food. It was tasty, good spices. The Tandoori Chicken was a bit dry. The Tandoori Shrimp was served with a mild chili sauce and a very spicy mint sauce. The Tamarind rice was delicious, as was the Yellow Lentil Curry.\n\nThey are located on a corner, right across from the future World Trade Center 5 site. The interior was okay. It was a little crowded, seating 15-20 people. \n\nThe restaur