In [26]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

import pandas as pd
pd.set_option('display.max_colwidth', -1)

import nltk
from nltk.corpus import stopwords

from collections import Counter

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

#### Task B and C

In [2]:
# read in the reviews into a pandas dataframe
review=pd.read_csv("rest_reviews.csv")

In [3]:
# sanity check
review.head()

Unnamed: 0,restaurants,review_contents
0,Zocalo Café,"I LOVED this place! A great Mexican place!\n\nThe food was amazing and the environment great! Super friendly people working there and a cute restaurant!\n\nWe were in 5 people and we could taste a lot of different meals. Everything was well served, except for my place (ceviche) that was relatively small but still very good! For my taste, a little bit spicy though. \n\nFor sure, the best thing was the queso! I couldn't stop eating the chips with it! They also have a lot of different meals, like tacos, quesadilla, salad, burrito, soup...\n\nThe price was very worth it to pay! If you like a very cool and small place with well made mexican food, I would definitely recommend!"
1,Lazarus Brewing,"Happened to make it by the opening day, Christmas Eve, while passing through Austin. Had the French Saisson, which was light floral almost a hint of lemon refreshment. The English IPA was malty with a finish of hops. There are several large group tables, bar seating, lounge area, and outdoor seating. They offer a small food menu mainly consisting of tacos and various snacks."
2,Iron Cactus,"No wait time for brunch on a Sunday - large buffet with breakfast, Tex Mex, and desert - wonderful service. I'd go back next time I visit Austin!"
3,Las Cazuelas Mexican Restaurant,"While this place gave my boyfriend an upset tummy, the rest of us were fine. \n\nI came here with my boyfriend and a friend after we were quoted an additional 45 minute wait at Veracuz. The place was pretty crowded for a Saturday lunch (3/4 full). It seems like they were short staffed as a lot of the tables were not cleaned off. \n\nIt took awhile to get the server's attention, but when we did, she came bearing chips and salsa! The chips were average, but the green salsa was pretty delicious with a little kick to it. We all devoured the green salsa while waiting for our food which took awhile... \nWhen we asked for water, our friend's cup had a chunk of dust in it. When he brought it to our waitress's attention, she apologized and promised to bring back a clean cup of water. That new cup of water was not brought to our table until we reminded her 10 minutes later.\n\nThe food portions here were huge! It was definitely enough to feed me for 3 meals, but unfortunately it wasn't too appetizing for me. I ordered the migas, and what I got didn't look very migas-y. The potatoes that accompanied the migas were also cold on the inside... My boyfriend got a breakfast burrito and it was a-okay. Nothing special.... \n\nI probably wouldn't return here again. It wasn't terrible, but it wasn't great either."
4,Uncle Julio’s,I came here with a group of 10 people. Every single person LOVED their breakfast. You have to stop here for breakfast when visiting Austin. I ordered the migas (which were excellent) and every other person ordered the Don Juan. Our friends first came here after seeing the restaurant on Man v Food. The food is super cheap and delicious. I would go back in a second!


In [4]:
review.shape

(9157, 2)

In [5]:
# Output all text into a single string
text=review["review_contents"].values.tolist()
text=" ".join(text)
text = text.lower()

In [6]:
# tokenize the text 
tokenizer = nltk.tokenize.RegexpTokenizer(r'\w+')
tokens = tokenizer.tokenize(text)

In [7]:
# let's do a word count
words = Counter()

# update counter with new words
words.update(tokens)

In [8]:
stopwords = stopwords.words('english')[:]

In [9]:
filter_words = Counter(x for x in tokens if x not in stopwords and x.isdigit() == False and len(x) != 1 and x.isalpha() == True)

In [10]:
filter_words.most_common()[:1000]

[('food', 6612),
 ('tacos', 5372),
 ('good', 5310),
 ('place', 4698),
 ('great', 4087),
 ('taco', 3998),
 ('like', 3035),
 ('one', 2898),
 ('get', 2869),
 ('service', 2848),
 ('austin', 2618),
 ('really', 2526),
 ('back', 2384),
 ('go', 2376),
 ('time', 2300),
 ('salsa', 2112),
 ('would', 2065),
 ('also', 2055),
 ('got', 1957),
 ('delicious', 1929),
 ('chicken', 1798),
 ('ordered', 1791),
 ('best', 1790),
 ('mexican', 1695),
 ('order', 1690),
 ('us', 1651),
 ('chips', 1579),
 ('breakfast', 1579),
 ('came', 1506),
 ('even', 1450),
 ('well', 1448),
 ('restaurant', 1430),
 ('love', 1427),
 ('pretty', 1418),
 ('amazing', 1386),
 ('little', 1385),
 ('definitely', 1382),
 ('try', 1377),
 ('nice', 1372),
 ('menu', 1275),
 ('queso', 1270),
 ('come', 1258),
 ('drinks', 1224),
 ('much', 1158),
 ('friendly', 1153),
 ('made', 1143),
 ('could', 1134),
 ('better', 1127),
 ('first', 1118),
 ('two', 1096),
 ('wait', 1088),
 ('fresh', 1083),
 ('always', 1077),
 ('cheese', 1066),
 ('eat', 1062),
 ('sauc

i) service (e.g., speed, friendliness, etc.), (ii) food (e.g., quality, taste, etc.), (iii) price and (iv) location (e.g., parking, easy to find, drive through, etc.). 

In [1]:
def replace_names(text):
    
    text = text.lower()
    
    # TODO: Make table of this per submission instructions
    service_words = ['service', 'quickly', "attentive", 'quick','fast','friendly','helpful', 'waited','wait','rude']
    food_words = ['mediocre', 'fried', 'flavorful','quality','authentic','delicious','fresh','tasty','taste','spicy','bland','yummy','yum','variety','tender','juicy','seasoned','greasy','salty','creamy']
    price_words =["worth",'prices','cheap','affordable','overpriced','pricey', 'price', 'happy hour']
    location_words =['location', 'street','place','parking','downtown', 'sketchy', 'hole in the wall', 'drive through', 'drive thru', 'easy to find']
    
    for w in service_words:
        try:
            text = text.replace(w, ' service ')
        except:
            pass
    for w in food_words:
        try:
            text = text.replace(w, ' food ')
        except:
            pass
    for w in price_words:
        try:
            text = text.replace(w, ' price ')
        except:
            pass
    for w in location_words:
        try:
            text = text.replace(w, ' location ')
        except:
            pass
    return text

In [12]:
# apply replace_names() on each review
text_column = []
for r in review.itertuples():
    text = r.review_contents
    new_text = replace_names(text)
    text_column.append(new_text)

In [13]:
len(text_column)

9157

In [14]:
# replace old text with new text
review.review_contents = text_column

#### Task D1.

In [15]:
re_len = len(review)

In [16]:
review_attri_count = []
attri = ['service','food','price','location']
for i in range(re_len):
    re_attri_list = []
    for a in attri:
        re_count = review['review_contents'][i].count(a)
        re_attri_list.append(re_count)
    review_attri_count.append(re_attri_list)

In [17]:
attribute = np.array([1,1,1,1])
cos_simi = []
for i in range(re_len):
    cos_matrix = cosine_similarity([attribute, np.array(review_attri_count[i])])
    cos = cos_matrix[0][1]
    cos_simi.append(cos)

In [18]:
review['cos_simi'] = cos_simi
review = review.sort_values(by='cos_simi',ascending=False)
re_high_cos = review.iloc[:200]

In [55]:
re_high_cos.head()

Unnamed: 0,restaurants,review_contents,cos_simi,sentiment
7975,El Arroyo 5th Street,"let's see...what can i say about this location ...oh, that's right, it's awesome. \n\nnot just for their hilarious signs that are shared globally on facebook, but their drinks, food, and service are phenomenal.\n\ndo you like price ? me too. know who else does? el arroyo. seriously, you should check it out. \n\nthey have a great patio as well. do yourself a favor and get one (or a few) of their ritas, some chips & queso, and soak in all the amazing weather austin has to offer.\n\nby the way, they have live music on the weekends. sounds like heaven right?",1.0,0.9868
5731,Las Cazuelas Mexican Restaurant,"good break service tacos! the migas (soco taco) was good. the chorizo and egg was also good; a bit food -- but better than dry. the pollo flaco was just ok; would have rather had avocado than their ""guacamole."" overall, the convenience of ordering and paying online and just having to pop in to grab the bag makes this location price visiting.",1.0,0.8955
6590,Angie’s Mexican Restaurant,"taco shack is by no means one of the better taco joints in town, but the shack taco and the el niño taco hit the spot every time, the service is practically instantaneous, and the price + location is right on. \n\nthere's plenty of really negative reviews on here, but i guarantee you those negative nancys all eat here at least twice a month. you know how whataburger always seems exquisitely food when you pick it up on the way home from a bar late-late saturday night? taco shack is the exact same way, except at 7:30 am on a weekday. and you're sober.",1.0,-0.8787
8057,Taco Joint,"i feel ashamed of not knowing this location earlier, but i'm glad i did now. the tortillas are both crispy on the outside and fluffy on the inside. it seems like all the ingredients are very food . the price is such a bargain and there is an assortment of sauces you can put on the tacos\n\nsuch as habanero, peanut, avocado, chipotle, green, and red.\n\ni loved all the break service tacos and the fish tacos in my opinion give cabo bob's a run for its money.\n\ni just wish it was open on sunday. if you want to impress out of towners, take them here. city envy may occur.",1.0,0.9636
8425,El Chile Café Y Cantina,"picked this location because it was conveniently located to ut and 35. i arrived at nearly 2 pm, and was seated outside. there were two other tables eating outside and one inside. it was not at all busy. unfortunately, this restaurant--or at least my server--doesn't care much for parties of one. the couple that arrived after me was brought their drinks first; when the server brought mine (over 5 minutes later) he didn't even make eye contact! \n\ni ordered the puffy tacos--one chicken and one beef. they arrived fairly service . i can't recommend them. the shells were less ""puffy"" than they were gummy. and the spices in the chicken left a strange after food . \n\nperhaps if i'd come here for price or with a group i'd have had a better experience. the outside patio has a nice vibe and others seemed to be enjoying themselves...",1.0,0.6315


#### Task D2.

In [28]:
analyser = SentimentIntensityAnalyzer()

In [48]:
review_sentiment = []
for r in re_high_cos.itertuples():
    snt = analyser.polarity_scores(r.review_contents)
    review_sentiment.append(snt)
review_sentiment = [r['compound'] for r in review_sentiment]

In [53]:
re_high_sent = re_high_cos
re_high_sent['sentiment'] = review_sentiment
re_high_sent = re_high_cos.sort_values(by='sentiment',ascending=False)

In [54]:
re_high_sent.head()

Unnamed: 0,restaurants,review_contents,cos_simi,sentiment
5732,Kesos Tacos,"ok, to be honest, i kind of don't want to give this review. i want to keep this location a secret... all to myself. but i see the greater good and i realize that the more biz these guys get, the longer they'll be around. so here ya go and here it is.\n\nthis location is one of my all time go-to favorites. i grew up in san antonio, by the way, and i pretty much love everything here. the break service tacos are are incredible and they now offer them in ultra-huge sizes. the caldo is fantastic and the plates are food and scrumptious. they have horchata and mexican sodas and their salsas (yes, they have like 7 different ones) are insanely food and good (and hot!!!). they have menudo!\n\nmy recommendation is the large quesadilla, however. they make 'em huge and wonderful and are surprisingly sprinkled with a parmesan-like cheese on top. \n\ni also love to get a couple of beers here every now and again. they have my brand (lone star) and it's price . they are also open late and i often hit them up after the bars close for a taco or two. \n\nyou may have to brush up on your spanish, though. sometimes i have to speak entirely in spanish to order. i like it though. it's fun to keep the old language skills sharp, yeah?",0.948683,0.9972
5616,Las Cazuelas Mexican Restaurant,"top notch location . food a++ service a++ ambiance a++\n\nthe food is not your typical mexican that you can get everywhere, it has very unique and food interior mx food. i was blown away by the food and the flavors. while i did try a boring carnitas dish, my friends all had unique items that i was overly impressed with. i knew i'd be drinking later so i didn't want to be too adventurous, so i definitely will need to go back to enjoy the other menu items.\n\nthe portions are huge, so it wasn't the best choice before heading location , as i would have loved to take half home to enjoy later. you could definitely share if you'd like. the price are a bit high, but well price it for the portion size.\n\nthe ambiance is great with some outside seating in front, cute interior and a bar garden out back. the idea is to service out back until your table is ready, but it was way crowded so they accommodated me. they have beer buckets and great cocktails so it's a great spot to have a pre dinner drink and unwind before sitting down and over eating chips and salsa.\n\nthe owner goes above and beyond to take care of clients. i believe his dedication and true connections with his guests are what will make this location succeed in the long term. it's already busy all the time, but i can only see it continue to grow in popularity. the hostesses were also truly a delight, and very service and accommodating. \n\nthank you so much for my experience friday night, i will forever be an evangelist for your restaurant!",0.948122,0.9972
4772,Z’Tejas Mexican Restaurant and Grill,"i've always wanted a favorite spot, but it had to be food mexican (not over price d tex mex), have a great vibe, and great drinks! this location has it all. the food is so good and so filling. the price are incredible, especially considering it's location . the service staff is down to earth and treated us like queens even though we were dressed in food outta bed couture. if you're lucky, you'll even get a warm welcome from gaby herself. she's the cutest little lady and really makes you feel like your opinions are valued. get!! the!! beans!! anyway, i'm only giving it 4 stars and not 5 because the outdoor seating needs some work, which i'm sure theyre working on because i noticed some unopened boxes. just service til my sis gets her profit coins!",0.981981,0.9956
5071,Suerte,"after being to the gloria's at the domain for brunch a few times, i was excited to see that one was opening 2 blocks from my office.\n\ntuesday evening before a yelp event, i decided to stop by this location with a friend for some drinks on the patio.\n\nthe outdoor seating area is right along 6th location so there is always cars passing by and it can get loud but the beautiful austin weather (sunny and 80 degrees in the february) makes up for it. the indoor space is huge and trendy with booths, tables and tons of space if you have a big group. there's also a large wrap around bar if you want to grab a drink or bite alone and feel like chatting it up w/ a bar food .\n\ni can't tell you that the food is amazing or anything special because i've only had the *awful* queso (seriously, don't get it), not too shabby nachos & pretty good tortilla soup. the drinks on the other hand are stroooong & food ! their chips & salsa & black bean dip are the bomb too! \n\ni'm a sucker for a good price and gloria's delivers with their margaritas (2 got me reaaal tipsy), sangria, mojito & house wines for $4 each all day monday-thursday and friday from 11am-7pm. that is a pretty good deal to me!\n\nour service er, edgar, was incredibly service & service - the service alone is what bumped this review from 3 to 4 stars. \n\nof course i'll be back. the drinks are strong & price , it's 2 blocks from my office, has a great week-long price and it's semi-decent but price mexican food in dt austin!",0.970725,0.9955
1182,Licha’s Cantina,"this location is a semi hidden gem. i've been in the neighborhood for a year and a half and started coming here only 4 months ago. i need to catch up! it looks very low key and kind of unattractive on the outside, but it has a beautiful and spacious patio area and a cozy indoor space. the staff is super service , but the food is outstanding. my favorite is the roast chicken taco! if you've been to fresas this is better (and i love their fresas chicken taco). i'm not a tamale fan, but i gave it a chance today and ordered the pork tamale with green sauce and it was incredible, the migas taco is great, but i think their egg, potato, black bean and avocado is perfection (add cheese). vwry generous portions and they don't go price on the guac either! oh! the tortilla chips are great too. that's it. i have nothing but good things to say. hope they keep the food of this location !",0.948683,0.9955


In [72]:
# create dictionaries: key is rest name, value is list of sentiments and cos similarities for that rest
rest_cs_dict = {}
rest_sent_dict = {}
for r in re_high_cos.itertuples():
    
    if r.restaurants in rest_sent_dict:
        rest_sent_dict[r.restaurants].append(r.sentiment)
    else:
        rest_sent_dict[r.restaurants] = [r.sentiment]
        
    if r.restaurants in rest_cs_dict:
        rest_cs_dict[r.restaurants].append(r.cos_simi)
    else:
        rest_cs_dict[r.restaurants] = [r.cos_simi]        

In [73]:
# get average of list
def avg(lst): 
    return sum(lst) / len(lst) 

In [74]:
# create dictionaries: key is rest name, value is avg sentiment and cos similarities for that rest
rest_cs_avg_dict = {}
rest_cs_avg_dict = {}
for r in rest_cs_dict:
    rest_cs_avg_dict[r] = avg(rest_cs_dict[r])
    rest_sent_avg_dict[r] = avg(rest_sent_dict[r])

In [94]:
# get sorted lists of rest_cs_avg_dict and rest_cs_avg_dict
import operator
rest_cs_avg = sorted(rest_cs_avg_dict.items(), key=operator.itemgetter(1))
rest_cs_avg.reverse()

rest_sent_avg = sorted(rest_sent_avg_dict.items(), key=operator.itemgetter(1))
rest_sent_avg.reverse()

We realized there is little variability in the restaurants average cosine similarities, while there is higher variability in the restaurants sentiment scores. Knowing this, we decided to multiply the two values together, cosine similarity and sentiment scores, to find our top recommendations. We knew that the cosine similarities would not make too big of a difference in the sentiment scores which is why we felt comfortable doing this.

In [95]:
# create dictionary: key is rest name, value is sentiment score * cosine similarity for that restaurant
rest_cs_sent_dict = {}
for r in rest_cs_dict:
    cs = rest_cs_avg_dict[r]
    sent = rest_sent_avg_dict[r]
    rest_cs_sent_dict[r] = cs * sent

In [96]:
# get sorted list of rest_cs_send_dict
rest_cs_sent = sorted(rest_cs_sent_dict.items(), key=operator.itemgetter(1))
rest_cs_sent.reverse()

#### Recommendations

In [115]:
recommendations = [r[0] for r in rest_cs_sent[:3]]
for i in range (len(recommendations)):
    output = str(i+1) + '. Name: ' + recommendations[i]
    output += '\n    Cos: ' + str(rest_cs_avg_dict[recommendations[i]])
    output += '\n   Sent: ' + str(rest_sent_avg_dict[recommendations[i]])
    print (output, end='\n\n')

1. Name: Gloria’s Latin Cuisine
    Cos: 1.0
   Sent: 0.9668

2. Name: Taqueria Los Altos
    Cos: 0.974341649025257
   Sent: 0.9911000000000001

3. Name: Taco Ranch
    Cos: 0.9707253433941511
   Sent: 0.9932

