### **Connecting to Google Drive and Importing Libraries**

In [None]:
# Mount Google Drive into the Colab VM at /content/drive.
# force_remount=True is passed so the mount is redone even if the drive is already mounted (TODO confirm against Colab docs).

from google.colab import drive
drive.mount("/content/drive", force_remount=True)
# Print the working directory; the relative CSV path used when loading the data is resolved against it.
!pwd

Mounted at /content/drive
/content


In [None]:
#Importing Libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json


### **Reading and Previewing Dataframe**

This CSV file contains merged data points from the 'Business' and 'Review' datasets. We merged the two files and converted them from JSON to CSV so that they can be modified easily with pandas.

In [None]:
# Read the merged Business + Review data from CSV.
# The path is relative, so it resolves against the current working directory (/content per the !pwd output above).

yelp_biz_review = pd.read_csv("drive/My Drive/Datasets/yelp_reviews_food_categories.csv")

### **Dataframe Specs**

In [None]:
# Remove the reviewer id and the per-review star rating in a single call.
yelp_biz_review = yelp_biz_review.drop(columns=['user_id', 'stars_y'])

In [None]:
# Remove every row that has at least one missing value, then show the
# per-column null counts to confirm none remain.

yelp_biz_review = yelp_biz_review.dropna(axis=0, how="any")
yelp_biz_review.isnull().sum()

business_id     0
name            0
address         0
city            0
state           0
postal_code     0
latitude        0
longitude       0
stars_x         0
review_count    0
is_open         0
attributes      0
categories      0
review_id       0
useful          0
funny           0
cool            0
text            0
date            0
dtype: int64

In [None]:
# Cast 'review_count' from float64 to int64 (safe here because null rows were dropped above).

yelp_biz_review = yelp_biz_review.astype({'review_count': 'int64'})

# Keywords

**Gensim Keyword Extraction**

In [None]:
pip install gensim

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from gensim.summarization import keywords

In [None]:
# Print the gensim keywords for the first 15 reviews, numbered from 1.
for review_no, review_text in enumerate(yelp_biz_review.head(15)['text'], start=1):
    print(review_no)
    print(keywords(review_text))

1
yelp
star
stars
dandy
2
ordered
order
orders
water
food
waitress
waitresses
honey
flavor
flavors
fresh
service
nice
issue
plate
plates
tasted
taste
huge
relleno
mushroom
mushrooms
experience
bad
table
tables
minutes
minute
like
sub
summary
3
nice
clean
minutes
sauce
del
enchiladas
4
asada
good hubby
5
food
customer
6
craving
fajitas
7
nice
usually
food
8

9
place
10
authentic
stomach
anymore
11
mexican
waiting
dessert
pretty
food
12
drink
13

14
larger
15
excellent
restaurant


**spaCy Keyword Extraction (Named Entities)**

In [None]:
import spacy
# Load spaCy's 'en_core_web_sm' pipeline; `proc` is used below to read the named entities (.ents) of each review.
proc=spacy.load('en_core_web_sm')


In [None]:
# Print the named entities spaCy finds in each of the first 15 reviews, numbered from 1.
for review_no, review_text in enumerate(yelp_biz_review.head(15)['text'], start=1):
    print(review_no)
    review = proc(review_text)
    print(review.ents)

1
(Western Indiana, 5, Mexican, Mexican, 5, 3)
2
(Tuesday, July 3rd, 2018, close to 6pm, 10 minutes, 3, 10 minute, another 15 minutes, another 15 minutes, 3, 5 minutes, 35-40 minutes, one, Fresh, one, close to 2 hours, 1, half, 3, 2)
3
(today, 50 minutes, Del Mar, Friday, Mexican, 5)
4
(Carne Asada, Brownsburg)
5
(Great Mexican,)
6
(half,)
7
(Mexican, Mexican)
8
(mexican,)
9
(1,)
10
(a few months, Mexican, American)
11
(Mexican, Avon, Brownsburg, monthly, the last Sunday of each month, Mexican, Mexican, Mexican)
12
(MX, daily)
13
(3,)
14
(an hour, 30-40 minutes)
15
(Mexican, first)


**YAKE Keyword Extraction**

In [None]:
pip install yake

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting yake
  Downloading yake-0.4.8-py2.py3-none-any.whl (60 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/60.2 KB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.2/60.2 KB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
Collecting segtok
  Downloading segtok-1.5.11-py3-none-any.whl (24 kB)
Collecting jellyfish
  Downloading jellyfish-0.9.0.tar.gz (132 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m132.6/132.6 KB[0m [31m17.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: jellyfish
  Building wheel for jellyfish (setup.py) ... [?25l[?25hdone
  Created wheel for jellyfish: filename=jellyfish-0.9.0-cp39-cp39-linux_x86_64.whl size=81473 sha256=93f7db85f0a762c2cd8122d01530935b4eccb71dbe26817d40264d95e6043161


In [None]:
import yake
# English extractor returning up to 10 keywords per text (top=10).
# dedupLim=0.1 is YAKE's deduplication threshold (NOTE(review): confirm the intended strictness against the YAKE docs).
keywd_extractor=yake.KeywordExtractor(lan='en', dedupLim=0.1, top=10)

In [None]:
# Run YAKE on the first review as a quick sanity check; each result is a (phrase, score) tuple.
# - .iloc[0] selects by position: rows were removed by dropna() earlier, so index label 0
#   is not guaranteed to exist.
# - The result gets its own name so it does not overwrite gensim's `keywords` function
#   imported earlier (which would break re-running the gensim cell).
yake_keywords = keywd_extractor.extract_keywords(yelp_biz_review['text'].iloc[0])
for kw in yake_keywords:
    print(kw)

('Western Indiana', 0.00441587780089389)
('festival in Western', 0.012829574325714889)
('playing a festival', 0.03296919743954652)
('stop for supper', 0.03296919743954652)
('Indiana', 0.06200662685046662)
('Western', 0.07090312404319239)
('hand dandy yelp', 0.11182409164858814)
('place to stop', 0.1406834334819147)
('home', 0.14979359948190865)
('Mexican', 0.17053197146429622)


In [None]:
# Print the YAKE key phrases for the first 15 reviews, numbered from 1.
# The extractor output is consumed directly instead of being bound to `keywords`,
# which would overwrite gensim's `keywords` function imported earlier.
for review_no, review_text in enumerate(yelp_biz_review.head(15)['text'], start=1):
    print(review_no)
    # extract_keywords yields (phrase, score) pairs; only the phrase is printed.
    for phrase, _score in keywd_extractor.extract_keywords(review_text):
        print(phrase)

1
Western Indiana
festival in Western
playing a festival
stop for supper
Indiana
Western
hand dandy yelp
place to stop
home
Mexican
2
happened on Tuesday
food
tacos
chile relleno
July
Tuesday
water
waitress
minutes
honey
3
enchiladas Del Mar
Del Mar
wife here today
Nice
clean
good
Del
Mar
appointment
Friday for lunch
4
Carne Asada tacos
tacos were good
hubby had chicken
spinach enchiladas
chicken and spinach
Asada
Carne
good
hubby
wonderfully friendly
5
Delicious food
generous portions
food
Great Mexican
portions
service
customer service
Mexican food restaurant
generous
Lots
6
craving fajitas
lunch
craving
food
fajitas
service
back
half-eaten food
meal
check
7
Mexican food
priced for Mexican
Food
decent
priced
Nice
Menu
increased
gourmet touch
higher prices
8
Good mexican food
food and prices
Good
mexican
prices
bad
authentic
9
Worst place
Worst
place
alcohol
ma'am
rate this place
star
margarita
lady told
rate
10
disappointed
food
Mexican restaurants-they
lot has changed
anymore
Americ

**Textrank Keyword Extraction**

In [None]:
pip install pytextrank

In [None]:
import spacy
import pytextrank

In [None]:
# Second pipeline instance, separate from `proc`; the "textrank" component is added to this one.
nlp=spacy.load("en_core_web_sm")

In [None]:
# add_pipe raises ValueError when a component named "textrank" is already in the
# pipeline, so guard it to keep this cell safe to re-run.
if "textrank" not in nlp.pipe_names:
    nlp.add_pipe("textrank")

# .iloc[0] selects the first remaining row by position (index label 0 may have been dropped by dropna()).
doc = nlp(yelp_biz_review['text'].iloc[0])
# Show the 10 highest-ranked phrases for that review.
for phrase in doc._.phrases[:10]:
    print(phrase.text)

3 stars
Mexican
supper
Western Indiana
a Mexican fusion restaurant
certainly 5 star
a Mexican restaurant
drinks
salsa
the hand dandy yelp


In [None]:
# Top-ranked TextRank phrase of whichever review `doc` currently holds
# (the value shown depends on the last cell that assigned `doc`).
doc._.phrases[0].text

'excellent food'

In [None]:
# Print up to 10 TextRank phrases for each of the first 15 reviews, numbered from 1.
for review_no, review_text in enumerate(yelp_biz_review.head(15)['text'], start=1):
    print(review_no)
    doc = nlp(review_text)
    top_phrases = doc._.phrases[:10]
    for phrase in top_phrases:
        print(phrase.text)

1
3 stars
Mexican
supper
Western Indiana
a Mexican fusion restaurant
certainly 5 star
a Mexican restaurant
drinks
salsa
the hand dandy yelp
2
tap water
water
empty tables
orders
steak tacos
mushroom tacos
more time
July 3rd
Mushroom tacos
freezer burn
3
green sauce
different parts
lunch
Nice patio area
Mexican food
Del Mar
Good margarita
Friday
today
a strip mall
4
spinach enchiladas
chicken
Carne Asada tacos
hubby
Brownsburg
Carne Asada
the Brownsburg residents
a large party
the server
The restaurant
5
Great Mexican food restaurant
Delicious food
Great Mexican
choices
Very attentive customer service
Lots
Very generous portions
the menu
6
fajitas
Convenient location
lunch
plates
the fajitas
a much better meal and service experience
Queso
the vegetables
half-eaten food
our check
7
higher prices
Mexican food
nice decor
Nice clean restaurant
prices
Food
dishes
Mexican
a gourmet touch
Menu
8
Good mexican food
prices
mexican
9
alcohol
lots
my margarita
no alcohol
another one
this place
The 

In [None]:
# Collect up to 10 TextRank phrases per review, in DataFrame row order.
tr_keyword_s = []
for review_text in yelp_biz_review['text']:
    doc = nlp(review_text)
    tr_keyword_s.append([phrase.text for phrase in doc._.phrases[:10]])

In [None]:
# Number of phrase lists collected (one per review that was iterated).
len(tr_keyword_s)

163270

In [None]:
# Preview the TextRank phrase lists of the first 20 reviews.
tr_keyword_s[0:20]

[['3 stars',
  'Mexican',
  'supper',
  'Western Indiana',
  'a Mexican fusion restaurant',
  'certainly 5 star',
  'a Mexican restaurant',
  'drinks',
  'salsa',
  'the hand dandy yelp'],
 ['tap water',
  'water',
  'empty tables',
  'orders',
  'steak tacos',
  'mushroom tacos',
  'more time',
  'July 3rd',
  'Mushroom tacos',
  'freezer burn'],
 ['green sauce',
  'different parts',
  'lunch',
  'Nice patio area',
  'Mexican food',
  'Del Mar',
  'Good margarita',
  'Friday',
  'today',
  'a strip mall'],
 ['spinach enchiladas',
  'chicken',
  'Carne Asada tacos',
  'hubby',
  'Brownsburg',
  'Carne Asada',
  'the Brownsburg residents',
  'a large party',
  'the server',
  'The restaurant'],
 ['Great Mexican food restaurant',
  'Delicious food',
  'Great Mexican',
  'choices',
  'Very attentive customer service',
  'Lots',
  'Very generous portions',
  'the menu'],
 ['fajitas',
  'Convenient location',
  'lunch',
  'plates',
  'the fajitas',
  'a much better meal and service experien

**RAKE Keyword Extraction**

In [None]:
import nltk

In [None]:
# NLTK stop-word corpus; read later in this notebook via stopwords.words('english').
nltk.download("stopwords")

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [None]:
pip install rake-nltk

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting rake-nltk
  Downloading rake_nltk-1.0.6-py3-none-any.whl (9.1 kB)
Installing collected packages: rake-nltk
Successfully installed rake-nltk-1.0.6


In [None]:
# Punkt tokenizer models; nltk.word_tokenize is called later in this notebook
# (presumably also needed by Rake's sentence splitting - TODO confirm).
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
from rake_nltk import Rake

In [None]:
# Rake extractor created with its default settings (no arguments); reused for every review below.
r=Rake()


In [None]:
# Print the 10 best-ranked RAKE phrases for each of the first 5 reviews, numbered from 1.
for review_no, review_text in enumerate(yelp_biz_review.head(5)['text'], start=1):
    print(review_no)
    r.extract_keywords_from_text(review_text)
    ranked_phrases = r.get_ranked_phrases()
    print(ranked_phrases[:10])

1
['outstanding ..... certainly 5 star', '5 star review', 'quite extended wait', 'hand dandy yelp', 'mexican fusion restaurant', 'mexican restaurant', 'western indiana', 'way home', 'really enjoyed', 'pretty standard']
2
['table ... maybe 10 minutes', 'fresh sliced avocados made', '1 waitress serving half', 'chips within 5 minutes', 'fresh pico really made', 'freezer burned tasted nice', 'cold melt water squirted', '3 empty tables wondering', 'honey ordered steak tacos', 'another 15 minutes']
3
['top 5 overall places', 'live 50 minutes away', 'mexican food across', 'literally every item', 'enchiladas del mar', 'clean atmosphere inside', 'nice patio area', 'clean bathrooms', 'cheese enchiladas', 'strip mall']
4
['server working well', 'large party next', 'carne asada tacos', 'wonderfully friendly', 'spinach enchiladas', 'could see', 'brownsburg residents', 'us', 'spacious', 'service']
5
['great mexican food restaurant', 'attentive customer service', 'delicious food', 'generous portions'

In [None]:
# Collect the 10 best-ranked RAKE phrases per review, in DataFrame row order.
keyword_s = []
for review_text in yelp_biz_review['text']:
    r.extract_keywords_from_text(review_text)
    top_phrases = r.get_ranked_phrases()[:10]
    keyword_s.append(top_phrases)

In [None]:
# Preview the RAKE phrase lists of the first 10 reviews.
keyword_s[0:10]

[['outstanding ..... certainly 5 star',
  '5 star review',
  'quite extended wait',
  'hand dandy yelp',
  'mexican fusion restaurant',
  'mexican restaurant',
  'western indiana',
  'way home',
  'really enjoyed',
  'pretty standard'],
 ['table ... maybe 10 minutes',
  'fresh sliced avocados made',
  '1 waitress serving half',
  'chips within 5 minutes',
  'fresh pico really made',
  'freezer burned tasted nice',
  'cold melt water squirted',
  '3 empty tables wondering',
  'honey ordered steak tacos',
  'another 15 minutes'],
 ['top 5 overall places',
  'live 50 minutes away',
  'mexican food across',
  'literally every item',
  'enchiladas del mar',
  'clean atmosphere inside',
  'nice patio area',
  'clean bathrooms',
  'cheese enchiladas',
  'strip mall'],
 ['server working well',
  'large party next',
  'carne asada tacos',
  'wonderfully friendly',
  'spinach enchiladas',
  'could see',
  'brownsburg residents',
  'us',
  'spacious',
  'service'],
 ['great mexican food restauran

In [None]:
# Work on an independent copy: plain assignment only aliases the frame, so columns
# added to yelp_df afterwards would also silently appear on yelp_biz_review.
yelp_df = yelp_biz_review.copy()

In [None]:
# Attach the RAKE phrase lists (keyword_s, one list per review) as a new 'keywords' column.
yelp_df['keywords']=keyword_s

In [None]:
# First five rows, including the new 'keywords' column.
yelp_df.head()

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars_x,review_count,is_open,attributes,categories,review_id,useful,funny,cool,text,date,keywords
0,2AwhlMOMsYXJvskZKKG2GA,Tequila Sunrise,1551 N Green St,Brownsburg,IN,46112,39.867338,-86.390529,3.5,112,1.0,"{'NoiseLevel': ""u'average'"", 'BusinessParking'...","Restaurants, Mexican",djujEmxqpY5bmEJ3YaXiBg,0.0,0.0,0.0,We'd been playing a festival in Western Indian...,2017-09-18 17:29:03,"[outstanding ..... certainly 5 star, 5 star re..."
1,2AwhlMOMsYXJvskZKKG2GA,Tequila Sunrise,1551 N Green St,Brownsburg,IN,46112,39.867338,-86.390529,3.5,112,1.0,"{'NoiseLevel': ""u'average'"", 'BusinessParking'...","Restaurants, Mexican",x3LyEt-uiBFCUw65oc0y5g,0.0,0.0,0.0,"The experience happened on Tuesday, July 3rd, ...",2018-07-04 01:02:30,"[table ... maybe 10 minutes, fresh sliced avoc..."
2,2AwhlMOMsYXJvskZKKG2GA,Tequila Sunrise,1551 N Green St,Brownsburg,IN,46112,39.867338,-86.390529,3.5,112,1.0,"{'NoiseLevel': ""u'average'"", 'BusinessParking'...","Restaurants, Mexican",zZcbhKIqxcgjWO8LJUJUaQ,0.0,0.0,0.0,Phe-nom-nom-nom-enal! Took my wife here today ...,2016-10-13 02:00:28,"[top 5 overall places, live 50 minutes away, m..."
3,2AwhlMOMsYXJvskZKKG2GA,Tequila Sunrise,1551 N Green St,Brownsburg,IN,46112,39.867338,-86.390529,3.5,112,1.0,"{'NoiseLevel': ""u'average'"", 'BusinessParking'...","Restaurants, Mexican",ZoN7GbeNGFyPwwKdMEr_Mw,0.0,0.0,0.0,"Carne Asada tacos were good, hubby had chicken...",2016-07-12 15:05:25,"[server working well, large party next, carne ..."
4,2AwhlMOMsYXJvskZKKG2GA,Tequila Sunrise,1551 N Green St,Brownsburg,IN,46112,39.867338,-86.390529,3.5,112,1.0,"{'NoiseLevel': ""u'average'"", 'BusinessParking'...","Restaurants, Mexican",NlQl6Z5UOFmAgSiMo0CbHA,0.0,0.0,0.0,Delicious food. Very generous portions. Very...,2018-07-03 23:28:02,"[great mexican food restaurant, attentive cust..."


**NMF**

In [None]:
# Imports and NLTK resources for the cleaning / NMF section.
from nltk.corpus import stopwords
from nltk.probability import FreqDist
from sklearn.feature_extraction.text import CountVectorizer
# 'punkt' and 'stopwords' were already downloaded in the RAKE section; repeated here.
nltk.download('punkt')
nltk.download('stopwords')
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import NMF
import time
import pickle
from nltk.stem import WordNetLemmatizer
from nltk.stem import PorterStemmer 
nltk.download('omw-1.4')
import string
nltk.download('averaged_perceptron_tagger')
# NOTE(review): 'all' fetches NLTK's full data collection, which makes the individual
# downloads above redundant. It is presumably what supplies the WordNet data used by
# WordNetLemmatizer, since no explicit nltk.download('wordnet') appears in this cell.
nltk.download('all')

In [None]:
def clean_reviews(review):
    """Normalize one review into a space-separated string of lemmatized words.

    The raw text is split with ``nltk.word_tokenize``. A token is kept only if
    it is purely alphabetic and its lower-cased form is not an English stop
    word; kept tokens are lower-cased and lemmatized with WordNetLemmatizer.

    Args:
        review: Raw review text (str).

    Returns:
        str: The kept lemmas joined by single spaces ('' when nothing is kept).
    """
    # The two quote tokens are added to the stop list as in the original code;
    # they are non-alphabetic, so the isalpha() test would discard them anyway.
    stop_words = set(stopwords.words('english') + ['``', "''"])
    lem = WordNetLemmatizer()
    # The raw text is tokenized as-is: punctuation and contraction fragments
    # such as "n't" come out as non-alphabetic tokens and are dropped by isalpha().
    kept = [
        lem.lemmatize(tok.lower())
        for tok in nltk.word_tokenize(review)
        if tok.lower() not in stop_words and tok.isalpha()
    ]
    return ' '.join(kept)

In [None]:
def tokens(review):
    """Return the lower-cased, stop-word-free alphabetic tokens of one review.

    Uses the same tokenization and filtering as ``clean_reviews`` but returns
    the tokens as a list and does NOT lemmatize them.

    Args:
        review: Raw review text (str).

    Returns:
        list[str]: Lower-cased tokens in their original order.
    """
    stop_words = set(stopwords.words('english') + ['``', "''"])
    # The original computed a lemma for every token and then discarded it;
    # that wasted work is removed, and the returned tokens are unchanged.
    return [
        tok.lower()
        for tok in nltk.word_tokenize(review)
        if tok.lower() not in stop_words and tok.isalpha()
    ]

In [None]:
# Clean and tokenize the first 100,000 reviews (positional slice of the 'text' column).
reviews_subset = yelp_df['text'][0:100000]
clean_review_list = [clean_reviews(review_text) for review_text in reviews_subset]
token_s = [tokens(review_text) for review_text in reviews_subset]

In [None]:
# Show only a sample: rendering the whole frame bloats the notebook output.
yelp_df.head()

Unnamed: 0,business_id,name,address,city,state,postal_code,latitude,longitude,stars_x,review_count,is_open,attributes,categories,review_id,useful,funny,cool,text,date
0,2AwhlMOMsYXJvskZKKG2GA,Tequila Sunrise,1551 N Green St,Brownsburg,IN,46112,39.867338,-86.390529,3.5,112,1.0,"{'NoiseLevel': ""u'average'"", 'BusinessParking'...","Restaurants, Mexican",djujEmxqpY5bmEJ3YaXiBg,0.0,0.0,0.0,We'd been playing a festival in Western Indian...,2017-09-18 17:29:03
1,2AwhlMOMsYXJvskZKKG2GA,Tequila Sunrise,1551 N Green St,Brownsburg,IN,46112,39.867338,-86.390529,3.5,112,1.0,"{'NoiseLevel': ""u'average'"", 'BusinessParking'...","Restaurants, Mexican",x3LyEt-uiBFCUw65oc0y5g,0.0,0.0,0.0,"The experience happened on Tuesday, July 3rd, ...",2018-07-04 01:02:30
2,2AwhlMOMsYXJvskZKKG2GA,Tequila Sunrise,1551 N Green St,Brownsburg,IN,46112,39.867338,-86.390529,3.5,112,1.0,"{'NoiseLevel': ""u'average'"", 'BusinessParking'...","Restaurants, Mexican",zZcbhKIqxcgjWO8LJUJUaQ,0.0,0.0,0.0,Phe-nom-nom-nom-enal! Took my wife here today ...,2016-10-13 02:00:28
3,2AwhlMOMsYXJvskZKKG2GA,Tequila Sunrise,1551 N Green St,Brownsburg,IN,46112,39.867338,-86.390529,3.5,112,1.0,"{'NoiseLevel': ""u'average'"", 'BusinessParking'...","Restaurants, Mexican",ZoN7GbeNGFyPwwKdMEr_Mw,0.0,0.0,0.0,"Carne Asada tacos were good, hubby had chicken...",2016-07-12 15:05:25
4,2AwhlMOMsYXJvskZKKG2GA,Tequila Sunrise,1551 N Green St,Brownsburg,IN,46112,39.867338,-86.390529,3.5,112,1.0,"{'NoiseLevel': ""u'average'"", 'BusinessParking'...","Restaurants, Mexican",NlQl6Z5UOFmAgSiMo0CbHA,0.0,0.0,0.0,Delicious food. Very generous portions. Very...,2018-07-03 23:28:02
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3584005,2O2K6SXPWv56amqxCECd4w,The Plum Pit,4405 Pennell Rd,Aston,DE,19014,39.856185,-75.427725,4.5,14,1.0,"{'RestaurantsDelivery': 'False', 'BusinessAcce...","Restaurants, Comfort Food, Food, Food Trucks, ...",Kt3gFeW1rhZz7RuiV-6Tcw,0.0,0.0,0.0,This is my favorite food truck! I only wish I ...,2019-07-14 14:25:35
3584006,2O2K6SXPWv56amqxCECd4w,The Plum Pit,4405 Pennell Rd,Aston,DE,19014,39.856185,-75.427725,4.5,14,1.0,"{'RestaurantsDelivery': 'False', 'BusinessAcce...","Restaurants, Comfort Food, Food, Food Trucks, ...",ruy3Ycey_gGbwkE_3TX1Fg,1.0,0.0,1.0,This food truck was stupid. Stupidly delicious...,2021-06-25 23:22:26
3584007,2O2K6SXPWv56amqxCECd4w,The Plum Pit,4405 Pennell Rd,Aston,DE,19014,39.856185,-75.427725,4.5,14,1.0,"{'RestaurantsDelivery': 'False', 'BusinessAcce...","Restaurants, Comfort Food, Food, Food Trucks, ...",C_l8NTpvNOEUorEmEOusaA,0.0,0.0,0.0,Bubba never disappoints i go to his fb page an...,2016-12-09 21:38:05
3584008,2O2K6SXPWv56amqxCECd4w,The Plum Pit,4405 Pennell Rd,Aston,DE,19014,39.856185,-75.427725,4.5,14,1.0,"{'RestaurantsDelivery': 'False', 'BusinessAcce...","Restaurants, Comfort Food, Food, Food Trucks, ...",q39JOIkHmIhdmYnjEhZCdQ,0.0,0.0,0.0,The truck was invited to our office for a part...,2020-02-19 22:59:06


In [None]:
# TF-IDF vectorizer: vocabulary capped at 2000 terms, min_df=1, scikit-learn's built-in English stop-word list.
vectorizer=TfidfVectorizer(max_features=2000, min_df=1, stop_words='english')
# Single-component NMF using the multiplicative-update ("mu") solver; both objects are re-fitted per review below.
nmf = NMF(n_components=1, solver="mu")

In [None]:
# Fit TF-IDF + a one-component NMF on each cleaned review separately and keep the
# (up to) 10 highest-weighted terms as a comma-separated string, weakest term first.
nmf_topics = []
for cleaned_review in clean_review_list:
    try:
        X = vectorizer.fit_transform([cleaned_review])
    except ValueError:
        # TfidfVectorizer raises ValueError when a document yields an empty vocabulary
        # (e.g. the cleaned review is empty or contains only stop words). Previously
        # this aborted the whole loop; store an empty placeholder instead so that
        # nmf_topics stays aligned one-to-one with clean_review_list.
        nmf_topics.append("")
        continue
    idx_to_word = np.array(vectorizer.get_feature_names_out())
    W = nmf.fit_transform(X)
    H = nmf.components_
    # H has one row per component (a single row here, since n_components=1).
    for topic in H:
        top_terms = idx_to_word[topic.argsort()[-10:]]
        nmf_topics.append(",".join(str(term) for term in top_terms))

ValueError: ignored

In [None]:
# Preview the top-term strings of the first 20 reviews.
nmf_topics[0:20]

['downside,chip,restaurant,salsa,food,place,outstanding,mexican,meal,star',
 'chile,relleno,got,order,water,plate,minute,waitress,taco,food',
 'easily,experience,food,friday,great,wife,clean,enchilada,good,nice',
 'good,friendly,favorite,enchilada,clearly,chicken,carne,brownsburg,hubby,working',
 'delicious,generous,great,lot,menu,mexican,portion,restaurant,service,food',
 'husband,end,fajitas,food,service,meal,check,better,ask,came',
 'dish,decor,decent,clean,increased,waiter,mexican,nice,price,food',
 'authentic,bad,food,mexican,price,good',
 'margarita,pay,rate,star,told,unfortunately,worst,alcohol,place,yes',
 'disappointed,eaten,fajitas,finish,gone,cooking,time,anymore,husband,food',
 'food,great,dipping,pretty,small,waiting,big,love,restaurant,mexican',
 'daily,drink,good,great,mx,price,restaurant,special',
 'carnitas,chip,eaten,fantastic,food,good,great,pretty,salsa,time',
 'minute,party,poor,seated,service,time,took,wo,extremely,food',
 'place,restaurant,staff,suggests,try,vari