In [1]:
import math
import numpy as np
import pandas as pd
import nltk

from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.stem import PorterStemmer
from nltk.stem import LancasterStemmer
from nltk.stem.wordnet import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer

from nltk.collocations import BigramCollocationFinder, BigramAssocMeasures
from nltk.stem import WordNetLemmatizer
from nltk import word_tokenize

from sklearn.feature_extraction.text import TfidfVectorizer

In [2]:
from nltk.stem.wordnet import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()
def lemmatization(reviews):   # lemmatizing
    """Lemmatize every review and return the rebuilt strings as a list.

    Each review is split with ``word_tokenize``, every token goes through
    ``lemmatizer.lemmatize``, and the lemmas are glued back together with one
    space after each token (so a non-empty review ends with a trailing space).

    NOTE(review): ``lemmatize`` is called without a part-of-speech tag;
    presumably that is why forms such as "wa" and "ha" appear in the
    lemmatized reviews printed later in this notebook.
    """
    return ["".join(lemmatizer.lemmatize(token) + " " for token in word_tokenize(text))
            for text in reviews]

In [3]:
data = pd.read_csv('../data/yelp_bubble_tea_reviews_encoded.csv')
# Drop rows whose 'categories' text mentions 'Vietnamese'. regex=False matches the word
# literally and na=False treats a missing category as "no match", so the negated mask
# stays boolean even if 'categories' has gaps.
data = data[~data['categories'].str.contains('Vietnamese', regex=False, na=False)]
print(data.shape)

(37565, 43)


In [4]:
# Split by star rating: 4 and 5 stars are good reviews, 1 and 2 stars are bad reviews;
# ratings strictly between 2 and 4 end up in neither frame.
goodReviews = data.loc[data['review_rating'].ge(4)]
badReviews = data.loc[data['review_rating'].le(2)]

In [5]:
# Plain Python lists of the review texts ('reivew' is the column's spelling in the CSV).
goodReviewsList = goodReviews['reivew'].tolist()
badReviewsList = badReviews['reivew'].tolist()

### TF-IDF for taro milk tea

#### Add product name

In [6]:
# Good reviews that mention the product by name. The match is literal (regex=False) and
# case-sensitive; na=False keeps the mask boolean if a review text is missing.
goodCream = goodReviews[goodReviews['reivew'].str.contains('taro milk tea', regex=False, na=False)]   # add!
goodcreamList = list(goodCream['reivew'])
goodcreamList = lemmatization(goodcreamList)

In [7]:
# Bad reviews that mention the product by name. The match is literal (regex=False) and
# case-sensitive; na=False keeps the mask boolean if a review text is missing.
badCream = badReviews[badReviews['reivew'].str.contains('taro milk tea', regex=False, na=False)]   # add!
badcreamList = list(badCream['reivew'])
badcreamList = lemmatization(badcreamList)

#### Stopwords for products

In [8]:
# Aliased so that the `stopwords` list built below does not shadow the NLTK corpus reader.
from nltk.corpus import stopwords as nltk_stopwords

# Custom additions to NLTK's English stop words (duplicates removed).
newStopWords = ['wa','one','get','would','place','also','good','great','love','come',
                'bubble','definitely','nice','always','make','best','came','much','super',
                'could','know','even','got','went','said','really','give','able','made','back',
                'service','customer','time','try','flavor',
                'go','delicious','staff','well',
                'kung','fu','brew','bar','sea','recommend','tea','Boba','boba','milk',
                '.', ',', ':', "''", "'s", "'", '``', '(', ')', '-']
# A fresh list is built on every run, so re-executing the cell never accumulates entries.
stopwords = nltk_stopwords.words('english') + newStopWords   # adding custom stopwords

#### TF-IDF

In [9]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF over bigrams and trigrams only, using the custom stop-word list.
vectorizer = TfidfVectorizer(ngram_range=(2,3),
#                             token_pattern=r'\b[a-zA-Z]{3,}\b',   # three or more alpha characters
                             max_df=0.5,   # ignore terms that appear in more than 50% of documents
                             min_df=3,     # ignore terms that appear in fewer than 3 documents
                             stop_words=stopwords,
                             max_features=500)   # cap the vocabulary at 500 n-grams

In [10]:
### good reviews

X = vectorizer.fit_transform(goodcreamList)
# get_feature_names() was removed in scikit-learn 1.2; use its replacement when available.
_get_terms = getattr(vectorizer, "get_feature_names_out", None) or vectorizer.get_feature_names
terms = list(_get_terms())
# Total TF-IDF weight of each n-gram over all reviews, summed on the sparse matrix
# so the dense documents-by-terms array is never materialised.
tf_idf = pd.Series(np.asarray(X.sum(axis=0)).ravel(), index=terms)
scoreGood = tf_idf.to_frame(name="score").assign(term=terms)
scoreGood = scoreGood.sort_values(by="score", ascending=False)   # generate TF-IDF scores
scoreGood.head(50)

Unnamed: 0,score,term
real taro,10.580238,real taro
ice cream,10.039961,ice cream
tried taro,9.927235,tried taro
ordered taro,9.417589,ordered taro
drink taro,7.281367,drink taro
hot taro,6.98173,hot taro
ca wait,6.591288,ca wait
order taro,5.965443,order taro
grass jelly,5.52353,grass jelly
taro real,5.496414,taro real


In [11]:
# scoreGood.to_csv('../data/scoreGood.csv')

In [12]:
### bad reviews

X = vectorizer.fit_transform(badcreamList)
# get_feature_names() was removed in scikit-learn 1.2; use its replacement when available.
_get_terms = getattr(vectorizer, "get_feature_names_out", None) or vectorizer.get_feature_names
terms = list(_get_terms())
# Total TF-IDF weight of each n-gram over all reviews, summed on the sparse matrix
# so the dense documents-by-terms array is never materialised.
tf_idf = pd.Series(np.asarray(X.sum(axis=0)).ravel(), index=terms)
scoreBad = tf_idf.to_frame(name="score").assign(term=terms)
scoreBad = scoreBad.sort_values(by="score", ascending=False)   # generate TF-IDF scores
scoreBad.head(50)

Unnamed: 0,score,term
ordered taro,7.21651,ordered taro
tasted like,5.62655,tasted like
taro tasted,5.540189,taro tasted
taste like,4.74055,taste like
taro taro,3.953468,taro taro
drink taro,3.787556,drink taro
taro tapioca,3.660935,taro tapioca
like taro,2.96073,like taro
taro powder,2.945392,taro powder
almost half,2.686196,almost half


In [13]:
# scoreBad.to_csv('../data/scoreBad.csv')

#### Look back to reviews

In [14]:
import re
def printReviewByKey(reviews, keyword):
    """Print each review that contains ``keyword``, then a match tally.

    reviews -- sized collection of review strings.
    keyword -- substring to look for (plain, case-sensitive ``in`` test).

    Every matching review is printed followed by a dashed separator; the last
    line printed is "<matches> out of <total> reviews. ". Returns None.
    """
    divider = '\n \n ------------------------------------------------- \n'
    hits = 0
    for review in reviews:
        if keyword not in review:
            continue
        hits += 1
        print(review + divider)
    print('{} out of {} reviews. '.format(hits, len(reviews)))

In [15]:
# Read the good reviews that contain 'taro tapioca'.
printReviewByKey(reviews=goodcreamList, keyword='taro tapioca')

I wa craving for some bbt and decided to try OneZo since they recently opened up a new location near me . I had tried OneZo when they were at the Waterfront Food Festival . But they only had 1 flavour of the tapioca which wa cactus . Despite it being such an odd flavour , I really enjoyed the texture of the tapioca . We came here on a friday night at around 8pm . It wa n't busy , just 2 people ahead of u . It took u some time to decide what we wanted to order since it wa our first time coming to an actual location . They had quite a few variety and the person at the cash wa very nice about u taking our time . We decided to go with the OneZo milk tea and the taro milk tea . The OneZo came with 3 type of tapioca and to be honest , I could n't really taste the different flavour . But the tapioca texture wa very different than any tapioca that I 've had before . It 's chewy but not sticky , almost like a mochi . Very good . The milk tea wa very standard.. I still prefer the milk tea 's fro

In [16]:
# Read the bad reviews that contain 'taro tapioca'.
printReviewByKey(reviews=badcreamList, keyword='taro tapioca')

The drink were okay but the service wa bad so that 's why I 'm giving this location specifically a 2 star . We ordered the taro milk tea with taro tapioca and the oolong milk tea with sesame tapioca . Both milk tea were yummy but really sweet , even at 50 % sugar . The taro tapioca wa very chewy and delicious , I really enjoyed it . On the other hand , I love black sesame anything but the sesame tapioca wa meh . It wa very hard and tough to chew on . The grittiness made me feel like I wa chewing on wet sand . Now onto the customer service . The guy at the cashier wa really nice but some girl had to come and intrude while he wa doing his job in taking our order and she wa n't very friendly . She made the whole process more complicated than it needed to be . I already checked in and followed their Instagram for the 10 % off before I paid for the drink , and then a I wa paying she noticed I wa n't alone and did not give me the 10 % unless my boyfriend check in too . I do n't get it . I 'm

### TF-IDF for hokkaido milk tea

#### Add product name

In [17]:
# Good reviews that mention the product by name. The match is literal (regex=False) and
# case-sensitive; na=False keeps the mask boolean if a review text is missing.
goodCream = goodReviews[goodReviews['reivew'].str.contains('hokkaido milk tea', regex=False, na=False)]   # add!
goodcreamList = list(goodCream['reivew'])
goodcreamList = lemmatization(goodcreamList)

In [18]:
# Bad reviews that mention the product by name. The match is literal (regex=False) and
# case-sensitive; na=False keeps the mask boolean if a review text is missing.
badCream = badReviews[badReviews['reivew'].str.contains('hokkaido milk tea', regex=False, na=False)]   # add!
badcreamList = list(badCream['reivew'])
badcreamList = lemmatization(badcreamList)

#### Stopwords for products

In [19]:
# Aliased so that the `stopwords` list built below does not shadow the NLTK corpus reader.
from nltk.corpus import stopwords as nltk_stopwords

# Custom additions to NLTK's English stop words (duplicates removed).
newStopWords = ['wa','one','get','would','place','also','good','great','love','come',
                'bubble','definitely','nice','always','make','best','came','much','super',
                'could','know','even','got','went','said','really','give','able','made','back',
                'service','customer','time','try','flavor',
                'go','delicious','staff','well',
                'kung','fu','brew','bar','sea','recommend','tea','Boba','boba','milk',
                '.', ',', ':', "''", "'s", "'", '``', '(', ')', '-']
# A fresh list is built on every run, so re-executing the cell never accumulates entries.
stopwords = nltk_stopwords.words('english') + newStopWords   # adding custom stopwords

#### TF-IDF

In [20]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF over bigrams and trigrams only, using the custom stop-word list.
vectorizer = TfidfVectorizer(ngram_range=(2,3),
#                             token_pattern=r'\b[a-zA-Z]{3,}\b',   # three or more alpha characters
                             max_df=0.5,   # ignore terms that appear in more than 50% of documents
                             min_df=1,     # no minimum: keep terms that appear in even a single document
                             stop_words=stopwords,
                             max_features=300)   # cap the vocabulary at 300 n-grams

In [21]:
### good reviews

X = vectorizer.fit_transform(goodcreamList)
# get_feature_names() was removed in scikit-learn 1.2; use its replacement when available.
_get_terms = getattr(vectorizer, "get_feature_names_out", None) or vectorizer.get_feature_names
terms = list(_get_terms())
# Total TF-IDF weight of each n-gram over all reviews, summed on the sparse matrix
# so the dense documents-by-terms array is never materialised.
tf_idf = pd.Series(np.asarray(X.sum(axis=0)).ravel(), index=terms)
scoreGood = tf_idf.to_frame(name="score").assign(term=terms)
scoreGood = scoreGood.sort_values(by="score", ascending=False)   # generate TF-IDF scores
scoreGood.head(50)

Unnamed: 0,score,term
salt cream,3.013619,salt cream
tried hokkaido,2.06949,tried hokkaido
favorite spot,1.914544,favorite spot
salt creme,1.824123,salt creme
drink hokkaido,1.810941,drink hokkaido
okinawa hokkaido,1.80863,okinawa hokkaido
hokkaido yummy,1.732684,hokkaido yummy
hokkaido salt,1.564613,hokkaido salt
favorite hokkaido,1.555808,favorite hokkaido
taro slush,1.527553,taro slush


In [22]:
# scoreGood.to_csv('../data/scoreGood.csv')

In [23]:
### bad reviews

X = vectorizer.fit_transform(badcreamList)
# get_feature_names() was removed in scikit-learn 1.2; use its replacement when available.
_get_terms = getattr(vectorizer, "get_feature_names_out", None) or vectorizer.get_feature_names
terms = list(_get_terms())
# Total TF-IDF weight of each n-gram over all reviews, summed on the sparse matrix
# so the dense documents-by-terms array is never materialised.
tf_idf = pd.Series(np.asarray(X.sum(axis=0)).ravel(), index=terms)
scoreBad = tf_idf.to_frame(name="score").assign(term=terms)
scoreBad = scoreBad.sort_values(by="score", ascending=False)   # generate TF-IDF scores
scoreBad.head(50)

Unnamed: 0,score,term
salt cream,0.691511,salt cream
nothing like,0.409862,nothing like
taste like,0.394599,taste like
matcha espresso,0.390567,matcha espresso
sugar level,0.321541,sugar level
mild nothing,0.316228,mild nothing
prepared powder syrup,0.316228,prepared powder syrup
powder syrup,0.316228,powder syrup
powder syrup hubby,0.316228,powder syrup hubby
rather prepared,0.316228,rather prepared


In [24]:
# scoreBad.to_csv('../data/scoreBad.csv')

#### Look back to reviews

In [25]:
import re
def printReviewByKey(reviews, keyword):
    """Print each review that contains ``keyword``, then a match tally.

    reviews -- sized collection of review strings.
    keyword -- substring to look for (plain, case-sensitive ``in`` test).

    Every matching review is printed followed by a dashed separator; the last
    line printed is "<matches> out of <total> reviews. ". Returns None.
    """
    divider = '\n \n ------------------------------------------------- \n'
    hits = 0
    for review in reviews:
        if keyword not in review:
            continue
        hits += 1
        print(review + divider)
    print('{} out of {} reviews. '.format(hits, len(reviews)))

In [26]:
# Read the good reviews that contain 'colada'.
printReviewByKey(reviews=goodcreamList, keyword='colada')

This is my favorite place for boba . The hokkaido milk tea is my favorite , along with the pina colada . Staff is very friendly ! They have a unique topping that you can opt for called 'sea salt cheese ' , and it 's a foam topping that taste like cheese cake . I do n't normally leave yelp review , but Sweetea need more recognition ! ! 
 
 ------------------------------------------------- 

A very cute place . It is small but it seems like there 's just enough seating if you 're not planning on staying too long . I 've been here a few time and order 4 different drink . I ordered the lychee peachy , hokkaido milk tea , jasmine milk tea , and the pina colada slush . The boba wa nice and chewy . It seemed pretty fresh . The best one out of the 4 is the jasmine milk tea but next time I 'll ask for half sweet . 
 
 ------------------------------------------------- 

2 out of 62 reviews. 


In [27]:
# Read the bad reviews that contain 'orange'.
printReviewByKey(reviews=badcreamList, keyword='orange')

This wa my first time coming here and honestly I only came for the glass cup . Their customer service is great and the setting is aesthetically pleasing . BUT their drink taste really off and nothing like how they 're suppose to taste . Their boba serving are very small . Barely anything there . Couple sip and it 's gone . I came with five others and we all ordered something different . We got the Hokkaido , Thai , Rose and Matcha milk tea . Out of all those drink only the matcha wa good . The Thai milk tea taste watered down and oddly very orange than normal . The rose milk tea taste like body spray . Very difficult to drink because of how strong the rose is . Lastly , the hokkaido milk tea ha a very thick texture and it taste more like almond milk . Very disappointing ! I do n't plan on coming back . Sorry 
 
 ------------------------------------------------- 

1 out of 7 reviews. 


### TF-IDF for jasmine milk tea

#### Add product name

In [28]:
# Good reviews that mention the product by name. The match is literal (regex=False) and
# case-sensitive; na=False keeps the mask boolean if a review text is missing.
goodCream = goodReviews[goodReviews['reivew'].str.contains('jasmine milk tea', regex=False, na=False)]   # add!
goodcreamList = list(goodCream['reivew'])
goodcreamList = lemmatization(goodcreamList)

In [29]:
# Bad reviews that mention the product by name. The match is literal (regex=False) and
# case-sensitive; na=False keeps the mask boolean if a review text is missing.
badCream = badReviews[badReviews['reivew'].str.contains('jasmine milk tea', regex=False, na=False)]   # add!
badcreamList = list(badCream['reivew'])
badcreamList = lemmatization(badcreamList)

#### Stopwords for products

In [30]:
# Aliased so that the `stopwords` list built below does not shadow the NLTK corpus reader.
from nltk.corpus import stopwords as nltk_stopwords

# Custom additions to NLTK's English stop words (duplicates removed).
newStopWords = ['wa','one','get','would','place','also','good','great','love','come',
                'bubble','definitely','nice','always','make','best','came','much','super',
                'could','know','even','got','went','said','really','give','able','made','back',
                'service','customer','time','try','flavor',
                'go','delicious','staff','well',
                'kung','fu','brew','bar','sea','recommend','tea','Boba','boba','milk',
                '.', ',', ':', "''", "'s", "'", '``', '(', ')', '-']
# A fresh list is built on every run, so re-executing the cell never accumulates entries.
stopwords = nltk_stopwords.words('english') + newStopWords   # adding custom stopwords

#### TF-IDF

In [31]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF over bigrams and trigrams only, using the custom stop-word list.
vectorizer = TfidfVectorizer(ngram_range=(2,3),
#                             token_pattern=r'\b[a-zA-Z]{3,}\b',   # three or more alpha characters
                             max_df=0.5,   # ignore terms that appear in more than 50% of documents
                             min_df=3,     # ignore terms that appear in fewer than 3 documents
                             stop_words=stopwords,
                             max_features=300)   # cap the vocabulary at 300 n-grams

In [32]:
### good reviews

X = vectorizer.fit_transform(goodcreamList)
# get_feature_names() was removed in scikit-learn 1.2; use its replacement when available.
_get_terms = getattr(vectorizer, "get_feature_names_out", None) or vectorizer.get_feature_names
terms = list(_get_terms())
# Total TF-IDF weight of each n-gram over all reviews, summed on the sparse matrix
# so the dense documents-by-terms array is never materialised.
tf_idf = pd.Series(np.asarray(X.sum(axis=0)).ravel(), index=terms)
scoreGood = tf_idf.to_frame(name="score").assign(term=terms)
scoreGood = scoreGood.sort_values(by="score", ascending=False)   # generate TF-IDF scores
scoreGood.head(50)

Unnamed: 0,score,term
ice cream,8.462452,ice cream
ordered jasmine,8.264016,ordered jasmine
drink jasmine,6.389647,drink jasmine
popcorn chicken,5.607532,popcorn chicken
soft chewy,4.67303,soft chewy
egg pudding,4.576706,egg pudding
stamp card,4.555919,stamp card
sweetness level,4.395901,sweetness level
favorite jasmine,4.175473,favorite jasmine
new favorite,3.892657,new favorite


In [33]:
# scoreGood.to_csv('../data/scoreGood.csv')

In [34]:
### bad reviews

X = vectorizer.fit_transform(badcreamList)
# get_feature_names() was removed in scikit-learn 1.2; use its replacement when available.
_get_terms = getattr(vectorizer, "get_feature_names_out", None) or vectorizer.get_feature_names
terms = list(_get_terms())
# Total TF-IDF weight of each n-gram over all reviews, summed on the sparse matrix
# so the dense documents-by-terms array is never materialised.
tf_idf = pd.Series(np.asarray(X.sum(axis=0)).ravel(), index=terms)
scoreBad = tf_idf.to_frame(name="score").assign(term=terms)
scoreBad = scoreBad.sort_values(by="score", ascending=False)   # generate TF-IDF scores
scoreBad.head(50)

Unnamed: 0,score,term
ordered jasmine,5.902985,ordered jasmine
tasted like,5.451049,tasted like
jasmine tasted,3.464454,jasmine tasted
taste like,2.941505,taste like
friend ordered,2.752838,friend ordered
girl working,2.707107,girl working
drink jasmine,2.664816,drink jasmine
red bean,2.307881,red bean
stamp card,2.30652,stamp card
drink taste,2.093544,drink taste


In [35]:
# scoreBad.to_csv('../data/scoreBad.csv')

#### Look back to reviews

In [36]:
import re
def printReviewByKey(reviews, keyword):
    """Print each review that contains ``keyword``, then a match tally.

    reviews -- sized collection of review strings.
    keyword -- substring to look for (plain, case-sensitive ``in`` test).

    Every matching review is printed followed by a dashed separator; the last
    line printed is "<matches> out of <total> reviews. ". Returns None.
    """
    divider = '\n \n ------------------------------------------------- \n'
    hits = 0
    for review in reviews:
        if keyword not in review:
            continue
        hits += 1
        print(review + divider)
    print('{} out of {} reviews. '.format(hits, len(reviews)))

In [37]:
# Read the good reviews that contain 'almond'.
printReviewByKey(reviews=goodcreamList, keyword='almond')

I love everything about this place and is now my favorite boba place to go to . The environment : Not only wa the atmosphere so relaxing but even the music wa very calming . I would choose this place to study or work on thing I have to work on in a heart beat . The third time I went here I had my friend check the place out with me . I wa able to hear everything she said because the music is not loud at all and trust me that 's a huge thing . I 've been to so many boba place and it 's rare to find a place that ha such soothing music . Customer service : It wa honestly one of the best I 've ever experienced . I 've been here three time already and I always feel so welcomed . They 're always so nice and patient . For my office , it wa my time to get u boba so I called to order and I will tell you , some of my friend are picky ... When I ordered for about 8 people the person on the phone took the time to really understand what I said and got all of them correctly . I told him that I would 

In [38]:
# Read the bad reviews that contain 'red bean'.
printReviewByKey(reviews=badcreamList, keyword='red bean')

Decided to try this place since we were in the area and wa wanting some boba a an afternoon snack but wa sorely disappointed here with all those high rating and review ! Maybe it wa a bad day for them but the boba wa way over cooked , it wa mush and the drink were watered down . We ordered a jasmine milk tea and it wa watered down with the mushy boba . Then we got the red bean milk tea with boba and lychee.. that tasted more like lychee milk tea with bit of red bean in it . I mean there are good thing here too like the place wa clean and price are great ! The fried tofu wa okay , it nothing to write home about but it good . The sauce it came with wa the best part ! Haha we may give this place a second chance but we still like JT boba house so far in avondale ! *edit lowered to ONE star cuz the person who work there must think theyre so cool cuz they pulled right into the handicapped spot right in front and he looked like he wa maybe in his 20 ... with his gf or something walking out li

### TF-IDF for oolong milk tea

#### Add product name

In [39]:
# Good reviews that mention the product by name. The match is literal (regex=False) and
# case-sensitive; na=False keeps the mask boolean if a review text is missing.
goodCream = goodReviews[goodReviews['reivew'].str.contains('oolong milk tea', regex=False, na=False)]   # add!
goodcreamList = list(goodCream['reivew'])
goodcreamList = lemmatization(goodcreamList)

In [40]:
# Bad reviews that mention the product by name. The match is literal (regex=False) and
# case-sensitive; na=False keeps the mask boolean if a review text is missing.
badCream = badReviews[badReviews['reivew'].str.contains('oolong milk tea', regex=False, na=False)]   # add!
badcreamList = list(badCream['reivew'])
badcreamList = lemmatization(badcreamList)

#### Stopwords for products

In [41]:
# Aliased so that the `stopwords` list built below does not shadow the NLTK corpus reader.
from nltk.corpus import stopwords as nltk_stopwords

# Custom additions to NLTK's English stop words (duplicates removed).
newStopWords = ['wa','one','get','would','place','also','good','great','love','come',
                'bubble','definitely','nice','always','make','best','came','much','super',
                'could','know','even','got','went','said','really','give','able','made','back',
                'service','customer','time','try','flavor',
                'go','delicious','staff','well',
                'kung','fu','brew','bar','sea','recommend','tea','Boba','boba','milk',
                '.', ',', ':', "''", "'s", "'", '``', '(', ')', '-']
# A fresh list is built on every run, so re-executing the cell never accumulates entries.
stopwords = nltk_stopwords.words('english') + newStopWords   # adding custom stopwords

#### TF-IDF

In [42]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF over bigrams and trigrams only, using the custom stop-word list.
vectorizer = TfidfVectorizer(ngram_range=(2,3),
#                             token_pattern=r'\b[a-zA-Z]{3,}\b',   # three or more alpha characters
                             max_df=0.5,   # ignore terms that appear in more than 50% of documents
                             min_df=1,     # no minimum: keep terms that appear in even a single document
                             stop_words=stopwords,
                             max_features=500)   # cap the vocabulary at 500 n-grams

In [43]:
### good reviews

X = vectorizer.fit_transform(goodcreamList)
# get_feature_names() was removed in scikit-learn 1.2; use its replacement when available.
_get_terms = getattr(vectorizer, "get_feature_names_out", None) or vectorizer.get_feature_names
terms = list(_get_terms())
# Total TF-IDF weight of each n-gram over all reviews, summed on the sparse matrix
# so the dense documents-by-terms array is never materialised.
tf_idf = pd.Series(np.asarray(X.sum(axis=0)).ravel(), index=terms)
scoreGood = tf_idf.to_frame(name="score").assign(term=terms)
scoreGood = scoreGood.sort_values(by="score", ascending=False)   # generate TF-IDF scores
scoreGood.head(50)

Unnamed: 0,score,term
oreo oolong,6.220452,oreo oolong
roasted oolong,4.826113,roasted oolong
tried oolong,4.544712,tried oolong
grass jelly,4.101853,grass jelly
herbal jelly,3.994994,herbal jelly
oolong honey,3.570775,oolong honey
las vegas,3.37413,las vegas
le sugar,2.961116,le sugar
honey oolong,2.858894,honey oolong
drink oolong,2.846217,drink oolong


In [44]:
# scoreGood.to_csv('../data/scoreGood.csv')

In [45]:
### bad reviews

X = vectorizer.fit_transform(badcreamList)
# get_feature_names() was removed in scikit-learn 1.2; use its replacement when available.
_get_terms = getattr(vectorizer, "get_feature_names_out", None) or vectorizer.get_feature_names
terms = list(_get_terms())
# Total TF-IDF weight of each n-gram over all reviews, summed on the sparse matrix
# so the dense documents-by-terms array is never materialised.
tf_idf = pd.Series(np.asarray(X.sum(axis=0)).ravel(), index=terms)
scoreBad = tf_idf.to_frame(name="score").assign(term=terms)
scoreBad = scoreBad.sort_values(by="score", ascending=False)   # generate TF-IDF scores
scoreBad.head(50)

Unnamed: 0,score,term
oreo oolong,1.645991,oreo oolong
last week,0.947275,last week
tasted like,0.883007,tasted like
peach oolong,0.672893,peach oolong
brown sugar,0.663828,brown sugar
like oolong,0.640594,like oolong
tapioca tapioca,0.569532,tapioca tapioca
oolong taste,0.565787,oolong taste
taste like,0.554836,taste like
grass jelly,0.498655,grass jelly


In [46]:
# scoreBad.to_csv('../data/scoreBad.csv')

#### Look back to reviews

In [47]:
import re
def printReviewByKey(reviews, keyword):
    """Print each review that contains ``keyword``, then a match tally.

    reviews -- sized collection of review strings.
    keyword -- substring to look for (plain, case-sensitive ``in`` test).

    Every matching review is printed followed by a dashed separator; the last
    line printed is "<matches> out of <total> reviews. ". Returns None.
    """
    divider = '\n \n ------------------------------------------------- \n'
    hits = 0
    for review in reviews:
        if keyword not in review:
            continue
        hits += 1
        print(review + divider)
    print('{} out of {} reviews. '.format(hits, len(reviews)))

In [48]:
# Read the good reviews that contain 'peach oolong'.
printReviewByKey(reviews=goodcreamList, keyword='peach oolong')

My friend drove me here one time and I did n't wan na step in after seeing the line outside . BUT NOW I know why . I 'm seriously here everyday or every other day now . My boyfriend and I are suuuuper inlove with the drink here . Especially the oolong milk tea . The first thing I had ever ordered wa a white gourd milk tea , with grass jelly -- bc I do not like the tapioca boba thing . Their grass jelly is soooooo good . I could seriously eat it with just milk , lol . Then I tried some drink off of my friend which were super delicious too . Mango snow ( milk ) , because I do n't like fruity thing , this mango milk slush wa gr8 with the mango jelly ! My little sister got a peach oolong tea ( first time I ever had oolong ) , it took a couple sip to get used to.. But now I 'm inlove with this tea . Like I said , I 'm probably here every other day now . Even though it 's quite a drive from my house , it is so worth it ! Now I usually just always get white gourd milk tea with grass jelly . A

In [49]:
# Read the bad reviews that contain '50 cent'.
printReviewByKey(reviews=badcreamList, keyword='50 cent')

I think this is the first time I ever gave a one-star review to an overly-hyped boba shop . But this is also the first time I ever gave up on finishing an order of milk tea . Normally , I can drink a boba milk tea within 10 minute of receiving it . The oreo oolong milk tea that I got from Kung Fu Tea , however , sat around from 3pm to around 3am before I finally convinced my friend to finish it after becoming dehydrated from a night of partying . She did n't even like it when she first tried it with me and only gave in after not having anything else around to quench her thirst . That 's how disgusting it tasted . None of u boba lover wanted any of it . Furthermore , bogus charge . They had `` Oreo Oolong Milk Tea '' listed a one of their flavored milk tea option and so I assumed it wa the same listed price a all the other flavored milk tea drink , because , you know , that 's how every other boba shop doe it . But surprise ! I find out on my receipt that they charge you an extra 50 cen

### TF-IDF for almond milk tea

#### Add product name

In [50]:
# Good reviews that mention the product by name. The match is literal (regex=False) and
# case-sensitive; na=False keeps the mask boolean if a review text is missing.
goodCream = goodReviews[goodReviews['reivew'].str.contains('almond milk tea', regex=False, na=False)]   # add!
goodcreamList = list(goodCream['reivew'])
goodcreamList = lemmatization(goodcreamList)

In [51]:
# Bad reviews that mention the product by name. The match is literal (regex=False) and
# case-sensitive; na=False keeps the mask boolean if a review text is missing.
badCream = badReviews[badReviews['reivew'].str.contains('almond milk tea', regex=False, na=False)]   # add!
badcreamList = list(badCream['reivew'])
badcreamList = lemmatization(badcreamList)

#### Stopwords for products

In [52]:
# Aliased so that the `stopwords` list built below does not shadow the NLTK corpus reader.
from nltk.corpus import stopwords as nltk_stopwords

# Custom additions to NLTK's English stop words (duplicates removed).
newStopWords = ['wa','one','get','would','place','also','good','great','love','come',
                'bubble','definitely','nice','always','make','best','came','much','super',
                'could','know','even','got','went','said','really','give','able','made','back',
                'service','customer','time','try','flavor',
                'go','delicious','staff','well',
                'kung','fu','brew','bar','sea','recommend','tea','Boba','boba','milk',
                '.', ',', ':', "''", "'s", "'", '``', '(', ')', '-']
# A fresh list is built on every run, so re-executing the cell never accumulates entries.
stopwords = nltk_stopwords.words('english') + newStopWords   # adding custom stopwords

#### TF-IDF

In [53]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF over bigrams and trigrams only, using the custom stop-word list.
vectorizer = TfidfVectorizer(ngram_range=(2,3),
#                             token_pattern=r'\b[a-zA-Z]{3,}\b',   # three or more alpha characters
                             max_df=0.5,   # ignore terms that appear in more than 50% of documents
                             min_df=3,     # ignore terms that appear in fewer than 3 documents
                             stop_words=stopwords,
                             max_features=300)   # cap the vocabulary at 300 n-grams

In [54]:
### good reviews

X = vectorizer.fit_transform(goodcreamList)
# get_feature_names() was removed in scikit-learn 1.2; use its replacement when available.
_get_terms = getattr(vectorizer, "get_feature_names_out", None) or vectorizer.get_feature_names
terms = list(_get_terms())
# Total TF-IDF weight of each n-gram over all reviews, summed on the sparse matrix
# so the dense documents-by-terms array is never materialised.
tf_idf = pd.Series(np.asarray(X.sum(axis=0)).ravel(), index=terms)
scoreGood = tf_idf.to_frame(name="score").assign(term=terms)
scoreGood = scoreGood.sort_values(by="score", ascending=False)   # generate TF-IDF scores
scoreGood.head(50)

Unnamed: 0,score,term
ordered almond,6.465387,ordered almond
ice cream,5.403976,ice cream
hot almond,5.013151,hot almond
free wifi,4.764461,free wifi
taro almond,3.884185,taro almond
red bean,3.676254,red bean
drink like,3.67426,drink like
taste like,3.671729,taste like
las vegas,3.453823,las vegas
pistachio almond,3.344249,pistachio almond


In [55]:
# scoreGood.to_csv('../data/scoreGood.csv')

In [56]:
### bad reviews

X = vectorizer.fit_transform(badcreamList)
# get_feature_names() was removed in scikit-learn 1.2; use its replacement when available.
_get_terms = getattr(vectorizer, "get_feature_names_out", None) or vectorizer.get_feature_names
terms = list(_get_terms())
# Total TF-IDF weight of each n-gram over all reviews, summed on the sparse matrix
# so the dense documents-by-terms array is never materialised.
tf_idf = pd.Series(np.asarray(X.sum(axis=0)).ravel(), index=terms)
scoreBad = tf_idf.to_frame(name="score").assign(term=terms)
scoreBad = scoreBad.sort_values(by="score", ascending=False)   # generate TF-IDF scores
scoreBad.head(50)

Unnamed: 0,score,term
tasted like,5.874271,tasted like
ordered almond,4.614054,ordered almond
taste like,4.536702,taste like
almond tasted,3.856191,almond tasted
ice cream,2.60758,ice cream
different drink,2.232833,different drink
like sugar,2.128612,like sugar
small almond,2.106921,small almond
almond tasted like,2.051992,almond tasted like
like water,2.00141,like water


In [57]:
# scoreBad.to_csv('../data/scoreBad.csv')

#### Look back to reviews

In [58]:
import re
def printReviewByKey(reviews, keyword):
    """Print each review that contains ``keyword``, then a match tally.

    reviews -- sized collection of review strings.
    keyword -- substring to look for (plain, case-sensitive ``in`` test).

    Every matching review is printed followed by a dashed separator; the last
    line printed is "<matches> out of <total> reviews. ". Returns None.
    """
    divider = '\n \n ------------------------------------------------- \n'
    hits = 0
    for review in reviews:
        if keyword not in review:
            continue
        hits += 1
        print(review + divider)
    print('{} out of {} reviews. '.format(hits, len(reviews)))

In [59]:
# Read the good reviews that contain 'pistachio'.
printReviewByKey(reviews=goodcreamList, keyword='pistachio')

so good and the employee are so sweet ! love their nagasaki and pistachio almond milk tea : ) 
 
 ------------------------------------------------- 

I feel pretty lucky to know that a boba shop ha opened up so close to my house . Cloud Tea is a neat shop with plenty of seating and some interesting drink on the menu . I tried the Nagasaki milk tea with boba . The drink taste like a milky , creamy Chrysanthemum tea which is also shaken to death on those cool drink machine . My sister tried the Matcha milk tea and it taste a bit sweeter than I expected . Actually , the sweetness in both drink stuck to the roof of my mouth . On the other hand , the boba is good and taste pretty fresh . I came back again and tried the pistachio almond milk tea . Woah ! It definitely taste like what the name say . I thoroughly enjoyed it and I had a good time getting some studying done . 
 
 ------------------------------------------------- 

Pretty good place , been there twice now , the staff is friendly 

In [60]:
# Read the bad reviews that contain ' tasted like' (the keyword starts with a space).
printReviewByKey(reviews=badcreamList, keyword=' tasted like')

Been here twice since they 've opened this location . First time I ordered regular milk tea and Oolong milk tea a suggested , but both were disappointing . I went back a second time hoping the first time wa just a bad day , but they failed their almond milk tea a well . I 've known Kung Fu Tea to have pretty good milk tea . I 'm assuming since this location is new , they need major training how to do milk tea since all of the milk tea I 've tried tasted like milky water . 
 
 ------------------------------------------------- 

Whyyyyyyy whhyyyy fat dumpling ? Why you got ta do me dirty like that ? ! We could have had something special . My boyfriend and I are hard core boba head and we just needed our fix tonight . Being that it 's our first time here , we even went a far a buying 3 drink total between the 2 of u . You really let u down ! Weak flavor , mushy boba , whack customer service - all terrible . Here 's the story : I ordered the lemon honey green tea with half boba . Man , I d

### TF-IDF for matcha green tea (all reviews mentioning "matcha")

#### Add product name

In [61]:
# Good reviews that mention 'matcha' anywhere in the text. The match is literal (regex=False)
# and case-sensitive; na=False keeps the mask boolean if a review text is missing.
goodCream = goodReviews[goodReviews['reivew'].str.contains('matcha', regex=False, na=False)]   # add!
goodcreamList = list(goodCream['reivew'])
goodcreamList = lemmatization(goodcreamList)

In [62]:
# Bad reviews that mention 'matcha' anywhere in the text. The match is literal (regex=False)
# and case-sensitive; na=False keeps the mask boolean if a review text is missing.
badCream = badReviews[badReviews['reivew'].str.contains('matcha', regex=False, na=False)]   # add!
badcreamList = list(badCream['reivew'])
badcreamList = lemmatization(badcreamList)

#### Stopwords for products

In [63]:
# Aliased so that the `stopwords` list built below does not shadow the NLTK corpus reader.
from nltk.corpus import stopwords as nltk_stopwords

# Custom additions to NLTK's English stop words (duplicates removed).
newStopWords = ['wa','one','get','would','place','also','good','great','love','come',
                'bubble','definitely','nice','always','make','best','came','much','super',
                'could','know','even','got','went','said','really','give','able','made','back',
                'service','customer','time','try','flavor',
                'go','delicious','staff','well',
                'kung','fu','brew','bar','sea','recommend','tea','Boba','boba','milk',
                '.', ',', ':', "''", "'s", "'", '``', '(', ')', '-']
# A fresh list is built on every run, so re-executing the cell never accumulates entries.
stopwords = nltk_stopwords.words('english') + newStopWords   # adding custom stopwords

#### TF-IDF

In [64]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF over 2- and 3-word phrases, with the vocabulary capped at 500 n-grams.
vectorizer = TfidfVectorizer(ngram_range=(2,3),
#                             token_pattern=r'\b[a-zA-Z]{3,}\b',   # three or more alpha characters
                             max_df=0.5,   # ignore n-grams that appear in more than 50% of documents
                             min_df=1,     # keep n-grams that appear in at least 1 document (no lower cut-off)
                             stop_words=stopwords,
                             max_features=500)

In [65]:
### good reviews

# Fit TF-IDF on the lemmatized good reviews, then total each n-gram's weight
# across all documents so phrases can be ranked.
X = vectorizer.fit_transform(goodcreamList)
# get_feature_names() was removed in scikit-learn 1.2; use its replacement
# when available and fall back to the old accessor on older installs.
if hasattr(vectorizer, "get_feature_names_out"):
    terms = vectorizer.get_feature_names_out().tolist()
else:
    terms = vectorizer.get_feature_names()
tf_idf = pd.DataFrame(X.toarray().transpose(), index=terms)   # rows = n-grams, columns = reviews
tf_idf = tf_idf.sum(axis=1)
scoreGood = pd.DataFrame(tf_idf, columns=["score"])
scoreGood["term"] = terms
# Reassign instead of sorting in place so the cell is safe to re-run and chain.
scoreGood = scoreGood.sort_values(by="score", ascending=False)   # generate TF-IDF scores
scoreGood.head(50)

Unnamed: 0,score,term
ice cream,66.613606,ice cream
matcha latte,51.681842,matcha latte
matcha green,36.993329,matcha green
red bean,36.97916,red bean
ordered matcha,23.754941,ordered matcha
matcha drink,23.57199,matcha drink
tried matcha,22.121449,tried matcha
shaved ice,20.961394,shaved ice
matcha ice,20.1383,matcha ice
matcha ice cream,17.962184,matcha ice cream


In [66]:
# scoreGood.to_csv('../data/scoreGood.csv')

In [67]:
### bad reviews

# Refit the same vectorizer on the lemmatized bad reviews, then total each
# n-gram's weight across all documents so phrases can be ranked.
X = vectorizer.fit_transform(badcreamList)
# get_feature_names() was removed in scikit-learn 1.2; use its replacement
# when available and fall back to the old accessor on older installs.
if hasattr(vectorizer, "get_feature_names_out"):
    terms = vectorizer.get_feature_names_out().tolist()
else:
    terms = vectorizer.get_feature_names()
tf_idf = pd.DataFrame(X.toarray().transpose(), index=terms)   # rows = n-grams, columns = reviews
tf_idf = tf_idf.sum(axis=1)
scoreBad = pd.DataFrame(tf_idf, columns=["score"])
scoreBad["term"] = terms
# Reassign instead of sorting in place so the cell is safe to re-run and chain.
scoreBad = scoreBad.sort_values(by="score", ascending=False)   # generate TF-IDF scores
scoreBad.head(50)

Unnamed: 0,score,term
ice cream,9.699343,ice cream
ordered matcha,5.874196,ordered matcha
matcha latte,5.7802,matcha latte
tasted like,5.18846,tasted like
matcha green,5.028576,matcha green
red bean,4.961491,red bean
soft serve,4.570222,soft serve
matcha tapioca,4.493762,matcha tapioca
taste like,3.997574,taste like
green matcha,3.600116,green matcha


In [68]:
# scoreBad.to_csv('../data/scoreBad.csv')

#### Look back to reviews

In [69]:
import re
def printReviewByKey(reviews, keyword):
    """Print each review that contains ``keyword`` and then a match tally.

    Args:
        reviews: sized iterable of review strings.
        keyword: text looked up in each review with the ``in`` operator.

    Returns:
        None. Matching reviews are printed followed by a dashed divider, and
        the last line reports "<matches> out of <total> reviews. ".
    """
    divider = '\n \n ------------------------------------------------- \n'
    hits = 0
    for review in reviews:
        if keyword not in review:
            continue
        print(review + divider)
        hits += 1
    print('%d out of %d reviews. ' % (hits, len(reviews)))

In [70]:
# Spot-check: print the lemmatized good matcha reviews that contain 'tiramisu mousse'.
printReviewByKey(goodcreamList, 'tiramisu mousse')

I love matcha . So when I passed by this place after an OTF workout , I knew I had to drop in when it opened . Well it finally opened and they offered 20 % off the order for grand opening weekend . Score ! But the price are n't bad to begin with . This place ha such a cute atmosphere and lot of table for seating . Everything blend in so pretty- the color , the furniture , the lighting . I can go on . I feel like atmosphere is important . The owner explained that the matcha is straight from Japan and that their drink are n't too sweet to begin with . And that is true because with 25 % sweetness level , I found it perfect since I 'm not a sweet fan of drink . All the worker are knowledgeable about the drink and friendly . Imperial Matcha- 5 star for this drink itself . I love matcha and I love it straight and simple . Simple , pure , refreshing . Definitely have that matcha taste . You have the option of making this a latte . Absolutely delicious and way better than Starbucks . *when you

In [71]:
# Spot-check: print the lemmatized bad matcha reviews that contain 'instead'.
printReviewByKey(badcreamList, 'instead')

The drink were really plain . Was really looking forward to trying out the cold drink but it wa a let down since the matcha did n't seem to be blended with the milk in the bubble tea it tasted really powdery ... Go try the matcha drink from other Japanese matcha cafe around Toronto instead . The staff were really friendly but the interior wa so crammed . 
 
 ------------------------------------------------- 

It definitely wa not zero degree , or even a reasonably comfortable temperature , inside Zero Degrees when we went . The place wa packed full of people due to it being the Grand Opening , but there 's a problem when it is cooler outside in the shade on an Arizona afternoon in May than in the shop . The line wa long and we almost left because it wa n't moving very fast but decided we could tough it out for the BOGO deal . While waiting we were asked to check-in on the Yelp app . ( Probably to help speed thing up , but also to get the free publicity ... hmmm ... ? ) When I paid we d

### TF-IDF for mango green tea

#### Add product name

In [72]:
# Product filter: good reviews whose text contains 'mango green tea'; change
# the pattern here to analyse a different product.
goodCream = goodReviews.loc[goodReviews['reivew'].str.contains('mango green tea')]
# Lemmatize the matching review texts for the TF-IDF step.
goodcreamList = lemmatization(list(goodCream['reivew']))

In [73]:
# Product filter: bad reviews whose text contains 'mango green tea'; change
# the pattern here to analyse a different product.
badCream = badReviews.loc[badReviews['reivew'].str.contains('mango green tea')]
# Lemmatize the matching review texts for the TF-IDF step.
badcreamList = lemmatization(list(badCream['reivew']))

#### Stopwords for products

In [74]:
from sklearn.feature_extraction.text import CountVectorizer
import nltk
from nltk.corpus import stopwords

# Placeholder only: `vectorizer` is rebound to a TfidfVectorizer in the TF-IDF cell.
vectorizer = CountVectorizer()

# Filler words for this product run, added on top of NLTK's English list.
# Several entries are repeated; they are kept verbatim.
# NOTE(review): the vectorizer lowercases text and its default token pattern
# skips punctuation, so 'Boba' and the punctuation entries probably never
# match anything — confirm before pruning them.
newStopWords = ['wa','one','get','would','place','also','good','great','love','come',
                'bubble','definitely','nice','always','make','make','best','come','came','much','super',
                'could','know','even','got','went','said','really','give','able','could','made','back',
                'service','customer','great','time','get','one','love','also','really','try','flavor',
                'go','best','delicious','go','staff','well','even',
                'kung','fu','brew','bar','sea','recommend','tea','Boba','boba','milk','ordered',
                ".",'.', ",",":", "''", "'s", "'", "``", "(", ")", "-"]

# NLTK English stop words followed by the custom ones. This rebinds the name
# `stopwords` from the NLTK corpus reader to a plain list, which is what the
# TfidfVectorizer cell passes as `stop_words`.
stopwords = stopwords.words('english') + newStopWords

#### TF-IDF

In [75]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF over 2- and 3-word phrases, with the vocabulary capped at 300 n-grams.
vectorizer = TfidfVectorizer(ngram_range=(2,3),
#                             token_pattern=r'\b[a-zA-Z]{3,}\b',   # three or more alpha characters
                             max_df=0.5,   # ignore n-grams that appear in more than 50% of documents
                             min_df=1,     # keep n-grams that appear in at least 1 document (no lower cut-off)
                             stop_words=stopwords,
                             max_features=300)

In [76]:
### good reviews

# Fit TF-IDF on the lemmatized good reviews, then total each n-gram's weight
# across all documents so phrases can be ranked.
X = vectorizer.fit_transform(goodcreamList)
# get_feature_names() was removed in scikit-learn 1.2; use its replacement
# when available and fall back to the old accessor on older installs.
if hasattr(vectorizer, "get_feature_names_out"):
    terms = vectorizer.get_feature_names_out().tolist()
else:
    terms = vectorizer.get_feature_names()
tf_idf = pd.DataFrame(X.toarray().transpose(), index=terms)   # rows = n-grams, columns = reviews
tf_idf = tf_idf.sum(axis=1)
scoreGood = pd.DataFrame(tf_idf, columns=["score"])
scoreGood["term"] = terms
# Reassign instead of sorting in place so the cell is safe to re-run and chain.
scoreGood = scoreGood.sort_values(by="score", ascending=False)   # generate TF-IDF scores
scoreGood.head(50)

Unnamed: 0,score,term
tried mango green,2.887698,tried mango green
tried mango,2.887698,tried mango
green mango,2.485767,green mango
passion fruit,1.911905,passion fruit
real mango,1.820223,real mango
taste like,1.818985,taste like
green latte,1.815828,green latte
stamp card,1.8026,stamp card
drink mango,1.795674,drink mango
drink mango green,1.795674,drink mango green


In [77]:
# scoreGood.to_csv('../data/scoreGood.csv')

In [78]:
### bad reviews

# Refit the same vectorizer on the lemmatized bad reviews, then total each
# n-gram's weight across all documents so phrases can be ranked.
X = vectorizer.fit_transform(badcreamList)
# get_feature_names() was removed in scikit-learn 1.2; use its replacement
# when available and fall back to the old accessor on older installs.
if hasattr(vectorizer, "get_feature_names_out"):
    terms = vectorizer.get_feature_names_out().tolist()
else:
    terms = vectorizer.get_feature_names()
tf_idf = pd.DataFrame(X.toarray().transpose(), index=terms)   # rows = n-grams, columns = reviews
tf_idf = tf_idf.sum(axis=1)
scoreBad = pd.DataFrame(tf_idf, columns=["score"])
scoreBad["term"] = terms
# Reassign instead of sorting in place so the cell is safe to re-run and chain.
scoreBad = scoreBad.sort_values(by="score", ascending=False)   # generate TF-IDF scores
scoreBad.head(50)

Unnamed: 0,score,term
tasted like,0.79729,tasted like
oolong green,0.565972,oolong green
drink cost,0.544295,drink cost
green tasted,0.520663,green tasted
overpowering girl,0.5,overpowering girl
overpowering girl little,0.5,overpowering girl little
register mango,0.5,register mango
register mango green,0.5,register mango green
plain mango bill,0.459137,plain mango bill
plain mango,0.459137,plain mango


In [79]:
# scoreBad.to_csv('../data/scoreBad.csv')

#### Look back to reviews

In [80]:
import re
def printReviewByKey(reviews, keyword):
    """Print each review that contains ``keyword`` and then a match tally.

    Args:
        reviews: sized iterable of review strings.
        keyword: text looked up in each review with the ``in`` operator.

    Returns:
        None. Matching reviews are printed followed by a dashed divider, and
        the last line reports "<matches> out of <total> reviews. ".
    """
    divider = '\n \n ------------------------------------------------- \n'
    hits = 0
    for review in reviews:
        if keyword not in review:
            continue
        print(review + divider)
        hits += 1
    print('%d out of %d reviews. ' % (hits, len(reviews)))

In [81]:
# Spot-check: print the lemmatized good mango-green-tea reviews that contain 'winter melon'.
printReviewByKey(goodcreamList, 'winter melon')

We came here probably 4 to 5 time a week ! It is soooo addicting ! ! ! I love their milk tea . It is so creamy almost like condensed milk . You have a choice to choose from green and black tea . My friend really like the winter melon tea cap and the mango green tea . Their boba is just right , but the pudding is too big to drink from the straw . I DEFINITELY recommend this place Second update : They have a new recipe and it is DELICIOUS ! ! ! ! It 's call the mango smoothie ! ! ! GET IT ! ! ! : ) 
 
 ------------------------------------------------- 

I have come here over 20 time since they opened and tried various drink . The owner are always super friendly and they do n't hesitate to modify our drink to our liking ( adding more ice , adjusting sweetness etc . ) My favorite drink are the winter melon tea with cloud cap , white taro smoothie , watermelon smoothie , watermelon green tea , mango green tea , and green apple smoothie . This might seem like an exhaustive list , but I swear

In [82]:
# Spot-check: print the lemmatized bad mango-green-tea reviews that contain '50 cent'.
printReviewByKey(badcreamList, '50 cent')

0 out of 13 reviews. 


### TF-IDF for boba

#### Add product name

In [83]:
# Product filter: good reviews that mention boba / pearl / bubble.
# A non-capturing group (?:...) matches the same rows as the former capturing
# group but avoids pandas' "This pattern has match groups" UserWarning.
# NOTE(review): the spaces inside the alternation are part of each alternative
# (e.g. 'Boba ' needs a trailing space, ' bubble' a leading one) — confirm this
# is intended rather than \b word boundaries.
goodCream = goodReviews[goodReviews['reivew'].str.contains(r'(?:Boba | boba | Pearl | pearl | Bubble | bubble)')]   # add!
goodcreamList = list(goodCream['reivew'])
# Lemmatize the matching review texts for the TF-IDF step.
goodcreamList = lemmatization(goodcreamList)

  """Entry point for launching an IPython kernel.


In [84]:
# Product filter: bad reviews that mention boba / pearl / bubble.
# A non-capturing group (?:...) matches the same rows as the former capturing
# group but avoids pandas' "This pattern has match groups" UserWarning.
# NOTE(review): the spaces inside the alternation are part of each alternative
# (e.g. 'Boba ' needs a trailing space, ' bubble' a leading one) — confirm this
# is intended rather than \b word boundaries.
badCream = badReviews[badReviews['reivew'].str.contains(r'(?:Boba | boba | Pearl | pearl | Bubble | bubble)')]   # add!
badcreamList = list(badCream['reivew'])
# Lemmatize the matching review texts for the TF-IDF step.
badcreamList = lemmatization(badcreamList)

  """Entry point for launching an IPython kernel.


#### Stopwords for products

In [85]:
from sklearn.feature_extraction.text import CountVectorizer
import nltk
from nltk.corpus import stopwords

# Placeholder only: `vectorizer` is rebound to a TfidfVectorizer in the TF-IDF cell.
vectorizer = CountVectorizer()

# Filler words for this product run, added on top of NLTK's English list.
# 'boba' and 'bubble' are not listed here, so they stay in the n-grams.
# Several entries are repeated; they are kept verbatim.
# NOTE(review): the vectorizer's default token pattern skips punctuation, so
# the punctuation entries probably never match anything — confirm before
# pruning them.
newStopWords = ['wa','one','get','would','place','also','good','great','love','come',
                'definitely','nice','always','make','make','best','come','came','much','super',
                'could','know','even','got','went','said','really','give','able','could','made','back',
                'service','customer','great','time','get','one','love','also','really','try','flavor',
                'go','best','delicious','go','staff','well','even','tea',
                'kung','fu','brew','bar','sea','recommend','milk','ordered',
                ".",'.', ",",":", "''", "'s", "'", "``", "(", ")", "-"]

# NLTK English stop words followed by the custom ones. This rebinds the name
# `stopwords` from the NLTK corpus reader to a plain list, which is what the
# TfidfVectorizer cell passes as `stop_words`.
stopwords = stopwords.words('english') + newStopWords

#### TF-IDF

In [86]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF over 2- and 3-word phrases, with the vocabulary capped at 300 n-grams.
vectorizer = TfidfVectorizer(ngram_range=(2,3),
#                             token_pattern=r'\b[a-zA-Z]{3,}\b',   # three or more alpha characters
                             max_df=0.5,   # ignore n-grams that appear in more than 50% of documents
                             min_df=5,     # ignore n-grams that appear in fewer than 5 documents
                             stop_words=stopwords,
                             max_features=300)

In [87]:
### good reviews

# Fit TF-IDF on the lemmatized good reviews, then total each n-gram's weight
# across all documents so phrases can be ranked.
X = vectorizer.fit_transform(goodcreamList)
# get_feature_names() was removed in scikit-learn 1.2; use its replacement
# when available and fall back to the old accessor on older installs.
if hasattr(vectorizer, "get_feature_names_out"):
    terms = vectorizer.get_feature_names_out().tolist()
else:
    terms = vectorizer.get_feature_names()
tf_idf = pd.DataFrame(X.toarray().transpose(), index=terms)   # rows = n-grams, columns = reviews
tf_idf = tf_idf.sum(axis=1)
scoreGood = pd.DataFrame(tf_idf, columns=["score"])
scoreGood["term"] = terms
# Reassign instead of sorting in place so the cell is safe to re-run and chain.
scoreGood = scoreGood.sort_values(by="score", ascending=False)   # generate TF-IDF scores
scoreGood.head(50)

Unnamed: 0,score,term
ice cream,458.515133,ice cream
boba drink,397.306342,boba drink
boba shop,279.614868,boba shop
favorite boba,252.599587,favorite boba
boba spot,233.274168,boba spot
honey boba,205.564797,honey boba
las vegas,160.167126,las vegas
ca wait,148.765389,ca wait
red bean,147.164911,red bean
like boba,145.729092,like boba


In [88]:
# scoreGood.to_csv('../data/scoreGood.csv')

In [89]:
### bad reviews

# Refit the same vectorizer on the lemmatized bad reviews, then total each
# n-gram's weight across all documents so phrases can be ranked.
X = vectorizer.fit_transform(badcreamList)
# get_feature_names() was removed in scikit-learn 1.2; use its replacement
# when available and fall back to the old accessor on older installs.
if hasattr(vectorizer, "get_feature_names_out"):
    terms = vectorizer.get_feature_names_out().tolist()
else:
    terms = vectorizer.get_feature_names()
tf_idf = pd.DataFrame(X.toarray().transpose(), index=terms)   # rows = n-grams, columns = reviews
tf_idf = tf_idf.sum(axis=1)
scoreBad = pd.DataFrame(tf_idf, columns=["score"])
scoreBad["term"] = terms
# Reassign instead of sorting in place so the cell is safe to re-run and chain.
scoreBad = scoreBad.sort_values(by="score", ascending=False)   # generate TF-IDF scores
scoreBad.head(50)

Unnamed: 0,score,term
boba drink,62.267278,boba drink
ice cream,59.234144,ice cream
tasted like,56.279684,tasted like
taste like,55.312029,taste like
boba shop,38.575643,boba shop
boba boba,30.758148,boba boba
drink boba,28.222052,drink boba
taro boba,26.587309,taro boba
bubble shop,24.54377,bubble shop
15 minute,23.937133,15 minute


In [90]:
# scoreBad.to_csv('../data/scoreBad.csv')

#### Look back to reviews

In [91]:
import re
def printReviewByKey(reviews, keyword):
    """Print each review that contains ``keyword`` and then a match tally.

    Args:
        reviews: sized iterable of review strings.
        keyword: text looked up in each review with the ``in`` operator.

    Returns:
        None. Matching reviews are printed followed by a dashed divider, and
        the last line reports "<matches> out of <total> reviews. ".
    """
    divider = '\n \n ------------------------------------------------- \n'
    hits = 0
    for review in reviews:
        if keyword not in review:
            continue
        print(review + divider)
        hits += 1
    print('%d out of %d reviews. ' % (hits, len(reviews)))

In [92]:
# Spot-check the lemmatized good boba reviews for a keyword.
# NOTE(review): 'brcown sugar' looks like a typo for 'brown sugar', which would explain the 0 matches — confirm.
printReviewByKey(goodcreamList, 'brcown sugar')

0 out of 12388 reviews. 


In [93]:
# Spot-check: print the lemmatized bad boba reviews that contain 'soft'.
printReviewByKey(badcreamList, 'soft')

This boba wa trash over cooked and to dam soft never going here again . As well the tea wa watered down my Xbox can make better tea 
 
 ------------------------------------------------- 

Huh.. never had a bubble tea I did n't like.. until today that is . I had a chance to try both the honey and the almond milk tea . They both tasted strongly of pre-made mix and not fresh at all , mind you I would n't count myself a overly picky either . Perhaps in the past this wa the best you could get around here , but there are much better option within ~5 minute of this place , and a metric ton of option if you 're willing to go a little further into Chinatown . I did not try any snack or food here so I ca n't comment on that . To their credit I did find the boba to be a nice consistency ( soft but not soggy ) and the people working there were polite and helpful . 
 
 ------------------------------------------------- 

Liquid nitrogen ice cream is something you have to experience at least once . A

### TF-IDF for sea salt cream

#### Add product name

In [94]:
# Product filter: good reviews that mention sea salt cream or cloud cap.
# A non-capturing group (?:...) matches the same rows as the former capturing
# group but avoids pandas' "This pattern has match groups" UserWarning.
# NOTE(review): the spaces inside the alternation are part of each alternative
# ('sea salt cream ' needs a trailing space, ' cloud cap' a leading one) —
# confirm this is intended rather than \b word boundaries.
goodCream = goodReviews[goodReviews['reivew'].str.contains(r'(?:sea salt cream | cloud cap)')]   # add!
goodcreamList = list(goodCream['reivew'])
# Lemmatize the matching review texts for the TF-IDF step.
goodcreamList = lemmatization(goodcreamList)

  """Entry point for launching an IPython kernel.


In [95]:
# Product filter: bad reviews that mention sea salt cream or cloud cap.
# A non-capturing group (?:...) matches the same rows as the former capturing
# group but avoids pandas' "This pattern has match groups" UserWarning.
# NOTE(review): the spaces inside the alternation are part of each alternative
# ('sea salt cream ' needs a trailing space, ' cloud cap' a leading one) —
# confirm this is intended rather than \b word boundaries.
badCream = badReviews[badReviews['reivew'].str.contains(r'(?:sea salt cream | cloud cap)')]   # add!
badcreamList = list(badCream['reivew'])
# Lemmatize the matching review texts for the TF-IDF step.
badcreamList = lemmatization(badcreamList)

  """Entry point for launching an IPython kernel.


#### Stopwords for products

In [96]:
from sklearn.feature_extraction.text import CountVectorizer
import nltk
from nltk.corpus import stopwords

# Placeholder only: `vectorizer` is rebound to a TfidfVectorizer in the TF-IDF cell.
vectorizer = CountVectorizer()

# Filler words for this product run, added on top of NLTK's English list.
# Several entries are repeated; they are kept verbatim.
# NOTE(review): 'sea' is listed here although this section targets
# "sea salt cream" — confirm that dropping it from the n-grams is intended.
# NOTE(review): the vectorizer's default token pattern skips punctuation, so
# the punctuation entries probably never match anything — confirm before
# pruning them.
newStopWords = ['wa','one','get','would','place','also','good','great','love','come',
                'definitely','nice','always','make','make','best','come','came','much','super',
                'could','know','even','got','went','said','really','give','able','could','made','back',
                'service','customer','great','time','get','one','love','also','really','try','flavor',
                'go','best','delicious','go','staff','well','even','tea',
                'kung','fu','brew','bar','sea','recommend','milk','ordered',
                ".",'.', ",",":", "''", "'s", "'", "``", "(", ")", "-"]

# NLTK English stop words followed by the custom ones. This rebinds the name
# `stopwords` from the NLTK corpus reader to a plain list, which is what the
# TfidfVectorizer cell passes as `stop_words`.
stopwords = stopwords.words('english') + newStopWords

#### TF-IDF

In [97]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF over 2- and 3-word phrases, with the vocabulary capped at 300 n-grams.
vectorizer = TfidfVectorizer(ngram_range=(2,3),
#                             token_pattern=r'\b[a-zA-Z]{3,}\b',   # three or more alpha characters
                             max_df=0.5,   # ignore n-grams that appear in more than 50% of documents
                             min_df=1,     # keep n-grams that appear in at least 1 document (no lower cut-off)
                             stop_words=stopwords,
                             max_features=300)

In [98]:
### good reviews

# Fit TF-IDF on the lemmatized good reviews, then total each n-gram's weight
# across all documents so phrases can be ranked.
X = vectorizer.fit_transform(goodcreamList)
# get_feature_names() was removed in scikit-learn 1.2; use its replacement
# when available and fall back to the old accessor on older installs.
if hasattr(vectorizer, "get_feature_names_out"):
    terms = vectorizer.get_feature_names_out().tolist()
else:
    terms = vectorizer.get_feature_names()
tf_idf = pd.DataFrame(X.toarray().transpose(), index=terms)   # rows = n-grams, columns = reviews
tf_idf = tf_idf.sum(axis=1)
scoreGood = pd.DataFrame(tf_idf, columns=["score"])
scoreGood["term"] = terms
# Reassign instead of sorting in place so the cell is safe to re-run and chain.
scoreGood = scoreGood.sort_values(by="score", ascending=False)   # generate TF-IDF scores
scoreGood.head(50)

Unnamed: 0,score,term
cloud cap,7.667535,cloud cap
cream boba,5.09025,cream boba
salt cream boba,5.09025,salt cream boba
latte salt,5.087935,latte salt
latte salt cream,5.087935,latte salt cream
boba spot,4.985343,boba spot
jasmine latte,4.613687,jasmine latte
boba shop,4.49431,boba shop
honey boba,4.466356,honey boba
hokkaido salt cream,4.091803,hokkaido salt cream


In [99]:
# scoreGood.to_csv('../data/scoreGood.csv')

In [100]:
### bad reviews

# Refit the same vectorizer on the lemmatized bad reviews, then total each
# n-gram's weight across all documents so phrases can be ranked.
X = vectorizer.fit_transform(badcreamList)
# get_feature_names() was removed in scikit-learn 1.2; use its replacement
# when available and fall back to the old accessor on older installs.
if hasattr(vectorizer, "get_feature_names_out"):
    terms = vectorizer.get_feature_names_out().tolist()
else:
    terms = vectorizer.get_feature_names()
tf_idf = pd.DataFrame(X.toarray().transpose(), index=terms)   # rows = n-grams, columns = reviews
tf_idf = tf_idf.sum(axis=1)
scoreBad = pd.DataFrame(tf_idf, columns=["score"])
scoreBad["term"] = terms
# Reassign instead of sorting in place so the cell is safe to re-run and chain.
scoreBad = scoreBad.sort_values(by="score", ascending=False)   # generate TF-IDF scores
scoreBad.head(50)

Unnamed: 0,score,term
jasmine green,0.935124,jasmine green
tasted like,0.874532,tasted like
boba shop,0.781859,boba shop
lot boba,0.707107,lot boba
lot boba town,0.707107,lot boba town
drink salt,0.672088,drink salt
drink salt cream,0.672088,drink salt cream
bitter aftertaste,0.657544,bitter aftertaste
pretty bad,0.603718,pretty bad
pretty bad taste,0.603718,pretty bad taste


In [101]:
# scoreBad.to_csv('../data/scoreBad.csv')

#### Look back to reviews

In [102]:
import re
def printReviewByKey(reviews, keyword):
    """Print each review that contains ``keyword`` and then a match tally.

    Args:
        reviews: sized iterable of review strings.
        keyword: text looked up in each review with the ``in`` operator.

    Returns:
        None. Matching reviews are printed followed by a dashed divider, and
        the last line reports "<matches> out of <total> reviews. ".
    """
    divider = '\n \n ------------------------------------------------- \n'
    hits = 0
    for review in reviews:
        if keyword not in review:
            continue
        print(review + divider)
        hits += 1
    print('%d out of %d reviews. ' % (hits, len(reviews)))

In [103]:
# Spot-check the lemmatized good sea-salt-cream / cloud-cap reviews for a keyword.
# NOTE(review): 'brcown sugar' looks like a typo for 'brown sugar', which would explain the 0 matches — confirm.
printReviewByKey(goodcreamList, 'brcown sugar')

0 out of 164 reviews. 


In [104]:
# Spot-check: print the lemmatized bad sea-salt-cream / cloud-cap reviews that contain 'soft'.
printReviewByKey(badcreamList, 'soft')

0 out of 13 reviews. 
