In [1]:
import numpy as np
import pandas as pd
import time
import random
from random import sample
import string
import pickle

# nltk models
import nltk
from nltk.corpus import stopwords 
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
from nltk.stem.wordnet import WordNetLemmatizer

#spaCy
import spacy
from spacy import displacy

# gensim models
import gensim
from gensim import corpora, models
from gensim.models.phrases import Phrases, Phraser
from gensim.models.ldamulticore import LdaMulticore

# Visualize topics
import pyLDAvis
import pyLDAvis.gensim



# Meet spaCy - "It's minimal and opinionated"

spaCy is a free, **open-source** library for advanced **Natural Language Processing (NLP)** in Python.

Features:

-  Tokenization
-  POS Tagging
-  Dependency Parsing
-  Lemmatization
-  Sentence Detection
-  Entity Recognition

And more...

https://spacy.io/


In [2]:
# Load the small English pipeline. `nlp` is rebound to other models further
# down, so each cell uses whichever model was loaded most recently.
nlp = spacy.load('en_core_web_sm')

In [58]:
# Short two-sentence review used for the spaCy feature tour in the next cells
text = 'Best restaurant in Newcastle. Delicious cocktails and it has a really friendly atmosphere.'

# Run the loaded pipeline over the text; the cells below iterate over `doc`
# token by token to read the annotations.
doc = nlp(text)
doc

Best restaurant in Newcastle. Delicious cocktails and it has a really friendly atmosphere.

In [4]:
# One list per token attribute; the lists are zipped into table rows below.
token_text = [token.text for token in doc]
token_pos = [token.pos_ for token in doc]
token_lemma = [token.lemma_ for token in doc]
token_entity = [token.ent_type_ for token in doc]
token_stop = [token.is_stop for token in doc]
token_vec = [token.vector for token in doc]

# Column labels, listed in the same order as the arguments to zip() below
# (note: lemma comes before pos here, unlike the assignments above).
headers = ['token_text','token_lemma','token_pos','token_entity','token_is_stop','token_vec']

pd.DataFrame(list(zip(token_text, token_lemma, token_pos, token_entity,token_stop,token_vec)),columns=headers)

Unnamed: 0,token_text,token_lemma,token_pos,token_entity,token_is_stop,token_vec
0,Best,good,ADJ,,False,"[-1.6054, 4.53702, -0.672109, -2.05529, -1.600..."
1,restaurant,restaurant,NOUN,,False,"[1.33561, 0.368323, 2.64288, 0.373385, -2.4033..."
2,in,in,ADP,,True,"[1.44569, 1.10417, -0.406471, 1.28426, 1.65253..."
3,Newcastle,newcastle,PROPN,GPE,False,"[-2.38429, 0.409086, 3.07898, 0.594176, -0.754..."
4,.,.,PUNCT,,False,"[0.474397, 1.99392, 2.95767, -0.329908, 1.3399..."
5,Delicious,delicious,PROPN,,False,"[-3.31449, 3.54701, -0.886751, 2.62596, -3.169..."
6,cocktails,cocktail,NOUN,,False,"[0.890365, 3.60857, -1.63054, -2.22932, 1.359,..."
7,and,and,CCONJ,,True,"[0.74245, -1.03995, -0.239206, -1.88797, 2.333..."
8,it,-PRON-,PRON,,True,"[-1.6597, 0.930871, 3.74128, 2.16395, -2.18548..."
9,has,have,VERB,,True,"[-3.41011, 2.17194, -1.65569, -1.99658, 3.2991..."


In [5]:
# Dependency-tree view: for every token, its head token and its direct children.
# `token.children` is wrapped in list() so the children are materialised for display.
token_head = [token.head for token in doc]
token_children = [list(token.children) for token in doc]

headers_ = ['token_text','token_head','token_children']

# Reuses `token_text` from the previous cell, so that cell must have been run first.
pd.DataFrame(list(zip(token_text, token_head, token_children)),columns=headers_)

Unnamed: 0,token_text,token_head,token_children
0,Best,restaurant,[]
1,restaurant,restaurant,"[Best, in, .]"
2,in,restaurant,[Newcastle]
3,Newcastle,in,[]
4,.,restaurant,[]
5,Delicious,cocktails,[]
6,cocktails,has,"[Delicious, and, it]"
7,and,cocktails,[]
8,it,cocktails,[]
9,has,has,"[cocktails, atmosphere, .]"


In [6]:
# Render the dependency parse of `doc` inline in the notebook
displacy.render(doc, style='dep', jupyter=True, options={'distance': 90})

  "__main__", mod_spec)
  "__main__", mod_spec)


# Noun chunks

In [7]:
# Materialise the noun chunks of `doc` as a list so they are displayed
list(doc.noun_chunks)

[Best restaurant,
 Newcastle,
 Delicious cocktails,
 it,
 a really friendly atmosphere]

# Sentences

In [8]:
# Materialise the sentence spans of `doc` as a list so they are displayed
list(doc.sents)

[Best restaurant in Newcastle.,
 Delicious cocktails and it has a really friendly atmosphere.]

# Doc2Vec

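Each document is represented by a 300-dimensional vector: the average of its tokens' word vectors, which are GloVe vectors trained on the Common Crawl dataset. Note that this is a simple averaged-embedding representation, not the paragraph-vector "Doc2Vec" model.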

https://en.wikipedia.org/wiki/Common_Crawl

https://spacy.io/models/en#section-en_vectors_web_lg


In [9]:
# Rebind `nlp` to the large English model for the document-vector cells below.
# This replaces the small model loaded earlier in the notebook.
nlp = spacy.load('en_core_web_lg')

In [10]:
# Four toy documents: two about food, two about science
docs = ['Pork is amazing','Sausage was great','Data Science made simple','Physics studies laws of the universe']

In [11]:
def doc2vec(docs):
    """
    Pair each document's text with its spaCy document vector.

    Every item of `docs` is streamed through the notebook-level `nlp`
    pipeline; the text and the `vector` attribute of each resulting document
    become one row of the output.

    Parameters
    ----------
    docs : iterable of str
        Raw texts to embed.

    Returns
    -------
    pandas.DataFrame
        One row per document with the columns 'doc_text' and 'doc_vec'.
    """
    rows = [(parsed.text, parsed.vector) for parsed in nlp.pipe(docs)]
    return pd.DataFrame(rows, columns=['doc_text', 'doc_vec'])

# Embed the toy documents. NOTE(review): the name `df` is reused for unrelated
# tables later in the notebook, so this value is only valid until then.
df = doc2vec(docs)
df

Unnamed: 0,doc_text,doc_vec
0,Pork is amazing,"[-0.332147, 0.185507, 0.2583, 0.130159, 0.1209..."
1,Sausage was great,"[-0.206415, 0.324179, 0.18584, 0.0150927, -7.1..."
2,Data Science made simple,"[-0.26445, 0.0599757, -0.181192, 0.0580605, -0..."
3,Physics studies laws of the universe,"[0.0194767, 0.0151591, -0.171293, -0.183741, -..."


In [12]:
# NOTE(review): these imports sit mid-notebook; they would normally live in the
# import cell at the top. The bare `sklearn` name is not referenced in the
# visible code.
import sklearn
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity between all document vectors
docvecs = df['doc_vec'].tolist()
cos_sim = cosine_similarity(docvecs)
# Label both axes with the document text so the matrix is readable
df_sim = pd.DataFrame(cos_sim,columns=df['doc_text'].tolist(),index=df['doc_text'].tolist())
df_sim

Unnamed: 0,Pork is amazing,Sausage was great,Data Science made simple,Physics studies laws of the universe
Pork is amazing,1.0,0.816263,0.543974,0.441451
Sausage was great,0.816263,1.0,0.508645,0.422934
Data Science made simple,0.543974,0.508645,1.0,0.744686
Physics studies laws of the universe,0.441451,0.422934,0.744686,1.0


# Let's build an NLP pipeline

In [13]:
#load dataset
# NOTE(review): absolute, user-specific Windows path; the notebook only runs on
# a machine where this file exists at this location.
path = 'C:\\Users\\schapira.d\\Desktop\\Data Science Meetup\\yelp_reviews_1M.csv'
reviews_df = pd.read_csv(path,encoding='utf-8')
# Missing review texts become empty strings so every entry is a str
reviews = reviews_df['text'].fillna('').tolist()
ratings = reviews_df['stars_x'].tolist()
reviews_df.head(1)

Unnamed: 0,business_id,name,city,categories,text,stars_x
0,vXEZ-r6fah-5Fjt3a6c-Gw,"""The Cheesecake Factory""",Pittsburgh,American (Traditional);Desserts;Food;American ...,One of my favorite places too take the kids wh...,4


In [14]:
# Rebind `nlp` to the small model with the parser and NER components disabled;
# the preprocessing below only reads POS tags, lemmas and stop-word flags.
nlp = spacy.load('en_core_web_sm',disable=['parser','ner'])

In [15]:
def TextPreprocessSpaCy(docs):
    """
    Reduce each document to its lower-cased adjective and noun lemmas.

    Documents are streamed through the notebook-level `nlp` pipeline. A token
    is kept when its coarse POS tag is ADJ or NOUN and it is not flagged as a
    stop word; the placeholder lemma '-pron-' is dropped as well.

    Parameters
    ----------
    docs : iterable of str
        Raw texts to preprocess.

    Returns
    -------
    list of list of str
        One list of lemmas per input document, in input order.
    """
    kept_pos = ['ADJ','NOUN']
    placeholder_lemmas = ['-pron-']
    processed = []
    for parsed in nlp.pipe(docs):
        lemmas = []
        for tok in parsed:
            # Skip anything that is not an adjective/noun, and all stop words
            if tok.pos_ not in kept_pos or tok.is_stop:
                continue
            lowered = tok.lemma_.lower()
            if lowered not in placeholder_lemmas:
                lemmas.append(lowered)
        processed.append(lemmas)

    return processed

def TextPreprocessNLTK(docs):
    """
    Reduce each document to its adjective and noun lemmas using NLTK.

    Each document is lower-cased, tokenized and POS-tagged. A token is kept
    when its tag is one of JJ, JJR, JJS, NN or NNS, it is not found in
    `string.punctuation`, and it is not an English stop word. Surviving
    tokens are lemmatized with WordNet.

    Parameters
    ----------
    docs : iterable of str
        Raw texts to preprocess.

    Returns
    -------
    list of list of str
        One list of lemmas per input document, in input order.
    """
    keep_tags = {'JJ','JJR','JJS','NN','NNS'}
    # A set makes the per-token stop-word check a hash lookup instead of a
    # scan over the whole stop-word list.
    stop = set(stopwords.words('english'))
    lemma = WordNetLemmatizer()

    text = []
    for raw in docs:
        tagged = pos_tag(word_tokenize(raw.lower()))  # tokenize, then POS-tag
        text.append([
            lemma.lemmatize(word)
            for word, tag in tagged
            if tag in keep_tags                        # POS filter
            # substring test against the punctuation string, as before
            and word not in string.punctuation
            and word not in stop                       # remove stopwords
        ])

    return text

# Visible confirmation that the cell defining both preprocessing helpers ran
print('done')

done


In [16]:
# Time both preprocessing helpers on the same first 1,000 reviews and print the
# tokens each one produced for review 40, followed by the raw review text.
# perf_counter() is the clock intended for measuring elapsed time; time.time()
# follows the system wall clock, which can be adjusted while the cell runs.
start = time.perf_counter()
SpaCy = TextPreprocessSpaCy(reviews[0:1000])
end = time.perf_counter()
print("{} \n --SpaCy Run time: {}s".format(SpaCy[40],(end-start)))

start = time.perf_counter()
NLTK = TextPreprocessNLTK(reviews[0:1000])
end = time.perf_counter()
print("\n{} \n --NLTK Run time: {}s".format(NLTK[40],(end-start)))

print("\n{}".format(reviews[40]))

['good', 'place', 'sushi', 'area', 'everything', 'fresh', 'chef', 'pride', 'piece', 'sushi'] 
 --SpaCy Run time: 7.722013473510742s

['best', 'place', 'sushi', 'area', 'everything', 'fresh', 'chef', 'piece', 'sushi'] 
 --NLTK Run time: 11.044819355010986s

By far the best place to get sushi in the area.  Everything is fresh and you can tell that the chef takes pride in each piece of sushi that he creates.


In [17]:
%%time
# Preprocess the full review list (not just the 1,000-review slice timed above)
reviews_spacy = TextPreprocessSpaCy(reviews)

Wall time: 2h 8min 57s


In [18]:
# Persist the preprocessed tokens so the long spaCy pass need not be repeated.
# The dump and the load use the same file, so the round trip works on a fresh
# run instead of depending on a separately created pickle.
# NOTE(review): absolute, user-specific Windows path.
path_tokens ='C:\\Users\\schapira.d\\Desktop\\Data Science Meetup\\tokens_spacy.pkl'
path = path_tokens
with open(path_tokens,'wb') as f:
    pickle.dump(reviews_spacy, f)

#load preprocessed dataset:
# NOTE: pickle.load can execute arbitrary code; only load files written by
# this notebook.
with open(path_tokens,'rb') as f:
    reviews_spacy = pickle.load(f)

# Phrases model

In [19]:
%%time
#Phrases Modelling
# First pass: learn phrase candidates over the token lists (min_count=25),
# then wrap the trained model in a Phraser for applying it.
bigram_model = Phrases(reviews_spacy,min_count=25)
bigram_phraser = Phraser(bigram_model)
# Second pass: train on the output of the first phraser, so an already-joined
# pair can be joined with a further neighbouring token.
trigram_model = Phrases(bigram_phraser[reviews_spacy],min_count=25)
trigram_phraser = Phraser(trigram_model)

# Apply both phrasers to every review and materialise the result as a list
reviews_trigram = list(trigram_phraser[bigram_phraser[reviews_spacy]])

Wall time: 6min 46s


In [20]:
# Save the phrase-merged token lists to disk.
# NOTE(review): absolute, user-specific Windows path; `path` is rebound here
# and no longer points at the file used in earlier cells.
path ='C:\\Users\\schapira.d\\Desktop\\Data Science Meetup\\phrases.pkl' 
with open(path,'wb') as f:
    pickle.dump(reviews_trigram, f)

In [118]:
# Inspect one review after phrase merging; joined phrases contain an underscore
print(reviews_trigram[3])

['amazing', 'lunch', 'spot', 'endless', 'option', 'daily_special', 'new', 'delicious', 'option', 'salad', 'lover', 'burger', 'sandwich', 'wrap', 'pasta', 'breakfast', 'variety', 'fruit', 'snack', 'shake', 'anyone', 'quaint', 'spot', 'indoor', 'guy', 'great', 'taste']


In [70]:
#Transforming to df for unstacking and join
# Both head() calls below must use the same row count so that phrase rows and
# review rows line up on the positional index; keep it in one place.
n_phrase_rows = 100000
df_phrases = pd.DataFrame({"Phrases" : reviews_trigram}).head(n_phrase_rows)

#Unstacking...
# Repeat each review's index once per phrase so every phrase gets its own row,
# indexed by the review it came from.
df = pd.DataFrame({'Index':np.repeat(df_phrases.index.values, df_phrases.Phrases.str.len()),
                   'Phrases':np.concatenate(df_phrases.Phrases.values)}).set_index('Index')

#Joining with full data
reviews_phrases = pd.merge(df,reviews_df.head(n_phrase_rows),left_index=True,right_index=True).reset_index(drop=True)
# pd.to_numeric returns a new Series; assign it back, otherwise the conversion
# is silently discarded.
reviews_phrases['stars_x'] = pd.to_numeric(reviews_phrases['stars_x'])

reviews_phrases.head()

Unnamed: 0,Phrases,business_id,name,city,categories,text,stars_x
0,favorite,vXEZ-r6fah-5Fjt3a6c-Gw,"""The Cheesecake Factory""",Pittsburgh,American (Traditional);Desserts;Food;American (New);Restaurants,One of my favorite places too take the kids when there home from college or during the holidays ...,4
1,place,vXEZ-r6fah-5Fjt3a6c-Gw,"""The Cheesecake Factory""",Pittsburgh,American (Traditional);Desserts;Food;American (New);Restaurants,One of my favorite places too take the kids when there home from college or during the holidays ...,4
2,kid,vXEZ-r6fah-5Fjt3a6c-Gw,"""The Cheesecake Factory""",Pittsburgh,American (Traditional);Desserts;Food;American (New);Restaurants,One of my favorite places too take the kids when there home from college or during the holidays ...,4
3,college,vXEZ-r6fah-5Fjt3a6c-Gw,"""The Cheesecake Factory""",Pittsburgh,American (Traditional);Desserts;Food;American (New);Restaurants,One of my favorite places too take the kids when there home from college or during the holidays ...,4
4,holiday,vXEZ-r6fah-5Fjt3a6c-Gw,"""The Cheesecake Factory""",Pittsburgh,American (Traditional);Desserts;Food;American (New);Restaurants,One of my favorite places too take the kids when there home from college or during the holidays ...,4


In [71]:
#Pivot phrases by avg. rating
# For every phrase: how many rows mention it (len) and their mean star rating
phrases = pd.pivot_table(reviews_phrases, index='Phrases',aggfunc={'stars_x':[np.mean,len]})
# Flatten the two-level column index into 'stars_x_len' / 'stars_x_mean'
phrases.columns = phrases.columns.to_series().str.join('_')
# Rename by label rather than by position, so the two meanings cannot be
# silently swapped if the pivot returns its columns in a different order.
df = (phrases.sort_values('stars_x_mean',ascending=False)
             .rename(columns={'stars_x_len':'term_frequency','stars_x_mean':'avg_rating'}))
# Drop rare phrases: an average over a handful of rows is not informative
df = df[df.term_frequency > 100]

In [75]:
#Top phrases with highest avg. rating
# `df` was sorted by average rating in descending order in the previous cell,
# so the first rows are the best-rated phrases.
df.head(20)

Unnamed: 0_level_0,term_frequency,avg_rating
Phrases,Unnamed: 1_level_1,Unnamed: 2_level_1
recommend,376,4.643617
love_love,133,4.639098
hidden_gem,423,4.626478
personal_favorite,152,4.611842
gem,857,4.588098
best,453,4.576159
worth_penny,165,4.575758
impeccable,352,4.571023
incredible,1432,4.567039
phenomenal,724,4.563536


In [76]:
#Top phrases with lowest avg. rating
# Same descending sort as above, so the last rows are the worst-rated phrases.
df.tail(20)

Unnamed: 0_level_0,term_frequency,avg_rating
Phrases,Unnamed: 1_level_1,Unnamed: 2_level_1
shitty,114,1.95614
lousy,106,1.95283
response,384,1.950521
horrendous,105,1.942857
trash,231,1.922078
nasty,420,1.890476
tasteless,543,1.858195
inedible,323,1.820433
terrible,2225,1.788315
pathetic,118,1.779661


# Topic Modelling

Using gensim implementation of Latent Dirichlet Allocation (LDA)

Link to original paper by David Blei, Andrew Ng, Michael Jordan (2003) Journal of Machine Learning Research

http://www.jmlr.org/papers/volume3/blei03a/blei03a.pdf

In [83]:
%%time
# turn tokenized documents into a id <-> term dictionary
dictionary = corpora.Dictionary(reviews_trigram)
# Prune the vocabulary: no_below / no_above are document-frequency bounds
# (an absolute count and a fraction of the corpus, respectively).
dictionary.filter_extremes(no_below=10, no_above=0.4)
dictionary.compactify()

# convert tokenized documents into a document-term matrix
# (one bag-of-words per review, built against the pruned dictionary)
corpus = [dictionary.doc2bow(i) for i in reviews_trigram]

Wall time: 1min 16s


In [84]:
%%time
#where the magic happens
# Train a 50-topic LDA model with 3 worker processes and 10 passes over the corpus.
# NOTE(review): no random_state is passed, so a re-run will presumably produce
# different topics and topic numbers than the outputs shown below - confirm
# and consider fixing a seed.
lda_model = gensim.models.ldamulticore.LdaMulticore(corpus,
                                                    num_topics=50, 
                                                    id2word=dictionary, 
                                                    workers=3, passes=10)

# Save the trained model. NOTE(review): absolute, user-specific Windows path.
lda_model.save('C:\\Users\\schapira.d\\Desktop\\Data Science Meetup\\lda_5010.model')

Wall time: 4h 23min 3s


In [93]:
# Fetch the keyword summary of the topics (num_topics=-1) and tabulate it
lda_topics = lda_model.print_topics(num_topics=-1)
lda_topics = pd.DataFrame(lda_topics, columns=['Topic_#','Keywords']).set_index('Topic_#')
# Widen text columns so more of each keyword string is visible.
# This is a notebook-wide pandas display option and affects later cells too.
pd.options.display.max_colwidth=100
lda_topics

Unnamed: 0_level_0,Keywords
Topic_#,Unnamed: 1_level_1
0,"0.099*""table"" + 0.024*""door"" + 0.022*""people"" + 0.019*""seat"" + 0.019*""small"" + 0.018*""dirty"" + 0..."
1,"0.052*""dish"" + 0.039*""pasta"" + 0.029*""sauce"" + 0.026*""italian"" + 0.016*""restaurant"" + 0.014*""men..."
2,"0.220*""menu"" + 0.114*""option"" + 0.102*""item"" + 0.031*""healthy"" + 0.030*""choice"" + 0.028*""veggie""..."
3,"0.130*""cake"" + 0.045*""donut"" + 0.035*""vegas"" + 0.035*""cheesecake"" + 0.033*""dessert"" + 0.027*""bes..."
4,"0.023*""liking"" + 0.023*""buffalo"" + 0.021*""unusual"" + 0.016*""ring"" + 0.013*""miss"" + 0.012*""holy"" ..."
5,"0.074*""time"" + 0.045*""year"" + 0.019*""visit"" + 0.019*""restaurant"" + 0.018*""business"" + 0.018*""day..."
6,"0.036*""sweet_potato"" + 0.018*""pudding"" + 0.018*""period"" + 0.015*""boss"" + 0.015*""ice_tea"" + 0.014..."
7,"0.202*""bread"" + 0.044*""butter"" + 0.036*""french"" + 0.032*""warm"" + 0.030*""pastry"" + 0.027*""cheese""..."
8,"0.148*""friendly"" + 0.125*""staff"" + 0.053*""nice"" + 0.050*""service"" + 0.043*""great"" + 0.033*""clean..."
9,"0.144*""wife"" + 0.123*""beer"" + 0.117*""kid"" + 0.058*""family"" + 0.042*""daughter"" + 0.039*""son"" + 0...."


In [96]:
def TopicDetection(doc,min_topic_freq,topn):
    """
    Runs LDA against a document and returns most dominant topics & top keywords
    associated with topics.

    Relies on the notebook-level `TextPreprocessSpaCy`, `bigram_phraser`,
    `trigram_phraser`, `dictionary` and `lda_model`.

    Parameters
    ----------
    doc : str or list of str
        Text to analyse. A list is accepted as before; only its first element
        is analysed. A bare string is wrapped in a one-element list, because
        the preprocessing iterates over its argument and would otherwise
        treat every character as a separate document.
    min_topic_freq : float
        Topics whose weight is not strictly greater than this are dropped.
    topn : int
        Number of keywords to list per topic.

    Returns
    -------
    pandas.DataFrame
        Columns 'topic_num', 'topic_freq' and 'topic_keywords', sorted by
        'topic_freq' in descending order with a fresh 0..n-1 index.

    Raises
    ------
    IndexError
        If `doc` is an empty list.
    """
    if isinstance(doc, str):
        doc = [doc]

    doc_tokens = TextPreprocessSpaCy(doc)[0] #spaCy preprocess
    doc_trigram = list(trigram_phraser[bigram_phraser[doc_tokens]]) # phrase model
    doc_bow = dictionary.doc2bow(doc_trigram) #create bow representation
    doc_lda = lda_model[doc_bow] # run LDA on doc

    # One output row per (topic id, weight) pair, with that topic's top keywords
    rows = [
        (topic_id, weight, [word for word, _ in lda_model.show_topic(topic_id, topn=topn)])
        for topic_id, weight in doc_lda
    ]

    headers = ['topic_num','topic_freq','topic_keywords']
    df = pd.DataFrame(rows, columns=headers)
    df = df[df.topic_freq>min_topic_freq].sort_values('topic_freq',ascending=False).reset_index(drop=True)

    return df

In [112]:
# Demo 1: a review taken from the dataset, wrapped in a list because
# TopicDetection takes the first element of its argument.
text = [reviews[200000]]
%time topic = TopicDetection(text,0.1,5)
print("{}\n\n{}".format(topic,text))

Wall time: 330 ms
   topic_num  topic_freq                                 topic_keywords
0         31    0.292946         [pizza, cheese, topping, slice, crust]
1         17    0.145488       [dinner, restaurant, meal, table, night]
2          0    0.117800             [table, door, people, seat, small]
3         35    0.105540  [amazing, delicious, time, favorite, perfect]
4         40    0.102147               [bar, drink, great, beer, patio]

['OMG. This pizza is AMAZING. I honestly don\'t think I\'ve had better pizza anywhere else. The crust is to die for...I don\'t know what the secret is (the dough recipe? the oven? magical pizza elves?) and I don\'t care as long as they keep making it so I can put it my mouth...nom nom nom. They don\'t take reservations and it can get busy on a weekend evening but you can put in your name and sit at their (very tiny) "bar" for a drink or head next door to Johnson Public House and they will call when your table is ready. Oh, and I have to give o

In [113]:
# Demo 2: a hand-written sentence that is not part of the dataset
text = ['My son loves yoyo sushi, the rolls are amazing and fresh but prices are a bit high']
topic = TopicDetection(text,0.1,5)
print("{}\n\n{}".format(topic,text))

   topic_num  topic_freq                                 topic_keywords
0         12    0.334112           [price, worth, cheap, line, quality]
1         37    0.319998               [sushi, roll, fish, fresh, chef]
2         35    0.130335  [amazing, delicious, time, favorite, perfect]
3          9    0.113333            [wife, beer, kid, family, daughter]

['My son loves yoyo sushi, the rolls are amazing and fresh but prices are a bit high']


In [114]:
# Demo 3: a French sentence, to see how the model handles non-English input
text = ['Une baguette de pain ou simplement baguette est une variété de pain, reconnaissable à sa forme allongée']
topic = TopicDetection(text,0.1,5)
print("{}\n\n{}".format(topic,text))

   topic_num  topic_freq                          topic_keywords
0         45      0.3775             [et, la, service, mai, pas]
1          7      0.2525   [bread, butter, french, warm, pastry]
2         30      0.1275  [dim_sum, brother, cart, foot, closed]
3         47      0.1275   [taco, mexican, burrito, salsa, chip]

['Une baguette de pain ou simplement baguette est une variété de pain, reconnaissable à sa forme allongée']


In [100]:
#function forked from:
#https://www.machinelearningplus.com/nlp/topic-modeling-gensim-python/

def format_topics_sentences(ldamodel=None, corpus=None, texts=None, ratings=None):
    """
    Extract dominant topic from each document and append original text & rating

    Parameters
    ----------
    ldamodel : topic model, optional
        Must support ``ldamodel[corpus]`` (yielding, per document, a list of
        ``(topic_id, probability)`` pairs) and ``show_topic(topic_id)``
        (returning ``(word, probability)`` pairs). Defaults to the
        notebook-level ``lda_model``.
    corpus : iterable of bag-of-words documents, optional
        Defaults to the notebook-level ``corpus``.
    texts : sequence, optional
        Original texts, aligned with `corpus`. Defaults to the notebook-level
        ``reviews``.
    ratings : sequence, optional
        Ratings, aligned with `corpus`. Defaults to the notebook-level
        ``ratings``.

    Returns
    -------
    pandas.DataFrame
        One row per document with the columns 'Dominant_Topic',
        'Perc_Contribution' (rounded to 4 decimals) and 'Topic_Keywords',
        followed by one unnamed column for the texts and one for the ratings.
        A document for which the model reports no topic gets missing values
        in the three topic columns so that texts and ratings stay aligned.
    """
    # Resolve omitted arguments when the function is called, not when it is
    # defined: definition-time defaults keep pointing at whatever objects
    # existed when the cell was run and go stale if the model is retrained.
    notebook_ns = globals()
    if ldamodel is None:
        ldamodel = notebook_ns['lda_model']
    if corpus is None:
        corpus = notebook_ns['corpus']
    if texts is None:
        texts = notebook_ns['reviews']
    if ratings is None:
        ratings = notebook_ns['ratings']

    keyword_cache = {}  # topic id -> joined keyword string, computed once per topic
    records = []

    # Get main topic in each document
    for row in ldamodel[corpus]:
        if len(row) == 0:
            # Keep a placeholder row so later rows still line up with texts/ratings
            records.append((None, None, None))
            continue
        # Dominant topic = highest probability; on ties the first one listed wins
        topic_num, prop_topic = max(row, key=lambda pair: pair[1])
        topic_num = int(topic_num)
        if topic_num not in keyword_cache:
            keyword_cache[topic_num] = ", ".join(
                word for word, _ in ldamodel.show_topic(topic_num))
        records.append((topic_num, round(prop_topic, 4), keyword_cache[topic_num]))

    # Build the frame once; appending row by row copies the frame on every
    # iteration, and DataFrame.append no longer exists in pandas 2.x.
    sent_topics_df = pd.DataFrame(
        records, columns=['Dominant_Topic', 'Perc_Contribution', 'Topic_Keywords'])

    # Add original text and rating to the end of the output
    contents = pd.Series(texts)
    ratings = pd.Series(ratings)
    sent_topics_df = pd.concat([sent_topics_df, contents, ratings], axis=1)
    return sent_topics_df

#Sample from original data -optional so it runs quicker-
# The three lists are zipped before sampling so each sampled bag-of-words
# stays paired with its own review text and rating.
# NOTE(review): no random seed is set, so the sample (and the tables below)
# change on every run. zip() also stops at the shortest input, so a length
# mismatch between corpus, reviews and ratings would go unnoticed.
corpus_sample, reviews_sample, ratings_sample = zip(*random.sample(list(zip(corpus, reviews, ratings)), 20000))

#Run function
df_topic_sents_keywords = format_topics_sentences(ldamodel=lda_model, 
                                                  corpus=corpus_sample, 
                                                  texts=reviews_sample,
                                                  ratings=ratings_sample)
# Format
# reset_index() turns the row number into the first column, labelled Document_No below
df_dominant_topic = df_topic_sents_keywords.reset_index()
df_dominant_topic.columns = ['Document_No', 'Dominant_Topic', 'Topic_Perc_Contrib', 'Keywords', 'Text', 'Rating']

# Show
df_dominant_topic.head()

Unnamed: 0,Document_No,Dominant_Topic,Topic_Perc_Contrib,Keywords,Text,Rating
0,0,11.0,0.433,"flavor, sauce, little, bit, taste, sweet, nice, texture, light, meat",The interior was beautiful but the food....not so much. We went during lunch and three of us had...,1
1,1,24.0,0.7171,"dish, spicy, curry, indian, spice, flavour, thai, rice, favourite, restaurant",Always generous portions of rice. Thai pineapple is hands down the best! Tuna tango is also anot...,5
2,2,28.0,0.2048,"coffee, friend, girl, little, guy, drink, nice, open, cafe, people",Yes it's only 8:45 in the morning but I am still having Rigatony's withdrawal. Can you guys ship...,5
3,3,40.0,0.2114,"bar, drink, great, beer, patio, bartender, happy_hour, nice, night, menu",Else's the place to be. \n\nAnd I don't mean it in a scenester or hipster sort of way. It's just...,5
4,4,1.0,0.2076,"dish, pasta, sauce, italian, restaurant, menu, cheese, mushroom, meal, dessert",I just revisited this place on Satuday.\nI recommend that you take a tiny sample of everything i...,3


In [101]:
# Per dominant topic: number of sampled reviews (len), their mean rating, and
# the topic's keyword string (np.unique collapses the repeated values).
df_topics = pd.pivot_table(df_dominant_topic,index=['Dominant_Topic'],
                           aggfunc={'Rating':[np.mean,len],'Keywords':np.unique})
# Flatten the two-level column index, e.g. into 'Rating_mean' and 'Rating_len'
df_topics.columns = df_topics.columns.to_series().str.join('_')
# NOTE(review): `df` is rebound again here and no longer holds the phrase table.
df = df_topics.sort_values('Rating_mean',ascending=False)
# Only show topics that dominate more than 25 sampled reviews
df[df.Rating_len > 25]

Unnamed: 0_level_0,Keywords_unique,Rating_len,Rating_mean
Dominant_Topic,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
35.0,"amazing, delicious, time, favorite, perfect, yummy, fresh, everything, wonderful, ice_cream",607,4.678748
22.0,"great, service, excellent, awesome, time, atmosphere, amazing, fantastic, delicious, customer",1513,4.592201
8.0,"friendly, staff, nice, service, great, clean, attentive, delicious, fresh, atmosphere",1169,4.433704
44.0,"location, new, original, downtown, convenient, drive, favorite, valley, close, fav",47,4.148936
17.0,"dinner, restaurant, meal, table, night, server, service, appetizer, great, waiter",1717,4.051252
19.0,"sweet, tea, chocolate, cookie, cream, fruit, sugar, bomb, treat, strawberry",62,4.016129
48.0,"restaurant, authentic, chinese, style, dish, pho, family, area, traditional, asian",321,4.009346
3.0,"cake, donut, vegas, cheesecake, dessert, best, unbelievable, dozen, lemon, wedding",28,4.0
2.0,"menu, option, item, healthy, choice, veggie, vegan, vegetarian, lot, variety",189,3.989418
29.0,"breakfast, egg, brunch, bacon, pancake, waffle, morning, coffee, potato, sausage",640,3.984375
