## Import Libraries

In [1]:
import nltk
import pandas as pd
from gensim.models import LdaModel
from gensim.test.utils import datapath
from nltk import punkt
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
# Notebook-wide display option: allow up to 999 columns when a frame is rendered.
pd.set_option('display.max_columns', 999)

# Base stop-word set built from NLTK's English list; later cells extend it in place.
stop = set(stopwords.words('english'))

# Shared NLP helpers used by the preprocessing cells.
tokenizer = word_tokenize
lemmatizer = WordNetLemmatizer()


## Helper Functions

In [None]:
import string

# Translation table mapping every character of string.punctuation to one space.
_PUNCTUATION_TO_SPACE = str.maketrans({mark: ' ' for mark in string.punctuation})


def remove_punctuations(text):
    """Return `text` with each character in `string.punctuation` replaced by a space.

    The replacement is one-for-one, so the result has the same length as
    the input and runs of punctuation become runs of spaces.
    """
    return text.translate(_PUNCTUATION_TO_SPACE)

## Stop Words

In [None]:
# Extra stop words layered on top of NLTK's English list; a later cell merges
# them into the `stop` set with stop.update(stops).
# The list repeats some entries (e.g. 'place', 'good', 'great'); repeats are
# harmless because the words end up in a set.
# NOTE(review): a few entries look like typos ('delicous', 'waaaine', 'chea') —
# confirm whether they are intentional misspellings seen in the reviews.
stops = ['thing', 'might', 'makes', 'eating', 'real', 'let', 'tried', 'far', 'reasonable', 'husband', 'truly',
        'pm', 'am', 'else', 'seemed', 'someone', 'good', 'like', 'really', 'place', 'would', 'pretty', 'bit', 'much',
        'better', 'great', 'amazing', 'place', 'delicous', 'back', 'good', 'definitely', 'best', 'love', 'recommend',
        'good', 'great', 'place', 'nice', 'really', 'bar', 'back', 'definitely', 'came', 'pretty', 'dish', 'ordered',
        'dessert', 'happy', 'hour', 'drinks', 'beer', 'bar', 'new', 'always', 'one', 'go', 'location', 'get', 'place',
        'york', 'us', 'time', 'table', 'one', 'order', 'get', 'came', 'would', 'back', 'even', 'course', 'waaaine',
        'dining', 'meal', 'course', 'dishes', 'impeccable', 'juicy', 'later', 'margarita', 'fine', 'hard', 'less',
        'especially', 'items', 'given', 'vietnamese', 'late', 'not', 'can', 'astoria', 'steakhouse', 'terrible',
        'empty', 'like', 'boat', 'leave', 'call', 'offer', 'consider', 'disappoint', 'without', 'stop', 'delivery',
        'patio', 'smoke', 'dress', 'keep', 'market', 'park', 'floor', 'hotel', 'th', 'upstairs', 'locate', 'near',
        'pair', 'piece', 'solid', 'set', 'fill', 'interior', 'put', 'chinatown', 'piece', 'shop', 'solid', 'season',
        'boy', 'welcome', 'return', 'price', 'expensive', 'chea', 'overprice', 'cost', 'money', 'spend', 'receive',
        'include', 'quick', 'fast', 'black', 'salt', 'add', 'charge', 'sum', 'bottomless', 'poach', 'kitchen', 
        'plat', 'mean', 'anything', 'three', 'tiny', 'level', 'dry', 'half', 'use', 'anything', 'mean', 'honestly',
        'almost', 'summer', 'close', 'rich', 'salty', 'red', 'couple', 'plat', 'show', 'occasion', 'least', 'lack',
        'reason', 'either', 'miss', 'hours', 'rat', 'guess', 'instead', 'understand', 'mind', 'outstanding', 
        'platter', 'rude', 'greet', 'guy', 'girl', 'woman', 'man', 'cold', 'soft', 'white', 'fixe', 'pre', 'prix',
        'refresh', 'le', 'range', 'terrific', 'craft', 'hill', 'non', 'pot', 'win', 'live', 'weekend', 'surprise',
        'shack', 'po', 'raw', 'dozen', 'extra', 'finish', 'care', 'extremely', 'spring', 'bag', 'cheap', 'card',
        'plan', 'kid', 'credit', 'card', 'gluten', 'today', 'counter', 'chelsea', 'daughter', 'nephew', 'niece',
        'son', 'father', 'mother', 'aunt', 'uncle', 'father', 'mother', 'ago', 'morning', 'sister', 'brother', 
        'kind', 'yet', 'prepare', 'present', 'creative', 'beautifully', 'presentation', 'appetizers', 'house', 
        'upon', 'stumble', 'ny', 'square', 'station', 'deal', 'option', 'number', 'note', 'minute', 'early', 
        'private', 'run', 'hand', 'establishment', 'type', 'cook', 'box', 'fact', 'rather', 'remember', 'dollars',
        'forget', 'refill', 'ready', 'mins', 'blue', 'farm', 'freeze', 'strong', 'lady', 'man', 'tap', 'hell', 
        'eye', 'b', 'watch', 'game', 'meet', 'hh', 'town', 'grab', 'c', 'exceptional', 'celebrate', 'treat',
        'beyond', 'year', 'please', 'anniversary', 'birthday', 'week', 'certainly', 'expectations', 'note', 'book', 
        'four', 'choose', 'balance', 'simply', 'perhaps', 'rather', 'together', 'mediocre', 'unfortunately',
        'rather', 'meh', 'mehhhhh', 'buy', 'con', 'expectations', 'store', 'hype', 'suppose', 'despite', 'unless', 
        'forget', 'event', 'baby', 'partner', 'scene', 'photo', 'days', 'locations', 'bc', 'frequent', 'hair', 
        'pitcher', 'utensils','fly', 'fiancé', 'brown', 'chic', 'tart', 'savory', 'specials', 'low', 'notch', 
        'dark', 'combo', 'saw', 'mom', 'already', 'poor', 'worst', 'save', 'horrible', 'business', 'ruin', 'happen',
        'smell', 'sorry', 'immediately', 'help', 'stand', 'huge', 'w', 'soon', 'casual', 'district', 'unlimited',
        'board', 'stay', 'upper', 'par', 'part', 'garden', 'stand', 'somewhat', 'term', 'style', 'fan', 'write',
        'read', 'literally', 'change', 'mention', 'pick', 'send', 'weird', 'whole', 'basically', 'annoy', 'delish',
        'mouth', 'question', 'refill', 'question', 'boyfriend', 'bf', 'girlfriend', 'gf', 'wow', 'stuff',
        'complimentary', 'superb', 'pleasant', 'reasonably', 'provide', 'entire', 'rush']



In [2]:
# Second batch of extra stop words; a later cell merges them into the `stop`
# set with stop.update(morestops).
# Many entries repeat words already listed here or in `stops`; duplicates
# collapse once they are in the set.
# NOTE(review): ' vietnamese' (leading space) and the digit strings ('4', '20',
# '5', '2', '3') probably never match a token, because the preprocessing cells
# split reviews on whitespace and target digits for removal before filtering —
# confirm and prune if so.
morestops = ['light', 'main', 'appetizer', 'city', 'left', 'felt', 'pot', 'chinese', 'hot', 'name', 'sure', 
            'walked', 'sit', 'door', 'yelp', 'group', 'options', 'portions', 'find', 'absolutely', 'neighborhood',
            'ice', 'desserts', 'enjoyed', 'courses', 'busy', 'large', 'friday', '4', 'able', 'packed', 
            'reservations', 'saturday', 'reservation', 'full', 'average', 'feel', 'looking', 'gave', 'brought', 
            'must', 'york', 'michelin', 'not', 'can', 'tell', 'home', 'awesome', 'different', 'bbq', 'lot', 'size', 
            'amount', 'plate', 'long', 'theres', 'open', 'line', 'big', 'crowded', 'seated', 'lovely', 'cozy', 
            'waiting', 'sat', 'seats', 'inside', 'cool', 'wall', 'walk', 'front', 'free', 'old', 'sometimes', 
            'usually', 'yes', 'theyre', 'youll', 'often', 'isnt', 'things', 'gem', 'bill', 'cash', 'bland', 'room', 
            'end', 'view', 'evening','waited', '20', '5', 'friendly', 'loud', 'fun', 'mexican', 'coming', 'enjoy',
            'greek', 'outdoor', 'warm', 'not', 'can', 'oh', 'wrong', 'na', 'street', 'cocktail', 'beers', 'glass',
            'brooklyn', 'manhattan', 'may', 'bottle', 'started', 'tender', 'slow', 'sauce', 'sweet', 'seating', 
            'lunch', 'menu', 'stars', 'quality', 'nothing', 'worth', 'reservation', 'recommend', 'spot', 'happy', 
            'everything', 'cooked', 'bread', 'outside', 'tables', 'location', 'mac', 'course', 'made', 'never',
            'another', 'know', 'take', 'korean', 'french', 'italian', 'sauce', 'area', 'even', 'said', 'took', 
            'people', 'could', 'wait', 'first', 'always', 'go', 'love', 'new', 'im', 'come', 'favorite', 'times', 
            'excellent', 'night','hour', 'pretty', 'bit', 'small', 'little', 'though', 'much', 'think', 'better',
            'quite', 'spicy','side', 'time', 'definitely', 'would', 'delicious', 'dont', 'dish', 'ordered', 'table', 
            'place', 'nice', 'bar', 'also', 'came', 'great', 'good', 'amazing', 'restaurant', 'really', 'well', 
            'bad', 'minutes', 'nice', 'back', 'best', 'ive', 'like','dont','get', 'us', 'came', 'order','minutes',
            'one', 'asked', 'got', 'didnt', 'perfectly', 'meal', 'dishes', 'cream', 'want', 'make', 'youre', 'eat', 
            'way', 'thats', 'say', 'need', 'see', 'something', 'dinner', 'every', 'highly', 'wonderful', 'nyc', 
            'perfect', 'told', 'drinks', 'wanted', 'went', 'wasnt', 'taste', 'overall', '2', '3', 'around', 
            'japanese', 'flavor', 'rib', 'can', 'not', 'oh', 'lol', 'point', 'wife', 'brooklyn',' vietnamese', 
            'bite', 'n', 'enough', 'although', 'expect', 'per', 'person', 'tender', 'flavorful', 'green', 'tasted',
            'texture', 'give', 'going', 'still', 'ever', 'reviews', 'review', 'cant', 'many', 'restaurants', 'wine',
            'dining', 'top', 'beautiful', 'special', 'tasting', 'birthday', 'incredible', 'fried', 'sunday', 'water', 
            'friend', 'drink', 'friends', 'super', 'music', 'cocktails', 'vibe', 'space', 'coffee', 'medium', 'meat',
            'rare', 'grill', 'cut', 'bone', 'roasted', 'fresh', 'brunch', 'breakfast', 'dessert', 'toast', 'two',
            'dessert','seafood', 'perfectly', 'meal', 'dishes', 'cream', 'want', 'make', 'youre', 'eat', 'way', 
            'thats', 'say', 'need', 'see', 'something', 'dinner', 'every', 'highly', 'wonderful', 'nyc', 'perfect',
            'told', 'drinks', 'wanted', 'went', 'wasnt', 'taste', 'overall', '2', '3', 'around', 'japanese', 'flavor', 'rib', 'can', 'not', 'oh', 'lol',
            'point', 'wife', 'brooklyn', 'vietnamese', 'bite', 'n', 'enough', 'although', 'expect', 'per', 'person',
            'tender', 'flavorful', 'green', 'tasted', 'texture', 'tea', 'thai', 'ask', 'arrived', 'party', 'cake', 
            'date', 'selection', 'cute', 'try', 'bowl', 'probably', 'high', 'however', 'id', 'decent', 'maybe', 
            'places', 'village', 'east', 'tapas', 'west', 'sangria', 'de', 'margaritas', 'dan', 'visit', 'last', 
            'family', 'years', 'everyone', 'thank', 'hearing', 'tons','rave','finally','decided','actually',
            'second','puts','bring','youd','impress','list', 'personally', 'neighborhood', 'loved', 'fantastic', 
            'absolutely', 'grilled', 'tasty', 'crispy', 'portion', 'served', 'day', 'right', 'party', 'next',
            'since', 'away', 'arrived', 'ok', 'however', 'maybe', 'tip', 'star', 'okay', 'short', 'flavors', 
            'vegan', 'la', 'unique', 'ingredients', 'interesting', 'cuisine', 'time', 'definitely', 'would', 
            'delicious', 'dont', 'dish', 'ordered', 'table', 'place', 'nice', 'bar', 'also', 'came', 'great', 'good', 
            'amazing', 'restaurant', 'really', 'well', 'bad', 'minutes', 'nice', 'back', 'best', 'ive', 'like','dont',
            'get', 'us', 'came', 'order','minutes','one', 'asked', 'got', 'didnt','thing', 'might', 'makes', 'eating', 
            'real', 'let', 'tried', 'far', 'reasonable', 'husband', 'truly', 'pm', 'am', 'else', 'seemed', 'someone', 
            'good', 'like', 'really', 'place', 'would', 'pretty', 'bit', 'much', 'better', 'great', 'amazing', 'place', 
            'delicous', 'back', 'good', 'definitely', 'best', 'love', 'recommend', 'good', 'great', 'place', 'nice', 
            'really', 'bar', 'back', 'definitely', 'came', 'pretty', 'dish', 'ordered', 'dessert', 'happy', 'hour', 
            'drinks', 'beer', 'bar', 'new', 'always', 'one', 'go', 'location', 'get', 'place', 'york', 'us', 'time',
            'table', 'one', 'order', 'get', 'came', 'would', 'back', 'even', 'course', 'wine', 'dining', 'meal', 
            'course', 'dishes', 'impeccable', 'juicy', 'later', 'margarita', 'fine', 'hard', 'less', 'especially', 
            'items', 'given', 'vietnamese', 'late', 'not', 'can', 'here', 'there', 'yummy', 'can', 'din', 'seat', 
            'look', 'crowd', 'work', 'amaze', 'pay', 'seem', 'share', 'start']

In [None]:
# Fold the first custom word list into the shared stop-word set (in place).
stop |= set(stops)

In [None]:
# Fold the second custom word list into the shared stop-word set (in place).
stop |= set(morestops)

## Load Data and Model

In [None]:
# Read the review table; the path is relative to the notebook's working directory.
reviews_csv = 'cleanedManhattanfull.csv'
df = pd.read_csv(reviews_csv)

In [None]:
# Load the previously trained LDA topic model.
# `datapath` and `LdaModel` come from the notebook's top import cell.
# datapath() resolves the name inside gensim's bundled test-data directory,
# so the saved model files are expected to live there.
fname = datapath('ldabestbest')

# mmap='r' asks gensim to memory-map the model's large arrays read-only
# rather than loading them fully into memory.
lda = LdaModel.load(fname, mmap='r')

## Cleaning and Preprocessing

In [None]:
# Replace every missing value with the placeholder string 'none';
# the list clean-up step later looks for this exact token.
df = df.fillna('none')

In [None]:
# Remove columns that the rest of the notebook does not use.
# ('Unnamed: 0' is presumably an index column written out with the CSV — confirm.)
_unused_columns = ['Unnamed: 0', 'Restaurant_Id']
df.drop(columns=_unused_columns, inplace=True)

In [None]:
# Replace punctuation characters in every review with spaces.
df['Review'] = df['Review'].map(remove_punctuations)

In [None]:
# Lower-case the review text; the stop-word lists are all lower-case.
lowercased_reviews = df['Review'].str.lower()
df['Review'] = lowercased_reviews

In [None]:
# Replace every run of digits with a single space.
# regex=True is explicit because pandas >= 2.0 treats the pattern as a literal
# string by default, which would leave the numbers untouched. The raw string
# keeps '\d' from being parsed as an (invalid) string escape.
df['Review'] = df['Review'].str.replace(r'\d+', ' ', regex=True)

In [None]:
def _drop_stop_words(text):
    """Return `text` with every whitespace-separated word found in `stop` removed."""
    kept = (word for word in text.split() if word not in stop)
    return ' '.join(kept)


# Remove stop words from each review; the remaining words are re-joined with single spaces.
df['Review'] = df['Review'].apply(_drop_stop_words)

In [None]:
def _lemmatize_as_verbs(text):
    """Lemmatize each whitespace-separated word of `text` as a verb and re-join with spaces."""
    return ' '.join(lemmatizer.lemmatize(token, pos='v') for token in text.split())


# Reduce each word to its verb lemma so that inflected forms share one token.
df['Review'] = df['Review'].apply(_lemmatize_as_verbs)

In [None]:
# Split each cleaned review into a list of tokens, stored next to the text.
df['token_Review'] = df['Review'].map(tokenizer)

## Collect Tokens into a List

In [None]:
# Export the token lists as a list of rows, each row holding one review's token list.
# NOTE(review): assumes the intended source is the `token_Review` column taken as a
# one-column frame, so every row is `[tokens]` and flattening the rows yields one
# token list per review — confirm.
tokenslist = df[['token_Review']].values.tolist()

### Tidying up List

In [None]:
# Flatten one level: collect the cells of every row that is a plain list
# into a single list, preserving order.
temp = [cell for row in tokenslist if type(row) is list for cell in row]

In [None]:
def deleting(list_1, del_name):
    """Remove, in place, every sub-list of `list_1` that contains `del_name`.

    Args:
        list_1: list of containers (e.g. token lists); modified in place.
        del_name: value whose presence marks a sub-list for removal.

    Returns:
        The same `list_1` object, now holding only the sub-lists that do
        not contain `del_name`.
    """
    # Filter in a single pass and write the survivors back through a slice
    # assignment: the caller's list object is updated, and no element is
    # skipped the way it would be when removing items during iteration.
    list_1[:] = [sub_list for sub_list in list_1 if del_name not in sub_list]
    return list_1

# Drop the placeholder reviews (those containing the 'none' token).
# Binding the result keeps the cell from echoing the whole token corpus as its output.
temp = deleting(temp, 'none')

### Last Stop Word Removal

In [None]:
# Filter each review's tokens against the stop-word set once more,
# keeping one (possibly empty) token list per review.
temps = [[word for word in review if word not in stop] for review in temp]

### Converting to BoW

In [None]:
import gensim
from gensim import corpora

# Map each distinct token in the filtered reviews to an integer id.
# NOTE(review): this dictionary is built fresh here while `lda` is loaded from disk;
# its ids only line up with the model if the model was trained with an identically
# built dictionary — confirm.
dictionary = corpora.Dictionary(temps)

# Bag-of-words form of every review, produced by dictionary.doc2bow.
doc_term_matrix = list(map(dictionary.doc2bow, temps))

## Apply LDA Topics to Every Review

In [None]:
# Pre-create one float column per LDA topic, initialised to 0.0.
# Column suffixes are 0-based so they match the topic ids that the scoring
# cell uses when it builds 'review_topic_<id>' names.
N_TOPICS = 150  # NOTE(review): assumed to equal the loaded model's topic count — confirm
for topic_id in range(N_TOPICS):
    col_name = 'review_topic_' + str(topic_id)
    df[col_name] = 0.0

In [None]:
# Score every review with the LDA model and store each returned topic weight
# in that review's 'review_topic_<id>' column.
for row_index, row in df.iterrows():
    # Use the row's own tokens, with the same final stop-word filter that was
    # applied to the corpus the dictionary was built from.
    tokens = [word for word in row['token_Review'] if word not in stop]
    new_doc = dictionary.doc2bow(tokens)
    # Indexing the model with a bag-of-words yields (topic_id, weight) pairs.
    for topic_id, weight in lda[new_doc]:
        col_name = 'review_topic_' + str(topic_id)
        df.loc[row_index, col_name] = weight