In [2]:
# Mount Google Drive into the Colab filesystem at /content/drive so that later
# cells can read the dataset from it and write the pickled artifacts to it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import pandas as pd
import numpy as np
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer

In [4]:
# Location of the preprocessed reviews CSV on the mounted Drive.
# NOTE(review): the path is relative, so it presumably only resolves when the
# working directory is /content (the parent of the mount point) - confirm.
DATASET_FILE = "drive/MyDrive/processed_dataset.csv"
# Later cells read the 'stars', 'text' and 'clean text' columns of this frame.
df = pd.read_csv(DATASET_FILE)

In [5]:
# Extract the negative reviews from the dataset; here we take the 1- and 2-star reviews.
one_star_df = df.loc[df['stars'] == 1]
two_stars_df = df.loc[df['stars'] == 2]

# DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
# ignore_index=True renumbers the rows 0..n-1 (all 1-star rows first, then all
# 2-star rows), which later cells rely on when they look reviews up by the
# row labels of the document-topic matrix.
negative_df = pd.concat([one_star_df, two_stars_df], ignore_index=True)

# Check the merge at the junction of the two frames: the last two 1-star rows
# followed by the first two 2-star rows. The junction is derived from the data
# instead of being hard-coded, so the check stays valid if the class sizes change.
junction = len(one_star_df)
negative_df.iloc[max(junction - 2, 0):junction + 2]

Unnamed: 0,text,stars,length,clean text
4998,"Awful. Awful. Awful. Cold chicken, semi-stale ...",1,84,awful awful awful cold chicken semi stale chip...
4999,Even if this pizza is the best thing I've ever...,1,54,even pizza best thing ever taste not_order ord...
5000,"As a Northeastern student, I was excited when ...",2,142,northeastern student excite place open last ye...
5001,So I had to knock my review down a couple of s...,2,206,knock review couple star consistency important...


In [6]:
# negative_df[4998:5002] # checking for the merging at the junction of the dataframes

In [7]:
# TF-IDF features over the cleaned review text. Terms that occur in more than
# 80% of the reviews (max_df) or in fewer than 1% of them (min_df) are dropped.
vectorizer = TfidfVectorizer(min_df=0.01, max_df=0.8)
X = vectorizer.fit_transform(negative_df['clean text'])

# Dense document-term frame: one row per review, one column per kept term.
# Despite the name, the values are TF-IDF weights rather than raw counts.
BOW = pd.DataFrame(
    X.toarray(),
    columns=vectorizer.get_feature_names_out(),
)


In [9]:
def display_topics(model, feature_names, num_top_words, topic_names=None):
    """Print the top words of every topic of a fitted model and return them.

    Args:
        model: Object exposing ``components_``, an iterable with one array of
            per-feature weights per topic (each row must support ``argsort``).
        feature_names: Sequence mapping a feature (column) index to its token.
        num_top_words: Number of highest-weighted tokens to report per topic.
        topic_names: Optional container of display names indexed by topic
            number. A missing, empty or falsy entry falls back to printing
            the topic number.

    Returns:
        list[str]: One ", "-joined string of top tokens per topic, in the
        order of ``model.components_``.
    """
    topic_tokens = []
    for ix, topic in enumerate(model.components_):
        # Look up the display name; a container shorter than the number of
        # topics simply means "no name for this topic" instead of an error.
        name = None
        if topic_names:
            try:
                name = topic_names[ix]
            except (IndexError, KeyError):
                name = None

        # Print topic header (name if available, otherwise the topic number).
        if not name:
            print("\nTopic ", ix)
        else:
            print("\nTopic: '", name, "'")

        # argsort is ascending, so walk it backwards to get the
        # num_top_words largest weights in descending order.
        top_indices = topic.argsort()[:-num_top_words - 1:-1]
        top_words = ", ".join([feature_names[i] for i in top_indices])

        # Computed once, then both printed and collected.
        print(top_words)
        topic_tokens.append(top_words)
    return topic_tokens

In [10]:
from sklearn.decomposition import NMF

number_of_topics = 15

# Pin the random state so that re-running the notebook yields the same topics
# in the same order. The topic labels written by hand further down and the
# pickled model are only meaningful for one specific factorization.
nmf_model = NMF(n_components=number_of_topics, random_state=42)

# doc_topic has one row per review and one column per topic; each entry is the
# weight of that topic in that review.
doc_topic = nmf_model.fit_transform(BOW)



In [None]:
# Print and collect the 10 highest-weighted terms of each topic.
# get_feature_names() was removed in scikit-learn 1.2; get_feature_names_out()
# is the replacement and is what the vectorizer cell already uses.
topic_tokens = display_topics(nmf_model, vectorizer.get_feature_names_out(), 10)

In [12]:
# Wrap the document-topic matrix in a frame whose columns are the topic numbers.
doc_topic_df = pd.DataFrame(doc_topic)

# For every topic, pick the raw text of the 3 reviews with the largest weight
# on that topic. The lookup goes through row labels, so it relies on
# doc_topic_df and negative_df sharing the same 0..n-1 index.
top_reviews = [
    negative_df['text'][doc_topic_df[topic_ix].nlargest(3).index]
    for topic_ix in range(number_of_topics)
]

In [13]:
# Split the per-topic top-3 reviews into three parallel lists (one entry per
# topic): the best-matching review, the second best and the third best.
top_review_1 = [reviews.iloc[0] for reviews in top_reviews]
top_review_2 = [reviews.iloc[1] for reviews in top_reviews]
top_review_3 = [reviews.iloc[2] for reviews in top_reviews]


In [14]:
# Summary table: one row per topic with its top terms and its three most
# representative reviews.
topic_df = pd.DataFrame(
    {
        "topics": topic_tokens,
        "top_review_1": top_review_1,
        "top_review_2": top_review_2,
        "top_review_3": top_review_3,
    }
)

In [25]:
# Display the per-topic summary (top terms and the three top reviews).
topic_df

Unnamed: 0,topics,top_review_1,top_review_2,top_review_3
0,"order, take, delivery, call, wrong, get, hour,...",Ordered delivery and the food came 30 minutes ...,Ordering on app really sucks . It takes too lo...,"Normally, I don't write off restaurants based ..."
1,"good, price, restaurant, menu, small, portion,...",Went here for Mothers Day lunch on a Saturday ...,"We had heard wonderful things about Coquine, s...","For the price comparable to Blu ill, NY we had..."
2,"pizza, crust, cheese, slice, topping, delivery...",Ok we tried Mod pizza tonight for 1st time. We...,I ordered pizza by the phone call. When I got ...,Worst Dominos pizza experience I have ever had...
3,"place, really, go, get, good, people, try, wou...",Heard that this is a very popular place in Bos...,"This place used to be a cool, chill place. Now...",This place is just okay. I've never had any f...
4,"food, cold, quality, back, mediocre, eat, chin...",The place is really charming in the German Vil...,FOOD IS GOOD SERVICE IS POOR. Walked in and or...,Go for the drinks at the bar. The food is med...
5,"wait, minute, table, us, seat, come, hour, 15,...",It's currently my second time here. We are a p...,Seriously it's worth the wait!? We waited frea...,DO NOT GO HERE - THE WAITS ARE 5 TIMES LONGER ...
6,"burger, fry, bun, well, onion, cheese, medium,...",The service was good but they forgot several i...,"Really rather disappointing, got a burger and ...",It was an unfortunately unpleasant experience....
7,"service, bad, customer, ever, slow, horrible, ...",bad service dont go here! They needs to learn ...,Worst service ever!!!!!!!!!! Just pray you don...,I just went there tonight to have a nice dinne...
8,"chicken, rice, fry, sauce, wing, dry, piece, s...",This chicken is not all that. It's just lightl...,Two recent visits and I have to ask...where's ...,Ordered chicken shawarma but guess what's insi...
9,"bar, drink, beer, bartender, night, friend, go...",We went here on a Saturday night when they had...,Roscoe's is your middle-of-the-road bar locate...,I stopped in for a drink at the bar. The barte...


In [17]:
# Human-readable labels (in French) for the 15 topics, keyed 'topic1'..'topic15'.
# NOTE(review): presumably 'topicK' labels NMF component K-1 of the run that was
# inspected when these were written - confirm. The labels are not derived from
# the model in code, so they can go stale when the factorization changes.
# NOTE(review): several labels contain typos or trailing spaces (e.g. 'ASIATQUE',
# 'PESONNEL', 'EXPEIENCE'); they are left untouched here because downstream
# consumers may match on the exact strings.
topic = {'topic1':'ACCUEIL ET SERVICE',
         'topic2':'NOURRITURE ASIATQUE MAUVAISE ',
         'topic3':"TEMPS D'ATTENTE ET PIZZA FROIDE OU TROP CUITE",
         'topic4':'MAUVAISE EXPERIENCE AVEC LE PESONNEL ', 
         'topic5':'PROBLEME BURGER (ERREUR COMMANDE, PRIX ELEVE, ETC)', 
         'topic6':"TEMPS D'ATTENTE TROP LONG", 
         'topic7':'PROBLEME QUALITE BURGER', 
         'topic8':'TRES MAUVAIS SERVICE CLIENT', 
         'topic9':'PROBLEME DE POULET', 
         'topic10':'BAR MAUVAIS EXPEIENCE AVEC LE PERSONNEL', 
         'topic11':'CLIENT DECU, NE REVIENDRA PAS', 
         'topic12':'NOURRITURE JAPONNAISE DECEVANTE', 
         'topic13':'MAUVAIS SANDWICH',
         'topic14':'EXPERIENCE MEDIOCRE, PRIX LEGEREMENT TROP ELEVES', 
         'topic15':'PROBLEME DANS LA COMMANDE' }

In [None]:
# Display the topic-label mapping defined in the previous cell.
topic

In [23]:
# Persist the fitted vectorizer and NMF model to Drive. The context managers
# guarantee each file is flushed and closed even if pickling fails, whereas a
# bare open(...) passed to pickle.dump leaves the handle open.
# NOTE: only unpickle these files from a trusted location - pickle.load can
# execute arbitrary code.
with open('drive/MyDrive/vectoriseur', 'wb') as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)

with open('drive/MyDrive/model', 'wb') as model_file:
    pickle.dump(nmf_model, model_file)