In [40]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
import torch
from transformers import pipeline
import pandas as pd

In [41]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [42]:
model_name = "bigscience/bloomz-560m"

In [43]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

## Playing with a sample first

In [44]:
def buildprompt(sentence):
    prompt = "Given the sentence delimited by triple backticks ```{" + sentence + "}``` Would you rate the " \
             "previous review as positive, neutral or negative?"
    return prompt

In [45]:
sentence = "go there . dont miss the chance. the food and service was great. just a bit expensive. ," \
           "my bill , alone was 100 $ ( no alcohol included)"

In [46]:
prompt = buildprompt(sentence)

In [47]:
generator = pipeline('text-generation', model=model, tokenizer=tokenizer,do_sample=False)
result = generator(prompt, max_length=200)
print(result)

[{'generated_text': 'Given the sentence delimited by triple backticks ```{go there . dont miss the chance. the food and service was great. just a bit expensive. ,my bill , alone was 100 $ ( no alcohol included)}``` Would you rate the previous review as positive, neutral or negative? positive'}]


## Lets try for one of the topics resulted from the topic modeling approach

In [48]:
# load the original data
df = pd.read_csv('New_Delhi_reviews.csv')
df.dropna(inplace = True)

In [49]:
# using the index as id. This will help to identify the documents after topic modeling
df['id'] = df.index
df

Unnamed: 0,rating_review,review_full,id
0,5,"Totally in love with the Auro of the place, re...",0
1,5,I went this bar 8 days regularly with my husba...,1
2,5,We were few friends and was a birthday celebra...,2
3,5,Fatjar Cafe and Market is the perfect place fo...,3
4,5,"Hey Guys, if you are craving for pizza and sea...",4
...,...,...,...
147576,5,Near by airport very calm and cool environment...,147576
147577,5,My favourite place to stay. Great service. Ash...,147577
147578,5,"good food with nice decoration, drinks list al...",147578
147579,4,Near to airport .it is fine property. Staff i...,147579


In [50]:
df_tm = pd.read_csv('documents_all.csv', sep='\t')
# the index correspond to the id (order) of the document in the original dataframe. We can use 
# it to identify the document and restore the ratings
df_tm['id'] = df_tm.index
df_tm

Unnamed: 0.1,Unnamed: 0,Document,Topic,Name,Representation,Aspect1,Representative_Docs,Top_n_words,Probability,Representative_document,id
0,0,totally love auro place really beautiful quite...,-1,-1_restaurants_restaurant_buffet_meal,"['restaurants', 'restaurant', 'buffet', 'meal'...","['restaurants', 'buffet', 'delhi', 'dinner', '...",['worst indian restaurant goes dhaba one worst...,restaurants - restaurant - buffet - meal - lun...,0.609092,False,0
1,1,went bar days regularly husband fully satisfie...,19,19_great service_excellent service_amazing ser...,"['great service', 'excellent service', 'amazin...","['good service', 'hospitality', 'service mr', ...",['indeed pleasant enjoyable relaxing atmospher...,great service - excellent service - amazing se...,0.008328,False,1
2,2,friends birthday celebration food good taste r...,12,12_birthday celebration_birthday party_celebra...,"['birthday celebration', 'birthday party', 'ce...","['birthday celebration', 'anniversary dinner',...",['several time never fails deliver great place...,birthday celebration - birthday party - celebr...,0.185043,False,2
3,3,fatjar cafe market perfect place casual lunch ...,-1,-1_restaurants_restaurant_buffet_meal,"['restaurants', 'restaurant', 'buffet', 'meal'...","['restaurants', 'buffet', 'delhi', 'dinner', '...",['worst indian restaurant goes dhaba one worst...,restaurants - restaurant - buffet - meal - lun...,0.937262,False,3
4,4,hey guys craving pizza searching visit cafe ye...,-1,-1_restaurants_restaurant_buffet_meal,"['restaurants', 'restaurant', 'buffet', 'meal'...","['restaurants', 'buffet', 'delhi', 'dinner', '...",['worst indian restaurant goes dhaba one worst...,restaurants - restaurant - buffet - meal - lun...,0.657922,False,4
...,...,...,...,...,...,...,...,...,...,...,...
147574,147574,near airport calm cool environment nice staff ...,-1,-1_restaurants_restaurant_buffet_meal,"['restaurants', 'restaurant', 'buffet', 'meal'...","['restaurants', 'buffet', 'delhi', 'dinner', '...",['worst indian restaurant goes dhaba one worst...,restaurants - restaurant - buffet - meal - lun...,0.128644,False,147574
147575,147575,favourite place stay great service ashish bhar...,-1,-1_restaurants_restaurant_buffet_meal,"['restaurants', 'restaurant', 'buffet', 'meal'...","['restaurants', 'buffet', 'delhi', 'dinner', '...",['worst indian restaurant goes dhaba one worst...,restaurants - restaurant - buffet - meal - lun...,0.162641,False,147575
147576,147576,good food nice decoration drinks list alot cho...,-1,-1_restaurants_restaurant_buffet_meal,"['restaurants', 'restaurant', 'buffet', 'meal'...","['restaurants', 'buffet', 'delhi', 'dinner', '...",['worst indian restaurant goes dhaba one worst...,restaurants - restaurant - buffet - meal - lun...,0.010842,False,147576
147577,147577,near airport fine property staff warm friendly...,-1,-1_restaurants_restaurant_buffet_meal,"['restaurants', 'restaurant', 'buffet', 'meal'...","['restaurants', 'buffet', 'delhi', 'dinner', '...",['worst indian restaurant goes dhaba one worst...,restaurants - restaurant - buffet - meal - lun...,0.273919,False,147577


## Focusing on topic 214

In [51]:
list_of_ids = df_tm[df_tm['Topic'] == 214]['id'].to_list()
print(len(list_of_ids))
df_topic = df[df['id'].isin(list_of_ids)]
df_topic

22


Unnamed: 0,rating_review,review_full,id
8270,3,"While we normally dont eat meals at HRC, we di...",8270
14886,5,Food is outstanding. Service is professional a...,14886
20992,1,Drinks and food seem cheap. They even have a 2...,20992
22121,4,they have variety of dishes and vairety of dri...,22121
27519,4,"Can't fault the food, reasonable portion, serv...",27519
27912,5,Visited here on the 12th Nov 2016. Nice surpri...,27912
35709,4,Tonight we had dinner for two adults plus one ...,35709
44176,2,"Great place for a drink and cocktails, but add...",44176
45269,2,"Had lunch here December 21, 2016. The Butter C...",45269
49981,2,We ate here in November 2016. The cost of food...,49981


Getting a sample for each rating: 1, 2, 3, 4, and 5. Then, using bloomz-560m to predict the sentiment.

In [59]:
for i in range(1, 6):
    sample_rating = df_topic[df_topic['rating_review'] == i]['review_full'].values[0]
    prompt = buildprompt(sample_rating)
    result = generator(prompt, max_length=200)
    print('Sample for rating = ', i)
    print(result)

Sample for rating =  1
[{'generated_text': 'Given the sentence delimited by triple backticks ```{Drinks and food seem cheap. They even have a 2 for 1 happy hour, they also state no service charge on the menu but still add 20% vat onto drinks and 5% to food. I had left a really good review bit deleted it to leave this revised view once the bill arrived. Dodgy tactics lost you a good review pebble st}``` Would you rate the previous review as positive, neutral or negative? negative'}]
Sample for rating =  2
[{'generated_text': 'Given the sentence delimited by triple backticks ```{Great place for a drink and cocktails, but add on top of your bill 35.8% of taxes, service charge and service tax.}``` Would you rate the previous review as positive, neutral or negative? positive'}]
Sample for rating =  3
[{'generated_text': 'Given the sentence delimited by triple backticks ```{While we normally dont eat meals at HRC, we did this time, the food was consistent with others. Wasnt impressed with th