In [6]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
# Hugging Face Hub checkpoint used for both the tokenizer and the classifier; keeping the
# name in one constant guarantees the two are always loaded from the same model.
MODEL_NAME = 'nlptown/bert-base-multilingual-uncased-sentiment'
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

Encode and calculate sentiment

In [8]:
# Encode one sample sentence into token ids, then decode it back to check the round trip.
# Swap in a negative sentence (e.g. 'I hated this, absolutely worst') to see the other extreme.
sample_text = 'I love this, absolutely great'
tokens = tokenizer.encode(sample_text, return_tensors='pt')
print(tokens)
tokenizer.decode(tokens[0])

tensor([[  101,   151, 11157, 10372,   117, 35925, 10563, 11838,   102]])


'[CLS] i love this, absolutely great [SEP]'

In [9]:
# Run the classifier on the encoded sentence and convert its logits into a 1-based score.
result = model(tokens)
logits = result.logits
print(result)
print(logits)
top_class = torch.argmax(logits)  # 0-based index of the largest logit
print(top_class)
print(int(top_class)+1)  # shift the 0-based index to a 1-based score

SequenceClassifierOutput(loss=None, logits=tensor([[-2.0864, -2.6097, -1.4409,  0.9733,  4.2677]],
       grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)
tensor([[-2.0864, -2.6097, -1.4409,  0.9733,  4.2677]],
       grad_fn=<AddmmBackward0>)
tensor(4)
5


Collect reviews

In [10]:

# Download the Yelp business page and extract the review text from its HTML.
link = r'https://www.yelp.com/biz/mejico-sydney-2'
# The timeout keeps the cell from hanging on an unresponsive server, and
# raise_for_status() surfaces HTTP errors instead of silently parsing an error page.
r = requests.get(link, timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html.parser')
# Review paragraphs are <p> tags with a class name containing "comment".
regex = re.compile('.*comment.*')
scrape_results = soup.find_all('p', {'class': regex})
reviews = [tag.text for tag in scrape_results]
# Fail with a clear message rather than an IndexError on the previews below.
if not reviews:
    raise RuntimeError(
        f'No review paragraphs found at {link}; '
        'the page markup may have changed or the request may have been blocked.'
    )
print(scrape_results[0])
print(scrape_results[0].text)
print(reviews[0])

<p class="comment__09f24__D0cxf y-css-1wfz87z"><span class="raw__09f24__T4Ezm" lang="en">Seated without a booking on a super busy Saturday night. Lovely, warm, and Theo right hostess also looked after our table and went out of her way to give detailed ingredients in every dish to avoid allergies for one of us. And the food was great! Guacamole made right at our table, everything prepared with our allergies in mind, and great dish recommendations. We'd been visiting Sydney for about a week from Melbourne, and this was by far our best dining experience. I'd definitely return here in the future.</span></p>
Seated without a booking on a super busy Saturday night. Lovely, warm, and Theo right hostess also looked after our table and went out of her way to give detailed ingredients in every dish to avoid allergies for one of us. And the food was great! Guacamole made right at our table, everything prepared with our allergies in mind, and great dish recommendations. We'd been visiting Sydney f

In [11]:
# Peek at the first scraped review as a plain string.
reviews[0]

"Seated without a booking on a super busy Saturday night. Lovely, warm, and Theo right hostess also looked after our table and went out of her way to give detailed ingredients in every dish to avoid allergies for one of us. And the food was great! Guacamole made right at our table, everything prepared with our allergies in mind, and great dish recommendations. We'd been visiting Sydney for about a week from Melbourne, and this was by far our best dining experience. I'd definitely return here in the future."

Load reviews into DataFrame and Score

In [12]:
# Build the frame straight from the list: routing it through np.array first creates a
# fixed-width unicode array padded to the longest review, and yields a float64 column
# when the list is empty.
df = pd.DataFrame({'review': reviews})

In [13]:
# Preview the first rows of the review frame.
df.head()

Unnamed: 0,review
0,Seated without a booking on a super busy Satur...
1,The food was decent not great.. We had the gu...
2,"Food was okay, guacamole was below average. Se..."
3,Out of all the restaurants that I tried in Syd...
4,The food and service here was really good. It...


In [14]:
# Read the first review back from the frame by position.
df['review'].iloc[0]

"Seated without a booking on a super busy Saturday night. Lovely, warm, and Theo right hostess also looked after our table and went out of her way to give detailed ingredients in every dish to avoid allergies for one of us. And the food was great! Guacamole made right at our table, everything prepared with our allergies in mind, and great dish recommendations. We'd been visiting Sydney for about a week from Melbourne, and this was by far our best dining experience. I'd definitely return here in the future."

In [15]:
def sentiment_score(review):
    """Return the model's predicted rating for a single review.

    Args:
        review: Review text to classify.

    Returns:
        int: 1-based index of the largest logit (1-5 for this five-class checkpoint).
    """
    # The BERT checkpoint is limited to 512 input positions; truncating at the token
    # level keeps long reviews from crashing the forward pass.
    tokens = tokenizer.encode(review, return_tensors='pt', truncation=True, max_length=512)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        result = model(tokens)
    return int(torch.argmax(result.logits))+1

In [16]:
# Sanity-check the scorer on the first review in the frame.
sentiment_score(df['review'].iloc[0])

5

In [17]:
# Score every review in df, echoing each review with its score, then report the mean.
score_count = []
for review_text in df['review']:
    score = sentiment_score(review_text)
    print(f'review: {review_text}\nScore: {score}')
    score_count.append(score)

print(score_count)
average_score = np.mean(score_count)
print(f'Average Score: {average_score}')

    

review: Seated without a booking on a super busy Saturday night. Lovely, warm, and Theo right hostess also looked after our table and went out of her way to give detailed ingredients in every dish to avoid allergies for one of us. And the food was great! Guacamole made right at our table, everything prepared with our allergies in mind, and great dish recommendations. We'd been visiting Sydney for about a week from Melbourne, and this was by far our best dining experience. I'd definitely return here in the future.
Score: 5
review: The food was decent not great..  We had the guacamole which was bland and came with some type of plantain chips.. The chicken and steak tacos were good.. But the service was poor. We had a waitress with an attitude. She seemed upset whenever we asked for anything.  She would walk by and just stick up her hand and say " just wait ".  She spilled the ingredients to make the guacamole all over the table but never apologized. The waitress didn't come by at all, no

Try real data from Google reviews (Nasi Kerabu Mok Nab Pantai Dalam)

In [18]:
from selenium import webdriver
from selenium.webdriver.common import keys
import time

# Google search URL that opens the review panel for the restaurant.
url = 'https://www.google.com/search?gs_ssp=eJwNyUEOQDAQAMC4SrxBL852tQRP8Ittu6SplijS5zPXKat2axHfdL3DGBIUcwNZojFKA69WwQDcz5B5xAkUW5Qdyj-XOlJywvNF-hHh8JG0OCne5ISlncIHaGEbJA&q=nasi+kerabu+moknab+pantai+dalam&rlz=1C5GCCM_en&oq=Nasi+kerabu+mok&gs_lcrp=EgZjaHJvbWUqEAgBEC4YrwEYxwEYgAQYjgUyCggAEAAYsQMYgAQyEAgBEC4YrwEYxwEYgAQYjgUyCQgCEEUYORiABDITCAMQLhivARjHARixAxiABBiOBTIQCAQQLhivARjHARiABBiOBTIHCAUQABiABDIHCAYQABiABDIHCAcQABiABDIQCAgQLhivARjHARiABBiOBTIHCAkQABiABNIBCTEwMjI2ajFqN6gCCLACAQ&sourceid=chrome&ie=UTF-8#lrd=0x31cc4b0efd4060e5:0xe81904ed13213c4b,1,,,,'
# A real browser is driven here instead of requests (presumably because the review
# panel is rendered client-side -- TODO confirm).
driver = webdriver.Chrome()
try:
    driver.get(url)
    time.sleep(5)  # give the page time to load before grabbing the HTML
    html = driver.page_source
finally:
    # Always close the browser, even when loading fails, so no Chrome process is leaked.
    driver.quit()
soup = BeautifulSoup(html, 'html.parser')
snippet = soup.find_all('span', {'class':'review-snippet'})
res = [rev.text for rev in snippet]
# Fail with a clear message rather than an IndexError on the previews below.
if not res:
    raise RuntimeError(
        'No review snippets found; the page markup may have changed '
        'or the page may not have finished loading.'
    )
print(snippet[0])
print(snippet[0].text)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


<span class="review-snippet">Ordered:<br/><br/>Nasi Kerabu Ayam Percik (1 set) + Cili Solok …</span>
Ordered:Nasi Kerabu Ayam Percik (1 set) + Cili Solok …


In [19]:
# Peek at the first Google review snippet as a plain string.
res[0]

'Ordered:Nasi Kerabu Ayam Percik (1 set) + Cili Solok …'

In [20]:
# Build the frame straight from the list: routing it through np.array first creates a
# fixed-width unicode array padded to the longest snippet, and yields a float64 column
# when the list is empty.
odf = pd.DataFrame({'review': res})

In [21]:
# Preview the first rows of the Google review frame.
odf.head()

Unnamed: 0,review
0,Ordered:Nasi Kerabu Ayam Percik (1 set) + Cili...
1,I recently visited Nasi Kerabu Moknab Pantai D...
2,This place has always been our choice when it ...
3,My preference is more towards savoury meat how...
4,Long Q just for Nasi Kerabu.I’m here with coll...


In [22]:
# Score every Google review. Each text is cut to its first 512 characters
# (a character slice, not a token count) before it is handed to the scorer.
odf['sentiment'] = [sentiment_score(text[:512]) for text in odf['review']]
odf

Unnamed: 0,review,sentiment
0,Ordered:Nasi Kerabu Ayam Percik (1 set) + Cili...,5
1,I recently visited Nasi Kerabu Moknab Pantai D...,5
2,This place has always been our choice when it ...,5
3,My preference is more towards savoury meat how...,3
4,Long Q just for Nasi Kerabu.I’m here with coll...,2
5,The best Nasi Kerabu in town. The potion of Na...,5
6,This is the place to look for an original Kela...,5


In [23]:
# Score the Google reviews on their own. A fresh list is used so the scores already
# collected in score_count do not leak into this average, and re-running the cell does
# not keep appending duplicates.
google_score_count = []
for row in odf.itertuples():
    score = sentiment_score(row.review)
    print(f'review: {row.review}\nScore: {score}')
    google_score_count.append(score)

print(google_score_count)
average_score = np.mean(google_score_count)
print(f'Average Score: {average_score}')

review: Ordered:Nasi Kerabu Ayam Percik (1 set) + Cili Solok …
Score: 5
review: I recently visited Nasi Kerabu Moknab Pantai Dalam and was blown away by the authentic flavors and vibrant presentation of the dish. The blue rice, infused with butterfly pea flowers, was not only visually stunning but also delicious. The …
Score: 5
review: This place has always been our choice when it comes to nasi kerabu. The taste is similar to the one that i usually eat in Kota Bharu. Previously we only has to queue for around 10 to 15 minutes for take away. But nowadays since it became …
Score: 5
review: My preference is more towards savoury meat however the daging berlemak is on the sweater side (tasted more like satay daging). We ordered 4 nasi kerabu - daging bakar, lemak bakar, kambing and ayam bakar with 4 teh o ais limau (total cost …
Score: 3
review: Long Q just for Nasi Kerabu.I’m here with colleague eager to try Nasi Kerabu Lemak Bakar. So oily to my tongue. …
Score: 2
review: The best Nasi Ke