1. Install and Import Dependencies

In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import requests
from bs4 import BeautifulSoup
import re

2. Instantiate Model

In [2]:
# The tokenizer and the classifier are loaded from the same pretrained
# checkpoint, so the name is spelled out once and reused.
MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

Downloading (…)okenizer_config.json:   0%|          | 0.00/39.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/953 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/872k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/669M [00:00<?, ?B/s]

3. Encode and Calculate Sentiment

In [53]:
# Encode one sample sentence into a tensor of token ids and display it.
sample_text = "This is pathetic, I hate it! "
tokens = tokenizer.encode(sample_text, return_tensors="pt")
tokens

tensor([[  101, 10372, 10127, 26584, 89110,   117,   151, 39487, 10197,   106,
           102]])

In [54]:
# Round-trip check: turn the first (only) row of token ids back into text.
tokenizer.decode(tokens[0])

'[CLS] this is pathetic, i hate it! [SEP]'

In [55]:
# Run the classifier on the encoded sample sentence.
# NOTE(review): this call runs with autograd enabled (the recorded output
# carries a grad_fn); for inference-only use, torch.no_grad() would avoid that.
results = model(tokens)

In [56]:
# Inspect the full output object returned by the model.
results

SequenceClassifierOutput(loss=None, logits=tensor([[ 4.7519,  1.3726, -1.0938, -2.6252, -1.6450]],
       grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [57]:
# Only the logits tensor: one raw score per class for the single input row.
results.logits

tensor([[ 4.7519,  1.3726, -1.0938, -2.6252, -1.6450]],
       grad_fn=<AddmmBackward0>)

In [58]:
# argmax yields the 0-based position of the largest logit; adding 1 shifts
# it to a 1-based score (1-5 for the five logits shown above).
predicted_class = torch.argmax(results.logits)
int(predicted_class) + 1

1

4. Collect Reviews

In [24]:
# Build scraper: download the business page and collect the review paragraphs.
# The timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() turns an HTTP error response into an exception instead of
# silently parsing an error page into an empty review list.
r = requests.get("https://www.yelp.com/biz/mejico-sydney-2", timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.text, "html.parser")
# Select every <p> element whose class attribute contains "comment".
regex = re.compile(".*comment.*")
result = soup.find_all("p", {"class": regex})
# The loop variable gets its own name so it is not mistaken for `results`,
# the model output defined earlier in the notebook.
reviews = [paragraph.text for paragraph in result]

In [28]:
# Text of the first matched <p> element.
result[0].text

'Visiting from Texas and decided to give this restaurant a try. \xa0We were pleasantly surprised. \xa0While the margaritas are more like martinis, the food was excellent. \xa0More like a tapas, Mexican fusion. \xa0Great way to try different plates.The real treat was Chelsea, our waitress. \xa0Took the time to explain the menu and offer suggestions. \xa0Always smiling and very pleasant. \xa0Best service we have had in Sydney!'

In [29]:
# All extracted review texts, as a list of strings.
reviews

['Visiting from Texas and decided to give this restaurant a try. \xa0We were pleasantly surprised. \xa0While the margaritas are more like martinis, the food was excellent. \xa0More like a tapas, Mexican fusion. \xa0Great way to try different plates.The real treat was Chelsea, our waitress. \xa0Took the time to explain the menu and offer suggestions. \xa0Always smiling and very pleasant. \xa0Best service we have had in Sydney!',
 'The food and service here was really good. \xa0It was more like tapas food than Mexican food! \xa0The drinks were amazing too!',
 'Great atmosphere, attentive service, solid margs, and a Tasty menu. The Brisket Tacos were substantial and delicious. The corn ribs??? \xa0Fawgetaboutit! \xa0Unreal. \xa0Wanted to order another plate.',
 "Don't come here expecting legit Mexican food but a modern twist on some staples. Loud party area, fun drinks and friendly staff make this a hip meeting area for large groups. Drinks were better than the food. They stuff the famili

5. Load Reviews into DataFrame and Score

In [30]:
import pandas as pd
import numpy as np

In [31]:
# Build a one-column frame straight from the list of review strings.
# No NumPy round-trip is needed: np.array() would first copy every string into
# a fixed-width unicode buffer, and for an empty list it yields a float64
# column rather than a text column.
df = pd.DataFrame(reviews, columns=["review"])

In [32]:
# Preview the first rows of the review frame.
df.head()

Unnamed: 0,review
0,Visiting from Texas and decided to give this r...
1,The food and service here was really good. It...
2,"Great atmosphere, attentive service, solid mar..."
3,Don't come here expecting legit Mexican food b...
4,Out of all the restaurants that I tried in Syd...


In [34]:
# Full text of the first review (the frame preview above abbreviates it).
df["review"].iloc[0]

'Visiting from Texas and decided to give this restaurant a try. \xa0We were pleasantly surprised. \xa0While the margaritas are more like martinis, the food was excellent. \xa0More like a tapas, Mexican fusion. \xa0Great way to try different plates.The real treat was Chelsea, our waitress. \xa0Took the time to explain the menu and offer suggestions. \xa0Always smiling and very pleasant. \xa0Best service we have had in Sydney!'

In [35]:
def sentiment_score(review, max_length=512):
    """Return the model's predicted sentiment score for a single review.

    Args:
        review: Review text to score.
        max_length: Maximum number of tokens handed to the model; longer
            inputs are truncated by the tokenizer. Defaults to 512.

    Returns:
        int: 1-based position of the largest logit in the model output
        (1-5 for the five-class output shown earlier in this notebook).
    """
    # Truncate by tokens rather than characters: slicing the raw string
    # cannot guarantee that the encoded sequence fits the model's limit.
    tokens = tokenizer.encode(
        review, return_tensors="pt", truncation=True, max_length=max_length
    )
    # Inference only, so skip building the autograd graph.
    with torch.no_grad():
        result = model(tokens)
    return int(torch.argmax(result.logits)) + 1

In [43]:
# Try the scorer on the first review before applying it to the whole column.
sentiment_score(df["review"].iloc[0])

5

In [44]:
# Score every review; each text is cut to its first 512 characters before
# being handed to the scorer.
df["sentiment"] = df["review"].map(lambda text: sentiment_score(text[:512]))

In [47]:
# Show the reviews alongside their predicted sentiment scores.
df

Unnamed: 0,review,sentiment
0,Visiting from Texas and decided to give this r...,5
1,The food and service here was really good. It...,5
2,"Great atmosphere, attentive service, solid mar...",3
3,Don't come here expecting legit Mexican food b...,3
4,Out of all the restaurants that I tried in Syd...,5
5,We came here on a Thursday night @ 5pm and by ...,4
6,The food is fresh and tasty. The scallop cevi...,4
7,Have been here twice and have absolutely loved...,5
8,I was pleasantly surprised at what a great job...,5
9,If you're looking for a quiet little romantic ...,2
