### 1. Install and Import Dependencies

In [1]:
!pip install -q transformers

[K     |████████████████████████████████| 5.5 MB 9.2 MB/s 
[K     |████████████████████████████████| 7.6 MB 49.1 MB/s 
[K     |████████████████████████████████| 163 kB 65.5 MB/s 
[?25h

In [2]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import requests
from bs4 import BeautifulSoup
import re

### 2. Instantiate Model

In [3]:
tokenizer = AutoTokenizer.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')

model = AutoModelForSequenceClassification.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')

Downloading:   0%|          | 0.00/39.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/953 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/872k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/669M [00:00<?, ?B/s]

### 3. Encode and Calculate Sentiment

In [10]:
tokens1 = tokenizer.encode('I hated this, absolutely the worst', return_tensors = 'pt')
tokens2 = tokenizer.encode('This is amazing, I loved it. GREAT!', return_tensors = 'pt')

In [11]:
tokens1, tokens2

(tensor([[  101,   151, 39487, 10163, 10372,   117, 35925, 10563, 10103, 43060,
            102]]),
 tensor([[  101, 10372, 10127, 39854,   117,   151, 46747, 10197,   119, 11838,
            106,   102]]))

In [12]:
tokenizer.decode(tokens1[0]), tokenizer.decode(tokens2[0])

('[CLS] i hated this, absolutely the worst [SEP]',
 '[CLS] this is amazing, i loved it. great! [SEP]')

In [14]:
result1 = model(tokens1)
result2 = model(tokens2)
result1, result2   

(SequenceClassifierOutput(loss=None, logits=tensor([[ 4.8750,  1.7880, -0.8356, -3.0027, -2.0727]],
        grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None),
 SequenceClassifierOutput(loss=None, logits=tensor([[-2.5497, -2.9072, -1.4034,  1.4186,  4.3728]],
        grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None))

In [15]:
int(torch.argmax(result1.logits))+1, int(torch.argmax(result2.logits))+1 ## 1 being the most negative, and 5 being the most positive

(1, 5)

In [16]:
### Let's take a neutral example with the integer code of 3
tokens3 = tokenizer.encode('Meh, it was okay!', return_tensors = 'pt')
tokens3

tensor([[  101, 10525, 10243,   117, 10197, 10140, 44810, 10158,   106,   102]])

In [17]:
result3 = model(tokens3)
int(torch.argmax(result3.logits))+1

3

### 4. Collect Reviews

In [20]:
r = requests.get('https://www.yelp.com/biz/mejico-sydney-2')
soup = BeautifulSoup(r.text, 'html.parser')
regex = re.compile('.*comment.*')
results = soup.find_all('p', {'class': regex})
reviews = [result.text for result in results]

In [22]:
results[0].text

'Great atmosphere, attentive service, solid margs, and a Tasty menu. The Brisket Tacos were substantial and delicious. The corn ribs??? \xa0Fawgetaboutit! \xa0Unreal. \xa0Wanted to order another plate.'

In [25]:
reviews[3]

'The food is fresh and tasty. \xa0The scallop ceviche started the lunch. The scallops were tender with a great acidity and use of mango and peppers. The steak was tender and I got the hint of tequila in the sauce. I enjoyed a watermelon salad that complimented the the steak. The portions are good, but a stretch if you are sharing. My only down point is the service. They really only showed up to present my next plate and never checked to see if I wanted another drink (which I did).Enjoyed the food.'

### 5. Load Reviews into DataFrame and Score

In [29]:
import pandas as pd
import numpy as np

In [30]:
df = pd.DataFrame(np.array(reviews), columns=['review'])
df

Unnamed: 0,review
0,"Great atmosphere, attentive service, solid mar..."
1,Don't come here expecting legit Mexican food b...
2,Out of all the restaurants that I tried in Syd...
3,The food is fresh and tasty. The scallop cevi...
4,We came here on a Thursday night @ 5pm and by ...
5,Have been here twice and have absolutely loved...
6,I was pleasantly surprised at what a great job...
7,Really nice (upmarket) Mexican restaurant. Goo...
8,If you're looking for a quiet little romantic ...
9,The service at this place was top notch - the ...


In [31]:
df.tail()

Unnamed: 0,review
5,Have been here twice and have absolutely loved...
6,I was pleasantly surprised at what a great job...
7,Really nice (upmarket) Mexican restaurant. Goo...
8,If you're looking for a quiet little romantic ...
9,The service at this place was top notch - the ...


In [34]:
df['review'].iloc[2]

"Out of all the restaurants that I tried in Sydney, this was definitely the most reasonably priced one offering good food of course. We started off with Margarita's - they have $10 margaritas on Mondays (or perhaps all weekdays). We lost count of how many we had, they were so good. On to the food, we tried a little bit of everything - we ordered corn lollipops, jalapeño poppers, grilled halloumi, batata bravas to start and we shared 2 items from the grill which I can't remember (probably because of the margarita's). We ended with churros & chocolate sauce which was awesome. Service was good and the staff waiting us was very friendly. He also recommended us portions for certain items to match our party size. For instance, we didn't have to order 2 portions of an appetizer instead we could order one and a half portion which I think was great. It allowed us to chose more dishes than more quantity of the same dish."

In [35]:
def sentiment_score(review):  # function to run the sentiment pipeline
    tokens = tokenizer.encode(review, return_tensors = 'pt')
    result = model(tokens)
    return int(torch.argmax(result.logits))+1

In [37]:
sentiment_score(df['review'].iloc[0])

3

In [38]:
df['sentiment'] = df['review'].apply(lambda x: sentiment_score(x[:512])) # NLP pipeline limitation : 512

In [44]:
df

Unnamed: 0,review,sentiment
0,"Great atmosphere, attentive service, solid mar...",3
1,Don't come here expecting legit Mexican food b...,3
2,Out of all the restaurants that I tried in Syd...,5
3,The food is fresh and tasty. The scallop cevi...,4
4,We came here on a Thursday night @ 5pm and by ...,4
5,Have been here twice and have absolutely loved...,5
6,I was pleasantly surprised at what a great job...,5
7,Really nice (upmarket) Mexican restaurant. Goo...,4
8,If you're looking for a quiet little romantic ...,2
9,The service at this place was top notch - the ...,5


### Taking another restaurant example

In [45]:
r = requests.get('https://www.yelp.com/biz/social-brew-cafe-pyrmont')
soup = BeautifulSoup(r.text, 'html.parser')
regex = re.compile('.*comment.*')
results = soup.find_all('p', {'class': regex})
reviews = [result.text for result in results]

In [46]:
df = pd.DataFrame(np.array(reviews), columns=['review'])
df['sentiment'] = df['review'].apply(lambda x: sentiment_score(x[:512])) # NLP pipeline limitation : 512
df

Unnamed: 0,review,sentiment
0,Great food amazing coffee and tea. Short walk ...,5
1,Great staff and food. Must try is the pan fri...,5
2,Ricotta hot cakes! These were so yummy. I ate ...,5
3,I came to Social brew cafe for brunch while ex...,5
4,It was ok. The coffee wasn't the best but it w...,3
5,We came for brunch twice in our week-long visi...,4
6,I went here a little while ago- a beautiful mo...,2
7,Ron & Jo are on the go down under and Wow! We...,5
8,Great coffee and vibe. That's all you need. C...,5
9,Great coffee and vibe. That's all you need. C...,4
