In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import requests
from bs4 import BeautifulSoup
import re
import warnings
warnings.filterwarnings(action='ignore')



### Instantiate Model

In [2]:
tokenizer = AutoTokenizer.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")

model = AutoModelForSequenceClassification.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")

### Encode and Calculate the Sentiment

In [3]:
tokens = tokenizer.encode('this is okay but not great', return_tensors='pt')

In [4]:
tokens

tensor([[  101, 10372, 10127, 44810, 10158, 10502, 10497, 11838,   102]])

In [5]:
tokenizer.decode(tokens[0])

'[CLS] this is okay but not great [SEP]'

In [6]:
result = model(tokens)

In [7]:
result

SequenceClassifierOutput(loss=None, logits=tensor([[-1.5047,  0.9871,  3.0134,  0.4711, -2.5837]],
       grad_fn=<AddmmBackward>), hidden_states=None, attentions=None)

In [8]:
result.logits

tensor([[-1.5047,  0.9871,  3.0134,  0.4711, -2.5837]],
       grad_fn=<AddmmBackward>)

In [9]:
int(torch.argmax(result.logits))+1

3

### Scrapping reviews from Yelp

In [15]:
# agent = {"User-Agent":'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'}
r = requests.get('https://www.yelp.com/biz/social-brew-cafe-pyrmont')
soup = BeautifulSoup(r.text, 'lxml')
regex = re.compile('.*comment.*')
results = soup.find_all('p', {'class':regex})
reviews = [result.text for result in results]

In [18]:
reviews[0]

'Great staff and food. \xa0Must try is the pan fried Gnocchi! \xa0The staff were really friendly and the coffee was good as well'

### Scoring scrapped reviews

In [19]:
def sentiment_score(review):
    tokens = tokenizer.encode('this is okay but not great', return_tensors='pt')
    result = model(tokens)
    return int(torch.argmax(result.logits))+1

In [20]:
import pandas as pd
import numpy as np

In [21]:
data = pd.DataFrame(np.array(reviews), columns=['review'])

In [24]:
data.head()

Unnamed: 0,review
0,Great staff and food. Must try is the pan fri...
1,It was ok. The coffee wasn't the best but it w...
2,I came to Social brew cafe for brunch while ex...
3,Ricotta hot cakes! These were so yummy. I ate ...
4,I went here a little while ago- a beautiful mo...


In [29]:
data['sentiment']  = data['review'].apply(lambda x:sentiment_score(x))

In [30]:
data

Unnamed: 0,review,sentiment
0,Great staff and food. Must try is the pan fri...,3
1,It was ok. The coffee wasn't the best but it w...,3
2,I came to Social brew cafe for brunch while ex...,3
3,Ricotta hot cakes! These were so yummy. I ate ...,3
4,I went here a little while ago- a beautiful mo...,3
5,We came for brunch twice in our week-long visi...,3
6,Ron & Jo are on the go down under and Wow! We...,3
7,Great coffee and vibe. That's all you need. C...,3
8,Great coffee and vibe. That's all you need. C...,3
9,Good coffee and toasts. Straight up and down -...,3
