In [14]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import requests
from bs4 import BeautifulSoup
import re
import numpy as np
import pandas as pd

## Instantiate Model

In [5]:
tokenizer = AutoTokenizer.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')
model = AutoModelForSequenceClassification.from_pretrained('nlptown/bert-base-multilingual-uncased-sentiment')

Downloading:   0%|          | 0.00/39.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/953 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/851k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/638M [00:00<?, ?B/s]

## Encode and Calculate Sentiment

In [6]:
tokens = tokenizer.encode('It was good but could have been better. Great', return_tensors='pt')

In [9]:
result = model(tokens)
result

SequenceClassifierOutput(loss=None, logits=tensor([[-2.7768, -1.2353,  1.4419,  1.9804,  0.4584]],
       grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [10]:
result.logits

tensor([[-2.7768, -1.2353,  1.4419,  1.9804,  0.4584]],
       grad_fn=<AddmmBackward0>)

In [11]:
int(torch.argmax(result.logits))+1

4

## Collect Reviews by Webscraping

In [12]:
r = requests.get('https://www.yelp.com/biz/social-brew-cafe-pyrmont')
soup = BeautifulSoup(r.text, 'html.parser')
regex = re.compile('.*comment.*')
results = soup.find_all('p', {'class':regex})
reviews = [result.text for result in results]

In [13]:
reviews

["Great coffee and vibe. That's all \xa0you need. Crab was outstanding but not good finger food like a taco should be. Really want to try the pork belly sandwich - looked excellent. This became my go to breakfast place in Darling harbor. Had the avocado salmon salad breakfast and it was excellent. Service has been excellent.",
 "Great coffee and vibe. That's all \xa0you need. Crab was outstanding but not good finger food like a taco should be. Really want to try the pork belly sandwich - looked excellent.",
 'I came to Social brew cafe for brunch while exploring the city and on my way to the aquarium. I sat outside. The service was great and the food was good too!I ordered smoked salmon, truffle fries, black coffee and beer.',
 "Ricotta hot cakes! These were so yummy. I ate them pretty fast and didn't share with anyone because they were that good ;). I ordered a green smoothie to balance it all out. Smoothie was a nice way to end my brekkie at this restaurant. Others with me ordered th

## Load Reviews into DataFrame and Score

In [15]:
df = pd.DataFrame(np.array(reviews), columns=['review'])

In [16]:
df.head()

Unnamed: 0,review
0,Great coffee and vibe. That's all you need. C...
1,Great coffee and vibe. That's all you need. C...
2,I came to Social brew cafe for brunch while ex...
3,Ricotta hot cakes! These were so yummy. I ate ...
4,I went here a little while ago- a beautiful mo...


In [17]:
df['review'].iloc[0]

"Great coffee and vibe. That's all \xa0you need. Crab was outstanding but not good finger food like a taco should be. Really want to try the pork belly sandwich - looked excellent. This became my go to breakfast place in Darling harbor. Had the avocado salmon salad breakfast and it was excellent. Service has been excellent."

### function to get sentiment score

In [18]:
def sentiment_score(review):
    tokens = tokenizer.encode(review, return_tensors='pt')
    result = model(tokens)
    return int(torch.argmax(result.logits))+1

In [19]:
sentiment_score(df['review'].iloc[1])

4

### generate new column with sentiment value

In [20]:
df['sentiment'] = df['review'].apply(lambda x: sentiment_score(x[:512]))

In [21]:
df.head()

Unnamed: 0,review,sentiment
0,Great coffee and vibe. That's all you need. C...,5
1,Great coffee and vibe. That's all you need. C...,4
2,I came to Social brew cafe for brunch while ex...,5
3,Ricotta hot cakes! These were so yummy. I ate ...,5
4,I went here a little while ago- a beautiful mo...,2
