# 1. Install and Import Dependencies

In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import requests
from bs4 import BeautifulSoup
import re

# 2. Instantiate Model

In [3]:
# Single source of truth for the checkpoint so the tokenizer and the model
# are always loaded from the same pretrained weights/vocabulary.
MODEL_NAME = 'nlptown/bert-base-multilingual-uncased-sentiment'
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)


Downloading pytorch_model.bin:   0%|          | 0.00/669M [00:00<?, ?B/s]

In [4]:
tokenizer

BertTokenizerFast(name_or_path='nlptown/bert-base-multilingual-uncased-sentiment', vocab_size=105879, model_max_length=512, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=True),  added_tokens_decoder={
	0: AddedToken("[PAD]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	100: AddedToken("[UNK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	101: AddedToken("[CLS]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	102: AddedToken("[SEP]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	103: AddedToken("[MASK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}

In [5]:
model

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(105879, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12

# 3. Encode and Calculate Sentiment

In [8]:
# Encode one sample sentence into token IDs, returned as a PyTorch tensor ('pt').
tokens = tokenizer.encode('it was good, sex is awsome', return_tensors='pt')

In [9]:
# Run the classifier on the encoded sample; the raw class scores are read from result.logits below.
result = model(tokens)

In [10]:
result.logits

tensor([[-1.5759, -0.8759,  0.8581,  1.1086,  0.3553]],
       grad_fn=<AddmmBackward0>)

In [11]:
# argmax yields the 0-based position of the largest logit; +1 shifts it to a 1-based score.
# NOTE(review): presumably this maps to a 1-5 star rating for this checkpoint - confirm against the model card.
int(torch.argmax(result.logits))+1

4

# 4. Collect Reviews

In [12]:
# Fetch the business page. A timeout keeps the cell from hanging indefinitely,
# and raise_for_status() surfaces HTTP errors (e.g. a blocked request) instead
# of silently parsing an error page into an empty review list.
r = requests.get('https://www.yelp.com/biz/marufuku-ramen-san-francisco-5', timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html.parser')
# Review bodies are <p> elements whose class attribute contains "comment".
regex = re.compile('.*comment.*')
results = soup.find_all('p', {'class': regex})
# Keep only the visible text of each matched paragraph. The loop variable is
# named `tag` so it is not confused with the model output `result` above.
reviews = [tag.text for tag in results]

In [13]:
results

[<p class="comment__09f24__D0cxf css-qgunke"><span class="raw__09f24__T4Ezm" lang="en">The ramen was amazing. The wait was a while but it was definitely  worth the wait.  Looking forward to going back.</span></p>,
 <p class="comment__09f24__D0cxf css-qgunke"><span class="raw__09f24__T4Ezm" lang="en">Great ramen, fast and quick service. Inside of the mall but really cool shops nearby.</span></p>,
 <p class="comment__09f24__D0cxf css-qgunke"><span class="raw__09f24__T4Ezm" lang="en">Today was the second time I've been in this location Had an amazing time <br/>The staff was on point Food was great <br/>Got to hit up the one in Oakland <br/><br/>Thanks</span></p>,
 <p class="comment__09f24__D0cxf css-qgunke"><span class="raw__09f24__T4Ezm" lang="en">I go here very frequently. The food is fantastic. The wait times are not too bad. The menu changes ever-so-slightly every once in awhile, which is nice (they now offer gluten free noodles, yay!). <br/><br/>More recently, however, I have brought

In [14]:
reviews

['The ramen was amazing. The wait was a while but it was definitely  worth the wait.  Looking forward to going back.',
 'Great ramen, fast and quick service. Inside of the mall but really cool shops nearby.',
 "Today was the second time I've been in this location Had an amazing time The staff was on point Food was great Got to hit up the one in Oakland Thanks",
 "I go here very frequently. The food is fantastic. The wait times are not too bad. The menu changes ever-so-slightly every once in awhile, which is nice (they now offer gluten free noodles, yay!). More recently, however, I have brought guests visiting SF and almost every time it seems like the staff is trying to rush us out. As soon as we sit down, there a three people coming by within the first 5 minutes to take our order. It feels a little suffocating. Multiple times while I've been enjoying lunch and dinner with one of my guests, they've tried to take our dishes while we're clearly still eating - we just came up for air to t

# 5. Load Reviews Into Dataframe and Score

In [15]:
import numpy as np
import pandas as pd

In [17]:
# Build the frame straight from the list of review strings; routing it through
# np.array first only adds a fixed-width-unicode conversion (and yields a
# float64 column when the list is empty).
df = pd.DataFrame({'review': reviews})
# Peek at the first review to confirm the text loaded as expected.
df['review'].iloc[0]

'The ramen was amazing. The wait was a while but it was definitely  worth the wait.  Looking forward to going back.'

In [18]:
df

Unnamed: 0,review
0,The ramen was amazing. The wait was a while bu...
1,"Great ramen, fast and quick service. Inside of..."
2,Today was the second time I've been in this lo...
3,I go here very frequently. The food is fantast...
4,Some of the best ramen I've had. My partner an...
5,I came here for a weekday night dinner and mos...
6,I may be one of the outliers regarding this es...
7,Join the Yelp waitlist through their tablet at...
8,Stopped by for lunch on a Friday and there was...
9,"Delicious ramen and great service, what more c..."


In [19]:
def sentiment_score(review):
    """Return the 1-based sentiment class the model predicts for ``review``.

    Args:
        review: Text to score.

    Returns:
        int: Position of the highest logit plus one.
    """
    # Truncate at the token level so an over-long review can never exceed the
    # tokenizer's maximum sequence length; character slicing by the caller
    # does not guarantee that.
    tokens = tokenizer.encode(review, return_tensors='pt', truncation=True)
    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        result = model(tokens)
    return int(torch.argmax(result.logits)) + 1

In [20]:
# Spot-check the scorer on a single review (row position 1) before scoring the whole frame.
sentiment_score(df['review'].iloc[1])

5

In [21]:
# Score every review and store the result in a new 'sentiment' column.
# Only the first 512 *characters* (not tokens) of each review are passed on.
df['sentiment'] = df['review'].map(lambda text: sentiment_score(text[:512]))

In [22]:
df

Unnamed: 0,review,sentiment
0,The ramen was amazing. The wait was a while bu...,4
1,"Great ramen, fast and quick service. Inside of...",5
2,Today was the second time I've been in this lo...,5
3,I go here very frequently. The food is fantast...,3
4,Some of the best ramen I've had. My partner an...,5
5,I came here for a weekday night dinner and mos...,5
6,I may be one of the outliers regarding this es...,4
7,Join the Yelp waitlist through their tablet at...,4
8,Stopped by for lunch on a Friday and there was...,5
9,"Delicious ramen and great service, what more c...",5
