# Import Libraries

In [None]:
# Install pinned torch/torchvision/torchaudio builds; `-f` adds the PyTorch wheel index as an
# extra place for pip to look, which is where the `+cu111`-tagged wheels are published.
# NOTE(review): the `+cu111` tag presumably selects CUDA 11.1 builds — confirm it matches the target machine.
!pip install torch==1.8.1+cu111 torchvision==0.9.1+cu111 torchaudio===0.8.1 -f https://download.pytorch.org/whl/torch_stable.html

In [None]:
# Install the remaining libraries this notebook imports (versions are not pinned).
!pip install transformers requests beautifulsoup4 pandas numpy

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import requests 
from bs4 import BeautifulSoup
import re

In [None]:
import pandas as pd
import numpy as np

# Instantiate Model

In [None]:
# The tokenizer and the classifier are both loaded from the same pretrained checkpoint,
# so the checkpoint name is spelled out once and shared.
checkpoint = 'nlptown/bert-base-multilingual-uncased-sentiment'

model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# Test Model

In [None]:
# Smoke test: encode one sample sentence into a PyTorch tensor of token ids.
sample_text = 'Really nice food, shame about the hair in the dish though.'
token = tokenizer.encode(sample_text, return_tensors='pt')

In [None]:
# Run the classifier on the encoded sample; the output object exposes `.logits` (used below).
result = model(token)

In [None]:
# Display the raw per-class scores for the sample sentence.
result.logits

In [None]:
# Position of the largest logit, shifted from a 0-based index to a 1-based score.
int(torch.argmax(result.logits))+1

# Scrape Reviews

In [None]:
# Yelp business page to scrape; the scraping cells below all request this URL.
yelp_link = r'https://www.yelp.co.uk/biz/the-pantry-amsterdam'

In [None]:
# Download the page and collect the text of every <p> whose class attribute contains "comment".
# A timeout keeps the cell from hanging forever if the server never answers.
r = requests.get(yelp_link, timeout=30)
# Fail loudly on a 4xx/5xx response instead of parsing an error page into an empty review list.
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html.parser')
regex = re.compile('.*comment.*')
results = soup.find_all('p', {'class':regex})
# One plain-text string per matched paragraph, in document order.
reviews = [result.text for result in results]

In [None]:
# Peek at the first scraped review (raises IndexError if nothing was matched).
reviews[0]

In [None]:
# Put the scraped review strings into a one-column DataFrame named 'review'.
review_array = np.array(reviews)
df = pd.DataFrame(review_array, columns=['review'])

In [None]:
# Display the DataFrame of scraped reviews.
df

In [None]:
def sentiment_score(review):
    """Return the model's sentiment score for one review string.

    The text is encoded with the module-level ``tokenizer``, classified with
    the module-level ``model``, and the index of the highest logit is shifted
    by one so the lowest class maps to 1.

    Args:
        review: Review text to score.

    Returns:
        int: 1-based index of the highest-scoring class.
        NOTE(review): the checkpoint's model card describes five star
        classes, which would make the range 1-5 — confirm.
    """
    # Truncate at the token level: slicing the string by characters (as the
    # DataFrame cell does) does not bound the number of tokens, and an input
    # longer than the encoder's maximum sequence length makes the forward pass fail.
    # NOTE(review): 512 mirrors the cut-off used elsewhere in this notebook and
    # is presumably the BERT position limit — confirm against the model config.
    tokens = tokenizer.encode(
        review,
        return_tensors='pt',
        truncation=True,
        max_length=512,
    )
    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        result = model(tokens)
    return int(torch.argmax(result.logits)) + 1

In [None]:
def _score_truncated(text):
    """Score a review after keeping only its first 512 characters."""
    return sentiment_score(text[:512])


# Score every review and store the result in a new 'sentiment' column.
df['sentiment'] = df['review'].apply(_score_truncated)

In [None]:
# Spot-check the first review, applying the same 512-character cut used when the
# 'sentiment' column was filled so the two results are comparable and an
# unusually long review cannot overflow the model's input length.
sentiment_score(df['review'].iloc[0][:512])

In [None]:
# Display the text of the third review (positional index 2).
df['review'].iloc[2]

# Scrape Profile Names

In [None]:
# Download the page and collect the link text of every <a> whose href contains "user".
# A timeout keeps the cell from hanging forever if the server never answers.
r = requests.get(yelp_link, timeout=30)
# Fail loudly on a 4xx/5xx response instead of parsing an error page into an empty list.
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html.parser')
regex = re.compile('.*user.*')
results = soup.find_all('a', {'href':regex})
# Raw link texts; these still need filtering down to actual profile names.
profiles = [result.text for result in results]

In [None]:
# Keep only link texts that contain a "." but never a ". " (period followed by a space).
# NOTE(review): presumably this isolates "First L."-style display names — confirm against the page.
profile_names = [
    link_text
    for link_text in profiles
    if "." in link_text and ". " not in link_text
]

In [None]:
# Print the filtered names as a 1-based numbered list, e.g. "1 ) <name>".
for position, reviewer in enumerate(profile_names, start=1):
    print(str(position), ')', reviewer)

In [None]:
# Display the filtered list of profile names.
profile_names

In [None]:
# Attach the names positionally, one per review row.
# NOTE(review): assigning a plain list requires len(profile_names) == len(df); pandas raises
# ValueError otherwise, and nothing here checks that name i belongs to review i — verify.
df['name'] = profile_names

# Scrape Review Date

In [None]:
# Download the page and collect the text of every <span> whose class contains "css-chan6m".
# NOTE(review): "css-chan6m" looks like a generated class name and may change — confirm it still matches dates.
# A timeout keeps the cell from hanging forever if the server never answers.
r = requests.get(yelp_link, timeout=30)
# Fail loudly on a 4xx/5xx response instead of parsing an error page into an empty list.
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html.parser')
regex = re.compile('.*css-chan6m.*')
results = soup.find_all('span', {'class':regex})
# Text of every matched span, in document order.
review_date = [result.text for result in results]

In [None]:
# Keep only as many leading date strings as there are review rows. Slicing to len(df)
# instead of a fixed 10 gives the same result for a 10-row frame and no longer raises
# when the page yields a different number of reviews.
# NOTE(review): this assumes the first len(df) matched spans are the review dates, in review order — verify.
df['date'] = review_date[:len(df)]

# Add ID & Save to CSV

In [None]:
# Tag every row with the constant bar_id 1.
# NOTE(review): presumably the identifier of this venue in a downstream table — confirm.
df['bar_id'] = 1

In [None]:
# Build the export frame with columns in a fixed order. reindex() inserts an all-NaN
# column for any name missing from df rather than raising.
column_order = ['bar_id', 'date', 'name', 'review', 'sentiment']
scraped_reviews = df.reindex(columns=column_order)
scraped_reviews

In [None]:
# Write the export frame to disk as CSV, leaving out the DataFrame index column.
output_path = r'C:\Users\jesse\Sentiment Analysis Project\reviews.csv'
scraped_reviews.to_csv(output_path, index=False)