In [1]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import requests
from bs4 import BeautifulSoup
import re

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# AutoTokenizer converts a string into a sequence of numbers (token ids) that is later passed to the NLP model
# AutoModelForSequenceClassification provides the architecture so that the pretrained NLP model can be loaded
# requests is used to fetch data from a website
# BeautifulSoup is used to traverse the DOM of the fetched page

In [3]:
# Load the tokenizer and the sequence-classification model from the same
# pretrained checkpoint; the name is kept in one place so both stay in sync.
MODEL_NAME = 'nlptown/bert-base-multilingual-uncased-sentiment'
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

In [4]:
# Encode a clearly negative sample sentence; 'pt' asks for a PyTorch tensor.
negative_text = 'I hated this, absolutely the worst'
tokens = tokenizer.encode(negative_text, return_tensors='pt')

In [5]:
# Encode a clearly positive sample sentence the same way, for comparison.
positive_text = 'This is crazy good, i think i will love this'
another_token = tokenizer.encode(positive_text, return_tensors='pt')

In [6]:
# Round-trip check: turn the ids of the first encoded sequence back into text.
first_sequence_ids = tokens[0]
tokenizer.decode(first_sequence_ids)

'[CLS] i hated this, absolutely the worst [SEP]'

In [7]:
# Run the model on the encoded negative sentence. This is pure inference, so
# gradient tracking is switched off to avoid building an autograd graph.
with torch.no_grad():
    result = model(tokens)

In [8]:
# Inspect the raw class scores produced for the negative sentence.
logits = result.logits
logits

tensor([[ 4.8750,  1.7880, -0.8356, -3.0027, -2.0727]],
       grad_fn=<AddmmBackward0>)

In [9]:
# The index of the largest logit (0-based) plus one is the predicted rating.
# Here the text is classified as 1, the worst sentiment on the 1-to-5 scale.
result.logits.argmax().item() + 1

1

In [10]:
# Run the model on the encoded positive sentence. This is pure inference, so
# gradient tracking is switched off to avoid building an autograd graph.
with torch.no_grad():
    another_result = model(another_token)

In [11]:
# Same conversion for the positive sentence: largest-logit index plus one.
another_result.logits.argmax().item() + 1

5

In [12]:
# Scrape the review paragraphs from the page by matching their CSS class with a regex.
# The timeout keeps the cell from hanging forever on an unresponsive server, and
# raise_for_status() surfaces HTTP errors right away instead of silently parsing
# an error page into an empty review list.
r = requests.get('https://www.yelp.com/biz/mejico-sydney-2', timeout=30)  # fetch the page; r.text holds its full HTML
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html.parser')
regex = re.compile('.*comment.*')  # look for "comment" classes, because that is where the reviews actually live
results = soup.find_all('p', {'class': regex})  # all <p> (paragraph) elements whose class matches the pattern
reviews = [result.text for result in results]  # keep only the text of each matched HTML element

In [13]:
# Peek at the first scraped review to confirm the scrape returned real text.
first_review = reviews[0]
first_review

"Seated without a booking on a super busy Saturday night. Lovely, warm, and Theo right hostess also looked after our table and went out of her way to give detailed ingredients in every dish to avoid allergies for one of us. And the food was great! Guacamole made right at our table, everything prepared with our allergies in mind, and great dish recommendations. We'd been visiting Sydney for about a week from Melbourne, and this was by far our best dining experience. I'd definitely return here in the future."

In [14]:
# Now load the reviews into a DataFrame
import pandas as pd
import numpy as np

# Build the frame straight from the Python list. Going through np.array() adds
# nothing for a list of strings, and for an empty scrape it would produce a
# float64 column instead of a text (object) column.
df = pd.DataFrame({'review': reviews})

In [15]:
# Confirm the first review made it into the frame unchanged.
df['review'].iat[0]

"Seated without a booking on a super busy Saturday night. Lovely, warm, and Theo right hostess also looked after our table and went out of her way to give detailed ingredients in every dish to avoid allergies for one of us. And the food was great! Guacamole made right at our table, everything prepared with our allergies in mind, and great dish recommendations. We'd been visiting Sydney for about a week from Melbourne, and this was by far our best dining experience. I'd definitely return here in the future."

In [16]:
def sentiment_score(review, max_length=512):
    """Return the predicted sentiment of ``review`` as a rating from 1 to 5.

    The text is encoded with the notebook-level ``tokenizer``, passed through
    the notebook-level ``model``, and the 0-based index of the largest logit
    is shifted by one to give the rating.

    Args:
        review: Text to classify.
        max_length: Maximum number of tokens sent to the model. Longer inputs
            are truncated by the tokenizer instead of being passed through
            unbounded. Defaults to 512, the limit this notebook works with.

    Returns:
        int: One plus the index of the highest-scoring class.
    """
    # Truncate at the token level so callers do not have to guess a safe
    # character count for long reviews.
    tokens = tokenizer.encode(
        review,
        return_tensors='pt',
        truncation=True,
        max_length=max_length,
    )
    # Pure inference: skip autograd bookkeeping.
    with torch.no_grad():
        result = model(tokens)
    return int(torch.argmax(result.logits)) + 1

In [17]:
# Spot-check the scorer on the third review in the frame.
third_review = df['review'].iloc[2]
sentiment_score(third_review)

2

In [18]:
# Score every review. The NLP pipeline limits how much input it accepts (max 512
# tokens), so each review is cut to its first 512 characters before scoring.
# Note that this slice counts characters, not tokens.
df['sentiment'] = df['review'].map(lambda text: sentiment_score(text[:512]))

In [19]:
# Show the last five scored reviews.
df.tail(5)

Unnamed: 0,review,sentiment
5,Don't come here expecting legit Mexican food b...,3
6,Out of all the restaurants that I tried in Syd...,5
7,"Great atmosphere, attentive service, solid mar...",3
8,We came here on a Thursday night @ 5pm and by ...,4
9,The food is fresh and tasty. The scallop cevi...,4


In [27]:
# Print how many reviews were scored, then display the whole frame.
print(df.shape[0])
df

10


Unnamed: 0,review,sentiment
0,Seated without a booking on a super busy Satur...,5
1,The food was decent not great.. We had the gu...,2
2,"Food was okay, guacamole was below average. Se...",2
3,The food and service here was really good. It...,5
4,Visiting from Texas and decided to give this r...,5
5,Don't come here expecting legit Mexican food b...,3
6,Out of all the restaurants that I tried in Syd...,5
7,"Great atmosphere, attentive service, solid mar...",3
8,We came here on a Thursday night @ 5pm and by ...,4
9,The food is fresh and tasty. The scallop cevi...,4
