# 1. Preparing libraries

In [1]:
from transformers import BertTokenizer, TFBertForSequenceClassification
import tensorflow as tf
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
import numpy as np
import warnings

In [2]:
warnings.filterwarnings('ignore')

# 2. Instantiate model

In [3]:
# Load model directly
# NOTE(review): neither Auto* class is used in the cells visible here; the
# import is kept in case later cells rely on it. It would normally live in the
# imports cell at the top of the notebook.
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Single source of truth for the checkpoint id, so the tokenizer and the model
# cannot end up loaded from different checkpoints after a one-sided edit.
MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"

tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
model = TFBertForSequenceClassification.from_pretrained(MODEL_NAME)

All model checkpoint layers were used when initializing TFBertForSequenceClassification.

All the layers of TFBertForSequenceClassification were initialized from the model checkpoint at nlptown/bert-base-multilingual-uncased-sentiment.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForSequenceClassification for predictions without further training.


# 3. Encode and Calculate Sentiment

In [4]:
# Encode one sample sentence into a TensorFlow tensor of token ids so the
# tokenizer's output can be inspected before running the model.
tokens = tokenizer.encode(
    "'It was good but could've been better. Great'",
    return_tensors='tf',
)

In [5]:
tokens

<tf.Tensor: shape=(1, 15), dtype=int32, numpy=
array([[  101,   112, 10197, 10140, 12050, 10502, 12296,   112, 10312,
        10662, 16197,   119, 11838,   112,   102]])>

In [6]:
tokenizer.decode(tokens[0])

"[CLS]'it was good but could've been better. great'[SEP]"

In [7]:
# Score one sample end to end: tokenize -> forward pass -> take the index of
# the largest logit. That index is 0-based, so 1 is added to get the rating.
text = "'It was good but could've been better. Great'"
inputs = tokenizer(text, return_tensors='tf')
outputs = model(inputs)
sentiment = 1 + tf.argmax(outputs.logits, axis=1).numpy()
sentiment

array([4], dtype=int64)

In [8]:
inputs

{'input_ids': <tf.Tensor: shape=(1, 15), dtype=int32, numpy=
array([[  101,   112, 10197, 10140, 12050, 10502, 12296,   112, 10312,
        10662, 16197,   119, 11838,   112,   102]])>, 'token_type_ids': <tf.Tensor: shape=(1, 15), dtype=int32, numpy=array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])>, 'attention_mask': <tf.Tensor: shape=(1, 15), dtype=int32, numpy=array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])>}

In [9]:
outputs

TFSequenceClassifierOutput(loss=None, logits=<tf.Tensor: shape=(1, 5), dtype=float32, numpy=
array([[-2.5271244 , -0.9121963 ,  1.4682176 ,  1.7026639 ,  0.20254713]],
      dtype=float32)>, hidden_states=None, attentions=None)

In [10]:
print(sentiment)

[4]


In [11]:
print(sentiment[0])

4


# 4. Collect Reviews

In [12]:
# Fetch one page of Goodreads reviews for the book (the query string filters on
# languageCode "en") and pull the review bodies out of the returned HTML.
r = requests.get(
    'https://www.goodreads.com/book/show/40363665/reviews?reviewFilters={%22workId%22:%22kca://work/amzn1.gr.work.v1.ZzOL4ctk34q7z80LZy8RGw%22,%22languageCode%22:%22en%22,%22after%22:%22MTA0LDE1NTkxNjU4MDcwOTU%22}',
    timeout=30,  # requests applies no timeout by default; a stalled connection would hang the kernel
)
# Fail loudly on a 4xx/5xx response instead of parsing an error page into an
# empty review list that only surfaces as a problem several cells later.
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html.parser')
# The review text is taken from the <span class="Formatted"> elements.
results = soup.find_all('span', {'class': 'Formatted'})
reviews = [result.text for result in results]

In [13]:
reviews

["I spent the last five months learning the math and theory behind machine learning, but when I finally tried to do something on a simple Kaggle set, I was drawing blanks. This book really showed me what I was missing: context. It doesn't just demonstrate different tools, it gives you a framework that you can apply to any problem (chapter 2) and how to think about what you're doing in each phase of an ML project. It doesn't baby you on the math, but it doesn't go deeper than it needs to either. I think the same can be said for the coding. This book is all about connecting and implementing the basics in a solid manner. For me, that's exactly what I'm looking for.It really has been the missing link for me on my self-study to connect theory to application and I'm really happy to have picked it up. If you feel like you're in a similar position, I highly recommend that you pick up a copy. I also recommend doing the coding exercises as you read them. It'll reinforce what you're learning and 

In [14]:
len(reviews)

30

# 5. Load Reviews into DataFrame and Score

In [15]:
# Build the frame from a {column name: values} mapping. Column labels must be
# an ordered list-like; a set literal such as {"Review"} is unordered and is
# rejected by newer pandas releases. The np.array() wrapper is not needed
# either: pandas accepts the list of strings directly.
df = pd.DataFrame({"Review": reviews})
df

Unnamed: 0,Review
0,I spent the last five months learning the math...
1,A really nice and sensible intro to some of th...
2,One of the best ML books out there. Dives deep...
3,I thought this book was a great overview of th...
4,great introduction into machine learning for b...
5,The book contains a chapter that shows a basic...
6,"""Machine Learning is the science (and art) of ..."
7,"At the time of reading, I had already learned ..."
8,This is the best book I've read on machine lea...
9,A very excellent introduction to many machine ...


In [16]:
df.Review.iloc[0]

"I spent the last five months learning the math and theory behind machine learning, but when I finally tried to do something on a simple Kaggle set, I was drawing blanks. This book really showed me what I was missing: context. It doesn't just demonstrate different tools, it gives you a framework that you can apply to any problem (chapter 2) and how to think about what you're doing in each phase of an ML project. It doesn't baby you on the math, but it doesn't go deeper than it needs to either. I think the same can be said for the coding. This book is all about connecting and implementing the basics in a solid manner. For me, that's exactly what I'm looking for.It really has been the missing link for me on my self-study to connect theory to application and I'm really happy to have picked it up. If you feel like you're in a similar position, I highly recommend that you pick up a copy. I also recommend doing the coding exercises as you read them. It'll reinforce what you're learning and a

In [17]:
def sentiment_score(review, max_length=512):
    """Return the predicted rating for a single review.

    The review is tokenized, passed through ``model``, and the index of the
    largest logit is shifted from 0-based to 1-based. With the five-logit
    checkpoint loaded in this notebook the result is a value from 1 to 5.

    Truncation is done by the tokenizer, in tokens, so callers do not need to
    pre-slice the text by characters (a character slice is not a token limit
    and throws away text the model could still have read).

    Args:
        review: Review text to score.
        max_length: Upper bound on the number of tokens passed to the model;
            longer reviews are truncated by the tokenizer. The default of 512
            mirrors the limit the scoring cell already applies.
            NOTE(review): presumably the model's maximum sequence length;
            confirm against the checkpoint config.

    Returns:
        The 1-based index of the highest-scoring class (a NumPy integer).
    """
    inputs = tokenizer.encode(
        review,
        return_tensors='tf',
        truncation=True,
        max_length=max_length,
    )
    result = model(inputs)
    # argmax over the class axis yields a 0-based index; ratings start at 1.
    sentiment = tf.argmax(result.logits, axis=1).numpy() + 1
    return sentiment[0]

In [18]:
sentiment_score(df.Review.iloc[0])

5

In [19]:
# Score every review and store the result in a new 'Ratings' column.
# NOTE(review): [:512] keeps the first 512 *characters*, not tokens. It is
# presumably meant to keep inputs within the model's length limit, but it also
# drops the tail of long reviews. Confirm whether that is intended.
df['Ratings'] = df['Review'].map(
    lambda review_text: sentiment_score(review_text[:512])
)
df

Unnamed: 0,Review,Ratings
0,I spent the last five months learning the math...,4
1,A really nice and sensible intro to some of th...,4
2,One of the best ML books out there. Dives deep...,5
3,I thought this book was a great overview of th...,4
4,great introduction into machine learning for b...,4
5,The book contains a chapter that shows a basic...,3
6,"""Machine Learning is the science (and art) of ...",1
7,"At the time of reading, I had already learned ...",4
8,This is the best book I've read on machine lea...,5
9,A very excellent introduction to many machine ...,5


In [20]:
# Average predicted rating across the scored reviews, to two decimals.
np.round(df['Ratings'].mean(), decimals=2)

4.03