In [None]:
# !pip install -U -q transformers

In [1]:
# if error in BERT model init do
# !pip uninstall -y transformers accelerate
!pip install transformers accelerate

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.29.2-py3-none-any.whl (7.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.1/7.1 MB[0m [31m48.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-0.19.0-py3-none-any.whl (219 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m219.1/219.1 kB[0m [31m22.0 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)
  Downloading huggingface_hub-0.14.1-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [3

In [2]:
!pip show transformers

Name: transformers
Version: 4.29.2
Summary: State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow
Home-page: https://github.com/huggingface/transformers
Author: The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)
Author-email: transformers@huggingface.co
License: Apache 2.0 License
Location: /usr/local/lib/python3.10/dist-packages
Requires: filelock, huggingface-hub, numpy, packaging, pyyaml, regex, requests, tokenizers, tqdm
Required-by: 


In [3]:
!curl -X GET https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_Video_Games_v1_00.tsv.gz -o video_games.tsv.gz

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  453M  100  453M    0     0  89.7M      0  0:00:05  0:00:05 --:--:-- 97.3M


In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report
from sklearn.metrics import mean_squared_error
import torch
import transformers
import seaborn as sns
import matplotlib.pyplot as plt

In [5]:
# If there's a GPU available...
if torch.cuda.is_available():    

    # Tell PyTorch to use the GPU.    
    device = torch.device("cuda")

    print('There are %d GPU(s) available.' % torch.cuda.device_count())

    print('We will use the GPU:', torch.cuda.get_device_name(0))

# If not...
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

There are 1 GPU(s) available.
We will use the GPU: Tesla T4


In [6]:
# read data into pandas dataframe
path = "video_games.tsv.gz"
video_games = pd.read_csv(path, sep="\t", verbose=True, parse_dates=[14], on_bad_lines="skip")

Tokenization took: 504.42 ms
Type conversion took: 361.96 ms
Parser memory cleanup took: 0.01 ms
Tokenization took: 460.12 ms
Type conversion took: 292.59 ms
Parser memory cleanup took: 0.01 ms
Tokenization took: 541.19 ms
Type conversion took: 404.21 ms
Parser memory cleanup took: 0.01 ms
Tokenization took: 526.63 ms
Type conversion took: 249.48 ms
Parser memory cleanup took: 0.01 ms
Tokenization took: 419.00 ms
Type conversion took: 331.30 ms
Parser memory cleanup took: 0.01 ms
Tokenization took: 779.66 ms
Type conversion took: 593.04 ms
Parser memory cleanup took: 0.01 ms
Tokenization took: 1251.37 ms
Type conversion took: 719.45 ms
Parser memory cleanup took: 0.01 ms
Tokenization took: 885.18 ms
Type conversion took: 330.18 ms
Parser memory cleanup took: 0.01 ms
Tokenization took: 513.94 ms
Type conversion took: 305.52 ms
Parser memory cleanup took: 0.01 ms
Tokenization took: 637.64 ms
Type conversion took: 349.90 ms
Parser memory cleanup took: 0.01 ms
Tokenization took: 570.93 ms


In [7]:
video_games.head()

Unnamed: 0,marketplace,customer_id,review_id,product_id,product_parent,product_title,product_category,star_rating,helpful_votes,total_votes,vine,verified_purchase,review_headline,review_body,review_date
0,US,12039526,RTIS3L2M1F5SM,B001CXYMFS,737716809,Thrustmaster T-Flight Hotas X Flight Stick,Video Games,5,0,0,N,Y,an amazing joystick. I especially love that yo...,"Used this for Elite Dangerous on my mac, an am...",2015-08-31
1,US,9636577,R1ZV7R40OLHKD,B00M920ND6,569686175,Tonsee 6 buttons Wireless Optical Silent Gamin...,Video Games,5,0,0,N,Y,Definitely a silent mouse... Not a single clic...,"Loved it, I didn't even realise it was a gami...",2015-08-31
2,US,2331478,R3BH071QLH8QMC,B0029CSOD2,98937668,Hidden Mysteries: Titanic Secrets of the Fatef...,Video Games,1,0,1,N,Y,One Star,poor quality work and not as it is advertised.,2015-08-31
3,US,52495923,R127K9NTSXA2YH,B00GOOSV98,23143350,GelTabz Performance Thumb Grips - PlayStation ...,Video Games,3,0,0,N,Y,"good, but could be bettee","nice, but tend to slip away from stick in inte...",2015-08-31
4,US,14533949,R32ZWUXDJPW27Q,B00Y074JOM,821342511,Zero Suit Samus amiibo - Japan Import (Super S...,Video Games,4,0,0,N,Y,Great but flawed.,"Great amiibo, great for collecting. Quality ma...",2015-08-31


In [8]:
# filter by last two years based on review_date
from datetime import datetime

video_games['review_date'] = pd.to_datetime(video_games['review_date'])
video_games = video_games[video_games['review_date'] >= datetime(2015, 1, 1)] # 8 months - maybe we need a full year? If so, we need to sample stratified from 2014-8-31

In [9]:
video_games.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 330966 entries, 0 to 330970
Data columns (total 15 columns):
 #   Column             Non-Null Count   Dtype         
---  ------             --------------   -----         
 0   marketplace        330966 non-null  object        
 1   customer_id        330966 non-null  int64         
 2   review_id          330966 non-null  object        
 3   product_id         330966 non-null  object        
 4   product_parent     330966 non-null  int64         
 5   product_title      330966 non-null  object        
 6   product_category   330966 non-null  object        
 7   star_rating        330966 non-null  int64         
 8   helpful_votes      330966 non-null  int64         
 9   total_votes        330966 non-null  int64         
 10  vine               330966 non-null  object        
 11  verified_purchase  330966 non-null  object        
 12  review_headline    330966 non-null  object        
 13  review_body        330917 non-null  object  

In [10]:
video_games = video_games[["review_headline", "review_body", "star_rating"]]

video_games.head()

Unnamed: 0,review_headline,review_body,star_rating
0,an amazing joystick. I especially love that yo...,"Used this for Elite Dangerous on my mac, an am...",5
1,Definitely a silent mouse... Not a single clic...,"Loved it, I didn't even realise it was a gami...",5
2,One Star,poor quality work and not as it is advertised.,1
3,"good, but could be bettee","nice, but tend to slip away from stick in inte...",3
4,Great but flawed.,"Great amiibo, great for collecting. Quality ma...",4


In [11]:
# concat headline and review body
video_games['full_review'] = video_games['review_headline'] + ' ' + video_games['review_body']
video_games.head()

Unnamed: 0,review_headline,review_body,star_rating,full_review
0,an amazing joystick. I especially love that yo...,"Used this for Elite Dangerous on my mac, an am...",5,an amazing joystick. I especially love that yo...
1,Definitely a silent mouse... Not a single clic...,"Loved it, I didn't even realise it was a gami...",5,Definitely a silent mouse... Not a single clic...
2,One Star,poor quality work and not as it is advertised.,1,One Star poor quality work and not as it is ad...
3,"good, but could be bettee","nice, but tend to slip away from stick in inte...",3,"good, but could be bettee nice, but tend to sl..."
4,Great but flawed.,"Great amiibo, great for collecting. Quality ma...",4,"Great but flawed. Great amiibo, great for coll..."


In [12]:
video_games = video_games[['full_review', 'star_rating']]
video_games.head()

Unnamed: 0,full_review,star_rating
0,an amazing joystick. I especially love that yo...,5
1,Definitely a silent mouse... Not a single clic...,5
2,One Star poor quality work and not as it is ad...,1
3,"good, but could be bettee nice, but tend to sl...",3
4,"Great but flawed. Great amiibo, great for coll...",4


# Stratified Sampling

In [13]:
from sklearn.model_selection import StratifiedShuffleSplit

In [14]:
from sklearn.model_selection import StratifiedShuffleSplit

# Assuming you have a DataFrame called 'full_dataset' with the complete dataset

# Define the desired sampling size
sample_size = 30000  # Number of records to sample

# Specify the column to use for stratification
stratify_column = 'star_rating'  # Replace with the actual column name

# Create a StratifiedShuffleSplit object
stratified_split = StratifiedShuffleSplit(n_splits=1, test_size=sample_size, random_state=42)

# Generate the indices for stratified sampling
indices = stratified_split.split(video_games, video_games[stratify_column])

# Extract the indices for the sampled records
sampled_indices = next(indices)[1]

# Create the sampled DataFrame based on the indices
video_games = video_games.iloc[sampled_indices].reset_index(drop=False)


In [15]:
video_games.head()

Unnamed: 0,index,full_review,star_rating
0,145452,"This is my favorite game, ever This is my favo...",5
1,295636,Five Stars Everything went smooth sailing! :),5
2,207743,This is a wonderful alternative to the officia...,5
3,79712,Five Stars VERY GOOD GAME TO PLAY,5
4,243959,Five Stars excellent,5


# Train- Test-Split

In [16]:
# create train, validation and test splits
train_df, test_df = train_test_split(video_games, test_size=0.3)
train_df, val_df = train_test_split(train_df, test_size=0.3)

print(len(train_df))
print(len(val_df))
print(len(test_df))

14700
6300
9000


In [17]:
train_df.head()

Unnamed: 0,index,full_review,star_rating
4450,236925,Five Stars Came in beautiful packaging. Feels ...,5
16798,250733,Best game I've ever played. So at this point m...,5
29841,5316,Bloody Brilliant! I found this game at a Vinta...,5
246,219633,Five Stars Great kit,5
2202,199669,Five Stars Great game thanks 😀😃,5


In [18]:
val_df.head()

Unnamed: 0,index,full_review,star_rating
14882,48724,Amazingly Fun! For J-Pop and Rhythm lovers tha...,4
1491,198734,it's a controller. but it's a RED controller. ...,5
19499,312083,Five Stars Fun,5
16548,303704,Good to have for collection For your legancy o...,5
24770,158669,"I play 360 a lot online, always got feedback ....",5


In [19]:
test_df.head()

Unnamed: 0,index,full_review,star_rating
17718,167010,Five Stars Recived it in 3 day works graet,5
6593,24019,Junk Worked for 3 days and stopped lighting up...,1
3116,96321,"Great looking figure, but wish it could be use...",4
10487,138467,What went wrong? I was very excited hearing th...,1
20775,276480,Three Stars Decent,3


In [20]:
# prepare input as list instances
X_train = train_df['full_review'].tolist()
y_train = np.array(train_df['star_rating']) -1 # need to substract 1 since BERT expects labels starting from 0

X_val = val_df['full_review'].tolist()
y_val = np.array(val_df['star_rating']) - 1

X_test = test_df['full_review'].tolist()
y_test = np.array(test_df['star_rating']) - 1

In [21]:
print(y_train)

[4 4 4 ... 4 4 0]


# Tokenization

In [22]:
from transformers import BertTokenizer

In [23]:
import random
RANDOM_SEED = 42
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed_all(RANDOM_SEED)

In [24]:
# Specify the pre-trained model name.
PRE_TRAINED_MODEL_NAME = 'bert-base-uncased' # uncased means this tokenizer will also lower-case automatically

# Load the BERT tokenizer.
print('Loading BERT tokenizer...')
tokenizer = BertTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)

Loading BERT tokenizer...


Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [25]:
# We need the class of type Dataset for input in the Trainer function
# we can write a class: with the input texts, labels, tokenizer and max_len
# we'll have the full_review, input_ids, attention_mask, labels as our output
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader

class AmazonReviewDataset(Dataset):

    def __init__(self, full_review, labels, tokenizer, max_len):
        self.full_review = full_review
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.full_review)

    def __getitem__(self, item):
        full_review = str(self.full_review[item])
        label = self.labels[item]
        # `encode_plus` will:
          #   (1) Tokenize the text.
          #   (2) Prepend the `[CLS]` token to the start.
          #   (3) Append the `[SEP]` token to the end.
          #   (4) Map tokens to their IDs.
          #   (5) Pad or truncate the sentence to `max_length` with [PAD] tokens
          #   (6) Create attention masks for [PAD] tokens
        encoding = self.tokenizer.encode_plus(
          full_review,
          add_special_tokens=True, # Add '[CLS]' and '[SEP]' and [PAD]
          max_length=self.max_len, # Pad & truncate all texts
          truncation=True,
          padding='max_length',
          return_token_type_ids=False,
          return_attention_mask=True, # Construct attention masks
          return_tensors='pt', # Return pytorch tensors
        )

        return {
          'news_text': full_review,
          'input_ids': encoding['input_ids'].flatten(),
          'attention_mask': encoding['attention_mask'].flatten(),
          'labels': torch.tensor(label, dtype=torch.long)
        }

# Training

In [26]:
from transformers import TrainingArguments, Trainer, BertForSequenceClassification, BertConfig
import torch
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

In [27]:
def objective():
  # BERT model to tune
  # ------------------------------
  # PRE_TRAINED_MODEL_NAME = model

  # Hyperparameters
  num_train_epochs = 1 # authors of BERT recommend between 2 and 4 epochs
  per_device_train_batch_size = 8 # batch sizes between 8 and 32
  per_device_eval_batch_size = 8 # batch sizes between 8 and 32
  learning_rate = 5e-5 # Intervall, z.B.: 0.000001 bis 0.0001 (möglich?)
  
  # Experimentation with different optimzer ("adam", "sgd" etc) / scheduler combinations
  # add to "optimizers" parameter in Trainer class
  # ------------------------------------------------------------------------------------
  # optimizer = "adamw" # default in Trainer
  # scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=warmup_steps, num_training_steps=num_train_steps) # default in trainer

  training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch", num_train_epochs=num_train_epochs, per_device_train_batch_size=per_device_train_batch_size, per_device_eval_batch_size=per_device_eval_batch_size, learning_rate=learning_rate)
  max_len = 256 # max number of token inputs

  def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1) # bert outputs a raw representation of its confidence as "logits"
    probabilities = torch.nn.functional.softmax(torch.from_numpy(logits), dim=-1) # extract relative probabilites of class
    predicted_probabilities = torch.max(probabilities, axis=-1).values # get highest relative probability
    return { 'accuracy': accuracy_score(labels, predictions), 
            'recall_score': recall_score(labels, predictions, average='weighted'), 
            'precision_score': precision_score(labels, predictions, average='weighted'), 
            'f1_score': f1_score(labels, predictions, average='weighted'), 
            'highest_probability': predicted_probabilities 
            }

  model = BertForSequenceClassification.from_pretrained(
      PRE_TRAINED_MODEL_NAME, # Use the 12-layer BERT model, with an uncased vocab.
      num_labels = 5, # The number of output labels, in our case it's multi-class tasks with classes=5  
      output_attentions = False, # Whether the model returns attentions weights.
      output_hidden_states = False, # Whether the model returns all hidden-states.
  )

  trainer = Trainer(
      model=model,
      args=training_args,
      train_dataset=AmazonReviewDataset(full_review=np.array(X_train), labels=np.array(y_train), tokenizer=tokenizer, max_len=max_len),
      eval_dataset=AmazonReviewDataset(full_review=np.array(X_val), labels=np.array(y_val), tokenizer=tokenizer, max_len=max_len),
      compute_metrics=compute_metrics
      # optimizers = (optimizer, scheduler) # uncomment if you want to use another optimizer scheduler than the default in the trainer
  )

  # training, evaluation and prediction loops can be done with one simple line of code:
  trainer.train()
  eval_metrics = trainer.evaluate()
  pred_tuple = trainer.predict(AmazonReviewDataset(full_review=np.array(X_test), labels=np.array(y_test), tokenizer=tokenizer, max_len=max_len))

  return pred_tuple, eval_metrics

In [28]:
pred_tuple, eval_metrics = objective()

Downloading pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch,Training Loss,Validation Loss,Accuracy,Recall Score,Precision Score,F1 Score,Highest Probability
1,0.4173,0.372585,0.866032,0.866032,0.860646,0.861617,"tensor([0.8837, 0.3362, 0.9995, ..., 0.9995, 0.9995, 0.9951])"


Trainer is attempting to log a value of "tensor([0.8837, 0.3362, 0.9995,  ..., 0.9995, 0.9995, 0.9951])" of type <class 'torch.Tensor'> for key "eval/highest_probability" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


Trainer is attempting to log a value of "tensor([0.8837, 0.3362, 0.9995,  ..., 0.9995, 0.9995, 0.9951])" of type <class 'torch.Tensor'> for key "eval/highest_probability" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.


In [29]:
pred_tuple

PredictionOutput(predictions=array([[-1.7538868 , -2.064655  , -2.3693845 , -1.1951982 ,  7.332438  ],
       [ 3.7457824 ,  0.26243013, -0.9371143 , -1.4357742 , -1.0766631 ],
       [-2.7227695 , -1.3788401 ,  0.1660705 ,  4.9366093 , -1.4332613 ],
       ...,
       [-0.12779881, -0.9580643 , -0.36368844, -0.31890026,  1.2519088 ],
       [ 5.2127666 , -0.26378718, -1.2070124 , -1.6811727 , -1.0749111 ],
       [-2.0691159 , -1.6000813 , -1.0565428 ,  5.739979  , -1.3887869 ]],
      dtype=float32), label_ids=array([4, 0, 3, ..., 0, 0, 3]), metrics={'test_loss': 0.3757476806640625, 'test_accuracy': 0.8643333333333333, 'test_recall_score': 0.8643333333333333, 'test_precision_score': 0.8588350781878905, 'test_f1_score': 0.8595961058380999, 'test_highest_probability': tensor([0.9995, 0.9491, 0.9877,  ..., 0.5656, 0.9914, 0.9970]), 'test_runtime': 150.2004, 'test_samples_per_second': 59.92, 'test_steps_per_second': 7.49})

In [30]:
eval_metrics

{'eval_loss': 0.37258461117744446,
 'eval_accuracy': 0.866031746031746,
 'eval_recall_score': 0.866031746031746,
 'eval_precision_score': 0.8606459572061831,
 'eval_f1_score': 0.8616174456247637,
 'eval_highest_probability': tensor([0.8837, 0.3362, 0.9995,  ..., 0.9995, 0.9995, 0.9951]),
 'eval_runtime': 104.6965,
 'eval_samples_per_second': 60.174,
 'eval_steps_per_second': 7.527,
 'epoch': 1.0}

# Inspect sentiment predictions

In [46]:
predicted_labels = np.argmax(pred_tuple[0], axis=1)
actual_labels = pred_tuple[1]
print(predicted_labels)
print(actual_labels)

[4 0 3 ... 4 0 3]
[4 0 3 ... 0 0 3]


In [52]:
predictions_df = pd.DataFrame(np.argmax(pred_tuple[0], axis=1))
actual_df = pd.DataFrame(pred_tuple[1])
highest_probability = pd.DataFrame(pred_tuple[2]['test_highest_probability'])

In [34]:
X_test = pd.DataFrame(X_test)

In [56]:
combined_df = pd.concat([X_test, predictions_df, actual_df, highest_probability], axis=1, keys=['Full Review', 'Predictions', 'Actual Labels', 'Highest Probability'])

In [57]:
combined_df.columns = ['Full Review', 'Predicted', 'Actual', 'Highest Probability']

In [58]:
combined_df

Unnamed: 0,Full Review,Predicted,Actual,Highest Probability
0,Five Stars Recived it in 3 day works graet,4,4,0.999545
1,Junk Worked for 3 days and stopped lighting up...,0,0,0.949106
2,"Great looking figure, but wish it could be use...",3,3,0.987686
3,What went wrong? I was very excited hearing th...,4,0,0.352503
4,Three Stars Decent,2,2,0.994955
...,...,...,...,...
8995,The. nintendo. ds. is. a. ... The.nintendo.ds....,0,0,0.909742
8996,Amazing! Shocking since the beggining!,4,4,0.981472
8997,We just bought this game for our 13 yr. ... We...,4,0,0.565609
8998,One Star It sucks they didn't release it on Xb...,0,0,0.991389


In [64]:
# all reviews where BERT fails to predict the star rating sorted by highest probability i.e. confidence of the model
combined_df[combined_df['Predicted'] != combined_df['Actual']].sort_values(by=['Highest Probability'])

Unnamed: 0,Full Review,Predicted,Actual,Highest Probability
2544,... really wasn't Wat I thought it would be no...,4,0,0.229864
461,"great game, day off from work ruined from late...",0,3,0.260622
5773,Don't read the one star reviews. Watch gamepla...,1,4,0.260807
7888,WARNING! This controller does NOT have a head...,4,3,0.262800
7195,"I love the series, but... I am not bashing on ...",0,2,0.267461
...,...,...,...,...
770,Great! Daughter Loves it!,4,3,0.985052
7485,excellent excellent!!,4,3,0.987234
7769,"Don't waste your money Cheap, useless.",0,1,0.989125
1643,Best generation of gaming. I brought this as a...,4,3,0.989623


In [62]:
combined_df[combined_df['Highest Probability'] < 0.90].sort_values(by=['Highest Probability'])

Unnamed: 0,Full Review,Predicted,Actual,Highest Probability
2544,... really wasn't Wat I thought it would be no...,4,0,0.229864
6782,I hope you really like watching loading screen...,2,2,0.241429
4119,Guy at Best Buy says that it's common Not sure...,2,2,0.258818
461,"great game, day off from work ruined from late...",0,3,0.260622
5773,Don't read the one star reviews. Watch gamepla...,1,4,0.260807
...,...,...,...,...
1327,Perfect I got this to keep my xbox cooler and ...,4,4,0.899374
7633,... CAN BE SO VERY VERY EVIL IN GAME ITS LIKE ...,4,4,0.899440
1673,Was an easy apply and does the job it says Was...,4,4,0.899507
6135,"Good stuff Brand new, cheaper than the store.<...",4,4,0.899509


TO-DOs:

 a) run more epochs? plot the accuracy plot for training & validation set

 b) try larger/smaller batch size

 c) different learning rate/optimizers?

 d) what should be the optimal max_len for our dataset?

 e) how about different pre-trained model? (e.g. bert-base-cased)

 f) Instead of bert, try different transformer-based model (i.e. distilbert, roberta, ...)