In [1]:
!pip3 install torch
!pip3 install -q transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
import os
import json
import gzip
import pandas as pd
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from transformers import pipeline

In [3]:
distilbert = pipeline("sentiment-analysis",model="distilbert-base-uncased-finetuned-sst-2-english") # https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english?text=I+like+you.+I+love+you
roberta = pipeline("sentiment-analysis",model="siebert/sentiment-roberta-large-english") # https://huggingface.co/siebert/sentiment-roberta-large-english?text=I+like+you.+I+love+you
bert_mlm = pipeline("sentiment-analysis",model="Seethal/sentiment_analysis_generic_dataset") # https://huggingface.co/Seethal/sentiment_analysis_generic_dataset?text=I+like+you.+I+love+you
bert_twitter = pipeline("sentiment-analysis",model="cardiffnlp/twitter-roberta-base-sentiment") # https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment?text=I+like+you.+I+love+you

In [4]:
# load data
# https://colab.research.google.com/drive/1Zv6MARGQcrBbLHyjPVVMZVnRWsRnVMpV#scrollTo=7igYuRaV4bF7
data = []
with gzip.open('reviews_Video_Games.json.gz') as f:
    for l in f:
        data.append(json.loads(l.strip()))
    
print(len(data))

1324753


In [5]:
df = pd.DataFrame.from_dict(data)
df.loc[df['overall'] == 5.0, 'overall'] = 'POSITIVE'
df.loc[df['overall'] == 4.0, 'overall'] = 'POSITIVE'
df.loc[df['overall'] == 3.0, 'overall'] = 'NEUTRAL'
df.loc[df['overall'] == 2.0, 'overall'] = 'NEGATIVE'
df.loc[df['overall'] == 1.0, 'overall'] = 'NEGATIVE'

print(df['overall'].value_counts())

POSITIVE    970030
NEGATIVE    230353
NEUTRAL     124370
Name: overall, dtype: int64


In [6]:
df_no_neutral = df.copy()
df_no_neutral.loc[df_no_neutral['overall'] == 'NEUTRAL', 'overall'] = 'POSITIVE'
print(df_no_neutral['overall'].value_counts())

POSITIVE    1094400
NEGATIVE     230353
Name: overall, dtype: int64


In [7]:
label_to_class = {
  'LABEL_2': 'POSITIVE',
  'LABEL_1': 'NEUTRAL',
  'LABEL_0': 'NEGATIVE',
}
size = 500
df_test = df[:size]
df_no_neutral_test = df_no_neutral[:size]

print(df_test['overall'].value_counts())
print(df_no_neutral_test['overall'].value_counts())

POSITIVE    330
NEGATIVE    119
NEUTRAL      51
Name: overall, dtype: int64
POSITIVE    381
NEGATIVE    119
Name: overall, dtype: int64


In [8]:
def score_model(preds, labels, is_binary):
  score_params = ['binary', 'POSITIVE'] if is_binary else ['macro', None]

  prec = precision_score(preds, labels, average=score_params[0], pos_label=score_params[1])
  rec = recall_score(preds, labels, average=score_params[0], pos_label=score_params[1]) 
  f1 = f1_score(preds, labels, average=score_params[0], pos_label=score_params[1])
  acc = accuracy_score(preds, labels)

  print('Precision: ' + str(prec))
  print('Recall: ' + str(rec))
  print('F1: ' + str(f1))
  print('Accuracy: ' + str(acc * 100) + '%')


def test_model(model, data, is_binary=True, convert_label=False):
  input = data.loc[:,'reviewText'].values.tolist()
  labels = data.loc[:,'overall'].values.tolist()

  tokenizer_kwargs = {'truncation':True, 'max_length':512}
  scores = model(input[:size], **tokenizer_kwargs)

  if convert_label:
    preds = list(map(lambda x: label_to_class[x['label']], scores))
  else:
    preds = list(map(lambda x: x['label'], scores))
    
  score_model(preds, labels, is_binary)
  return scores


In [9]:
print('DistilBERT:')
distil_results = test_model(distilbert, df_no_neutral_test)

DistilBERT:
Precision: 0.7191601049868767
Recall: 0.958041958041958
F1: 0.8215892053973014
Accuracy: 76.2%


In [10]:
print('RoBERTa:')
roberta_results = test_model(roberta, df_no_neutral_test)

RoBERTa:
Precision: 0.8556430446194225
Recall: 0.9819277108433735
F1: 0.9144460028050491
Accuracy: 87.8%


In [11]:
print('BERT Base:')
bert_results = test_model(bert_mlm, df_test, False, True)

BERT Base:
Precision: 0.6150157032509973
Recall: 0.6362700493135276
F1: 0.6065233072457015
Accuracy: 72.2%


In [12]:
print('BERT Twitter:')
bert_twitter_results = test_model(bert_twitter, df_test, False, True)

BERT Twitter:
Precision: 0.6186571598336305
Recall: 0.5964274382892315
F1: 0.6055616511291738
Accuracy: 76.8%


In [13]:
def parse_label(result):
  label_in = result['label']
  label = label_to_class[label_in] if label_in in label_to_class else label_in

  return { 
    'label': label, 
    'score': result['score'] 
  }

com_res_n = []
for i in range(len(bert_twitter_results)):
  pos_score = 0
  neg_score = 0
  neu_score = 0

  results = [distil_results[i], roberta_results[i], bert_results[i], bert_twitter_results[i]]
  for res in results:
    parsed = parse_label(res)
    if(parsed['label'] == 'POSITIVE'):
      pos_score += parsed['score']
    elif parsed['label'] == 'NEGATIVE':
      neg_score += parsed['score']
    else:
      neu_score += parsed['score']

  if pos_score > neg_score and pos_score > neu_score:
    com_res_n.append('POSITIVE')
  elif neg_score > pos_score and neg_score > neu_score:
    com_res_n.append('NEGATIVE')
  else:
    com_res_n.append('NEUTRAL')

print('Combination Model with neutral')
score_model(com_res_n, df_test.loc[:,'overall'], False)

Combination Model with neutral
Precision: 0.6085221967574909
Recall: 0.5630252100840337
F1: 0.5732742176625252
Accuracy: 80.0%


In [14]:
com_res = []
for i in range(len(bert_twitter_results)):
  pos_score = 0
  neg_score = 0

  results = [distil_results[i], roberta_results[i]]
  for res in results:
    if(res['label'] == 'POSITIVE'):
      pos_score += res['score']
    else :
      neg_score += res['score']

  if pos_score > neg_score:
    com_res.append('POSITIVE')
  else:
    com_res.append('NEGATIVE')

print('Combination Model without neutral')
score_model(com_res, df_no_neutral_test.loc[:,'overall'], False)

Combination Model without neutral
Precision: 0.8999867663600873
Recall: 0.8232620320855615
F1: 0.846699662739258
Accuracy: 87.4%
