In [35]:
import re

import numpy as np
import pandas as pd
import torch
from scipy.special import softmax
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer, AutoConfig

In [36]:
# Preprocess text (username and link placeholders)
def preprocess(text):
    """
    Replace links and user mentions in a tweet with fixed placeholders.

    :param text: string containing raw tweet text
    :returns: the text with every ``http://`` / ``https://`` URL replaced by
        ``'http'`` and every ``@`` followed by non-whitespace characters
        replaced by ``'@user'``
    """
    # Links first, so an '@' inside a URL is swallowed by the link placeholder
    # instead of being rewritten as a mention.
    new_text = re.sub(r'https?://\S+', 'http', text)
    new_text = re.sub(r'@\S+', '@user', new_text)
    return new_text
# Identifier of the pretrained checkpoint; the same value is used to load both
# the tokenizer and the classification model below.
MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"
# Tokenizer belonging to the checkpoint; sentiment_score() uses it to encode text.
tokenizer = AutoTokenizer.from_pretrained(MODEL)

# Sequence-classification model; sentiment_score() applies softmax to its output
# and reads index 0 as negative and index 2 as positive.
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
# Left disabled: would save the loaded model to a local path named after MODEL.
#model.save_pretrained(MODEL)


def sentiment_score(text):
    """
    Detects the sentiment of a string of text, using the RoBERTa model.

    The text is coerced with ``str()``, passed through ``preprocess``, encoded
    with the module-level ``tokenizer`` and scored by the module-level ``model``.

    :param text: string containing tweet text
    :returns: sentiment score (float) = P(positive) - P(negative), taken from
        the softmax over the model output; the string ``'NI'`` is returned
        instead if any step raised an exception (the exception is printed).
    """
    score_sum = 'NI'  # sentinel kept for callers that check for a failed detection
    try:
        # Sentiment detection
        text_processed = preprocess(str(text))
        # Truncate explicitly: without a max_length, over-long inputs exceed the
        # model's position limit and raise inside the forward pass.
        encoded_input = tokenizer(
            text_processed,
            return_tensors='pt',
            truncation=True,
            max_length=512,
        )
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            output = model(**encoded_input)
        scores = softmax(output[0][0].numpy())
        score_sum = scores[2] - scores[0]  # score = positive - negative
    except Exception as e:
        # Best effort: report the problem and fall through to the 'NI' sentinel.
        print(e)

    return score_sum

Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [37]:
# data source: https://www.kaggle.com/datasets/jp797498e/twitter-entity-sentiment-analysis
# Column names are supplied explicitly; the evaluation loop reads only the
# 'sentiment' and 'text' columns.
valdata = pd.read_csv(
    'validation_data.csv',
    names=['irrelevant', 'also irrelevant', 'sentiment', 'text'],
)

       irrelevant also irrelevant sentiment  \
0            2401     Borderlands  Positive   
1            2401     Borderlands  Positive   
2            2401     Borderlands  Positive   
3            2401     Borderlands  Positive   
4            2401     Borderlands  Positive   
...           ...             ...       ...   
74677        9200          Nvidia  Positive   
74678        9200          Nvidia  Positive   
74679        9200          Nvidia  Positive   
74680        9200          Nvidia  Positive   
74681        9200          Nvidia  Positive   

                                                    text  
0      im getting on borderlands and i will murder yo...  
1      I am coming to the borders and I will kill you...  
2      im getting on borderlands and i will kill you ...  
3      im coming on borderlands and i will murder you...  
4      im getting on borderlands 2 and i will murder ...  
...                                                  ...  
74677  Just realized t

In [38]:
# Tallies for the evaluation loop: matching rows, mismatching rows, rows seen.
correct = wrong = total = 0

In [39]:
# Scores below -SENTIMENT_THRESHOLD count as Negative, above +SENTIMENT_THRESHOLD
# as Positive, and everything in between (bounds included) as Neutral.
SENTIMENT_THRESHOLD = 0.3

# Reset the tallies here so re-running this cell does not double count.
correct = 0
wrong = 0
total = 0

# Only two columns are needed, so iterate over them directly instead of
# materialising a Series per row with iterrows().
for text, actual in zip(valdata['text'], valdata['sentiment']):
    total += 1
    score = sentiment_score(text)

    if isinstance(score, str):
        # sentiment_score() returns the string 'NI' when detection failed;
        # comparing it with a float would raise TypeError, so count it as wrong.
        wrong += 1
        continue

    if score < -SENTIMENT_THRESHOLD:
        predicted = 'Negative'
    elif score > SENTIMENT_THRESHOLD:
        predicted = 'Positive'
    else:
        predicted = 'Neutral'

    # NOTE(review): any label other than these three (the dataset may also
    # contain e.g. 'Irrelevant' - confirm) can never match and is counted wrong.
    if predicted == actual:
        correct += 1
    else:
        wrong += 1

print(correct)
print(wrong)
print(total)


im getting on borderlands and i will murder you all ,
I am coming to the borders and I will kill you all,
im getting on borderlands and i will kill you all,
im coming on borderlands and i will murder you all,
im getting on borderlands 2 and i will murder you me all,
im getting into borderlands and i can murder you all,
So I spent a few hours making something for fun. . . If you don't know I am a HUGE @Borderlands fan and Maya is one of my favorite characters. So I decided to make myself a wallpaper for my PC. . Here is the original image versus the creation I made :) Enjoy! pic.twitter.com/mLsI5wf9Jg
So I spent a couple of hours doing something for fun... If you don't know that I'm a huge @ Borderlands fan and Maya is one of my favorite characters, I decided to make a wallpaper for my PC.. Here's the original picture compared to the creation I made:) Have fun! pic.twitter.com / mLsI5wf9Jg
So I spent a few hours doing something for fun... If you don't know I'm a HUGE @ Borderlands fan a