In [None]:
import re

import pandas as pd
import torch
from scipy.special import softmax
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer, AutoConfig

In [None]:
# Preprocess text (username and link placeholders)
def preprocess(text):
    """
    Replace user mentions and links with fixed placeholders.

    :param text: string containing tweet text
    :returns: the text with every ``@...`` run replaced by ``@user`` and every
        whitespace-delimited token starting with ``http`` replaced by ``http``
    """
    # Mentions: '@' followed by any run of non-whitespace characters.
    new_text = re.sub(r'@\S+', '@user', text)
    # Links: only tokens that *start* with 'http' (start of string or preceded
    # by whitespace), so words merely containing 'http' are left alone.
    new_text = re.sub(r'(?<!\S)http\S*', 'http', new_text)
    return new_text
# Identifier of the pretrained sentiment checkpoint passed to from_pretrained.
MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"
# Tokenizer belonging to the checkpoint; sentiment_score uses it to encode text.
tokenizer = AutoTokenizer.from_pretrained(MODEL)

# Sequence-classification model for the same checkpoint; sentiment_score calls it.
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
# Disabled on purpose: would save a copy of the model under the path given by MODEL.
#model.save_pretrained(MODEL)


def sentiment_score(text):
    """
    Detects the sentiment of a string of text, using the RoBERTa model.

    The input is converted with ``str()``, passed through ``preprocess``,
    tokenized (truncated to at most 512 tokens) and classified. The logits
    are turned into probabilities with softmax.

    :param text: string containing tweet text (any value is coerced with ``str()``)
    :returns: ``scores[2] - scores[0]``, i.e. positive minus negative
        probability, a float between -1 and 1; or the string ``'NI'`` if
        scoring raised an exception (the exception is printed)
    """
    score_sum = 'NI'  # sentinel handed back when scoring fails
    try:
        text_processed = preprocess(str(text))
        # Truncate explicitly so over-long inputs are scored on their first
        # 512 tokens instead of raising inside the model.
        encoded_input = tokenizer(
            text_processed,
            return_tensors='pt',
            truncation=True,
            max_length=512,
        )
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            output = model(**encoded_input)
        # output[0] holds the logits; [0] selects the single input in the batch.
        scores = softmax(output[0][0].numpy())
        score_sum = scores[2] - scores[0]  # score = positive - negative
    except Exception as e:
        # Best effort: report the problem and fall through to the sentinel.
        print(e)

    return score_sum

In [None]:
# data source: https://www.kaggle.com/datasets/jp797498e/twitter-entity-sentiment-analysis
# NOTE(review): the evaluation cell reads the columns 'text' and
# 'airline_sentiment' from this frame; confirm that this CSV (and the
# source link above) actually provides columns with those names.

# Labelled validation data used to measure the model's accuracy.
valdata2 = pd.read_csv(r'more_valdata.csv')
# Show the loaded frame as the cell output.
valdata2

In [None]:
# Tallies for the evaluation loop. Reset them before each evaluation run so
# counts do not carry over from a previous execution.
correct = 0   # rows classified correctly with the +/-0.2 threshold
correct2 = 0  # rows classified correctly with the +/-0.4 threshold
total = 0     # rows evaluated

In [None]:
def predicted_label(score, threshold):
    """
    Map a numeric sentiment score to a label using a symmetric neutral band.

    :param score: numeric sentiment score (positive minus negative)
    :param threshold: half-width of the neutral band
    :returns: 'negative' if score < -threshold, 'positive' if
        score > threshold, 'neutral' if -threshold <= score <= threshold,
        otherwise None (e.g. for NaN)
    """
    if score < -threshold:
        return 'negative'
    if score > threshold:
        return 'positive'
    # Inclusive bounds so a score exactly on the threshold still gets a label.
    if -threshold <= score <= threshold:
        return 'neutral'
    return None


# Start from zero inside this cell so re-running it does not add to the
# counts left over from an earlier run.
correct = 0
correct2 = 0
total = 0
failed = 0  # rows for which sentiment_score returned its failure sentinel

for text, actual in zip(valdata2['text'], valdata2['airline_sentiment']):
    total += 1

    score = sentiment_score(text)
    if isinstance(score, str):
        # sentiment_score returns the string 'NI' on failure; comparing it
        # with a float would raise TypeError. The row stays in `total` and
        # therefore counts as incorrect.
        failed += 1
        continue

    if predicted_label(score, 0.2) == actual:
        correct += 1
    if predicted_label(score, 0.4) == actual:
        correct2 += 1

print(f'Correct at 0.2: {correct}')
print(f'Correct at 0.4: {correct2}')
print(f'Total: {total}')
if failed:
    print(f'Could not be scored: {failed}')
if total:
    print(f'Accuracy with 0.2: {(correct/total)*100}%')
    print(f'Accuracy with 0.4: {(correct2/total)*100}%')
else:
    # Avoid ZeroDivisionError on an empty validation set.
    print('No rows evaluated; accuracy is undefined.')