In [None]:
import pandas as pd
from transformers import pipeline
import torch
import numpy as np
from src.main.model.roberta import train_roberta_sentiment
from src.main.util.preprocessing import pre_process, preprocess_tweet

In [None]:
# Load the training and test splits from resources/data, then preview the
# first rows of the training frame as the cell output.
train = pd.read_csv("../resources/data/training.csv")
test = pd.read_csv("../resources/data/test.csv")
train.head()

In [None]:
# Pick the compute device: CUDA when torch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

In [None]:
# Run the project's RoBERTa training routine on the training frame; the call
# yields a (trainer, metrics) pair that is unpacked below.
training_outcome = train_roberta_sentiment(train)
trainer, metrics = training_outcome
print(trainer)
# Last expression of the cell: rendered as the cell output.
metrics

In [None]:
# Sentiment class names; not referenced by any of the visible cells.
CANDIDATES = ["positive", "negative", "neutral"]

# Clean the raw sentences with the project's tweet pre-processor and keep the
# result both as a column and as a plain list for the pipeline.
train['clean'] = train['sentence'].apply(preprocess_tweet)
sentences = train['clean'].tolist()

# One checkpoint name shared by the model and the tokenizer so the two cannot
# drift apart.
ROBERTA_CHECKPOINT = "cardiffnlp/twitter-roberta-base-sentiment-latest"

roberta_clf = pipeline(
    task="sentiment-analysis",
    model=ROBERTA_CHECKPOINT,
    tokenizer=ROBERTA_CHECKPOINT,
    # Use the first GPU when CUDA is available and fall back to CPU (-1)
    # otherwise; a hard-coded device=0 fails on machines without CUDA.
    device=0 if torch.cuda.is_available() else -1,
    batch_size=64,
    padding=True,
    max_length=512,
    truncation=True
)

# Classify every cleaned training sentence and store the lower-cased label.
roberta_preds = roberta_clf(sentences, truncation=True)
train["pred"] = [p["label"].lower() for p in roberta_preds]

train.head()


In [None]:
_LABEL2NUM = {
    "negative": -1,
    "neutral": 0,
    "positive": 1
}

def _to_num(x):
    if isinstance(x, str):
        return _LABEL2NUM[x.strip().lower()]
    return x 


def sentiment_score(y_true, y_pred):
    y_t = np.fromiter((_to_num(t) for t in y_true), dtype=np.int8)
    y_p = np.fromiter((_to_num(p) for p in y_pred), dtype=np.int8)
    mean_abs_diff = np.abs(y_t - y_p).mean()
    return 0.5 * (2.0 - mean_abs_diff)


# Evaluate the pipeline's predictions against the labels of the training set.
results = sentiment_score(y_true=train["label"], y_pred=train["pred"])
print("Sentiment score: ", results)


# RoBERTa is the model chosen to label the test set.

In [None]:
# Clean the test sentences with the same pre-processor used for training data,
# classify them with the RoBERTa pipeline, and store the lower-cased labels.
test['clean'] = test['sentence'].apply(preprocess_tweet)
test_sentences = test['clean'].to_list()
rob_test = roberta_clf(test_sentences, truncation=True)
test["label"] = [prediction["label"].lower() for prediction in rob_test]
test.head()

In [None]:
# Write only the id and predicted label columns to the result file, without
# the DataFrame index.
test.to_csv("../resources/data/result.csv", columns=["id", "label"], index=False)