In [50]:
import os
import sys
sys.path.append("..")
import torch
from transformers import RobertaTokenizer
from src.models.Classifier import EmotionClassifier
from typing import List

In [51]:
class Inference():
    """Batch emotion prediction with a fine-tuned ``EmotionClassifier``.

    The tokenizer is loaded from ``model_path`` and the classifier weights from
    ``os.path.join(model_path, model_name)``. Predicted class indices are
    translated to human-readable labels through ``id_to_emotion``.
    """

    def __init__(self, model_path, model_name, pretrained_model_path="../weights/twitter-roberta-base-sentiment-latest", max_length=70):
        """Load the tokenizer and the classifier weights.

        Args:
            model_path: Directory holding the tokenizer files and the checkpoint.
            model_name: File name of the checkpoint inside ``model_path``.
            pretrained_model_path: Path forwarded to ``EmotionClassifier`` as
                ``pretrained_model_path``.
            max_length: Maximum number of tokens per tweet; longer inputs are
                truncated by the tokenizer.
        """
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.tokenizer = RobertaTokenizer.from_pretrained(model_path)
        # Position in this list == class index produced by the classifier.
        self.id_to_emotion = ['No emotion toward brand or product', 'Positive emotion', 'Negative emotion']
        self.model = EmotionClassifier(
            num_classes_emotion=len(self.id_to_emotion),
            pretrained_model_path=pretrained_model_path,
        )
        # map_location lets a checkpoint saved on GPU be restored on a CPU-only machine.
        state_dict = torch.load(os.path.join(model_path, model_name), map_location=self.device)
        self.model.load_state_dict(state_dict)
        self.model.to(self.device)
        # Inference only: switch off dropout / other train-time behaviour so
        # predictions are deterministic.
        self.model.eval()
        self.max_length = max_length

    def get_batch_inference(self, batch_tweets: List) -> List:
        """Predict an emotion label for every tweet in ``batch_tweets``.

        Args:
            batch_tweets: List of tweet strings.

        Returns:
            A list of labels taken from ``id_to_emotion``, one per input tweet,
            in the same order. An empty input yields an empty list.
        """
        if len(batch_tweets) == 0:
            return []
        # Use the configured limit rather than a hard-coded value so the
        # constructor's ``max_length`` argument actually takes effect.
        batch = self.tokenizer(
            batch_tweets,
            padding=True,
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt",
        )
        batch = batch.to(self.device)
        with torch.no_grad():
            output = self.model(**batch)
        # argmax over dim=1 picks one class index per row of the model output.
        predicted_ids = torch.argmax(output, dim=1).tolist()
        return [self.id_to_emotion[class_id] for class_id in predicted_ids]

#### Getting results for the test data

In [89]:
import pandas as pd
from src.utils import preprocessing

In [90]:
# Checkpoint location: `model_path` is the directory the tokenizer is loaded
# from, and `model_name` is the weights file inside that directory.
model_path, model_name = "../weights/v001", "pytorch_model.bin"
inference = Inference(model_path, model_name)

In [91]:
# Read the "Test" sheet; the raw frame is kept under its own name because the
# predictions are attached to it later on.
original_test = pd.read_excel(
    io="../data/dataset.xlsx",
    sheet_name="Test",
)
# Project preprocessing in non-training mode (train=False).
test = preprocessing(original_test, train=False)

In [95]:
# Batching is not strictly required for a one-off run, but the tweets are
# still sent through the model in chunks of `batch_size`.
batch_size = 16
tweets = test["tweet"].tolist()
emotion_list = [
    emotion
    for i in range(0, len(tweets), batch_size)
    for emotion in inference.get_batch_inference(tweets[i:i + batch_size])
]
# NOTE(review): assumes `preprocessing` keeps the row count and order of
# `original_test` - confirm; a length mismatch makes this assignment raise.
original_test["emotion"] = emotion_list

In [99]:
# Write the test frame, including the predicted "emotion" column, to a new
# workbook; the DataFrame index is not exported.
original_test.to_excel(
    excel_writer="../data/result.xlsx",
    sheet_name="Test",
    index=False,
)