In [60]:
from transformers import RobertaForSequenceClassification, AutoTokenizer

from transformers import logging
logging.set_verbosity_error()

import torch

import numpy as np

# Lookup table from predicted class index to its human-readable label name.
idx_to_label = dict(enumerate((
    'CONTENT',
    'USER_EXPERIENCE',
    'SUBSCRIPTION',
    'INTERFACE',
)))

In [25]:
# The checkpoints could also be loaded through AutoModel, but
# RobertaForSequenceClassification is more convenient and makes it explicit
# that these are fine-tuned RoBERTa classifiers.
model_20, model_40 = (
    RobertaForSequenceClassification.from_pretrained(checkpoint_dir)
    for checkpoint_dir in (
        '../models/review-classification-roberta-20-trials',
        '../models/review-classification-roberta-40-trials',
    )
)

In [39]:
# Both models were trained with the same tokenizer, so either checkpoint
# directory can be used to load it.
tokenizer = AutoTokenizer.from_pretrained(
    '../models/review-classification-roberta-20-trials'
)

In [64]:
def predict_labels(texts, model):
    """Classify review text(s) with ``model`` and return the label names.

    Shared implementation behind ``predict_20`` and ``predict_40``; the two
    differ only in the model they pass in.

    Args:
        texts: A single string or an iterable of strings.
        model: A model that is called with the tokenizer output as keyword
            arguments and whose result exposes a ``logits`` attribute
            (in this notebook: ``model_20`` or ``model_40``).

    Returns:
        A list with one label name from ``idx_to_label`` per input text, in
        input order. An empty input yields an empty list.
    """
    # isinstance (rather than an exact type comparison) also wraps str subclasses.
    if isinstance(texts, str):
        texts = [texts]
    else:
        texts = list(texts)

    # Nothing to classify: return early instead of sending an empty batch
    # through the tokenizer and the model.
    if not texts:
        return []

    tokenized = tokenizer(texts, truncation=True, padding=True, return_tensors='pt')

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        output_logits = model(**tokenized).logits

    # tolist() yields plain Python ints, which are the key type of idx_to_label.
    predictions = torch.argmax(output_logits, dim=-1).tolist()

    return [idx_to_label[prediction] for prediction in predictions]


def predict_20(texts):
    """Return the label names predicted by ``model_20`` for ``texts``."""
    return predict_labels(texts, model_20)


def predict_40(texts):
    """Return the label names predicted by ``model_40`` for ``texts``."""
    return predict_labels(texts, model_40)



# Run the same two sample reviews through both models and show the
# predicted labels one model per line (20-trials first, then 40-trials).
preds_20, preds_40 = (
    predict(['it is a disgusting app', 'Easy to use, interface is awesome'])
    for predict in (predict_20, predict_40)
)

print(preds_20, preds_40, sep='\n')

['USER_EXPERIENCE', 'INTERFACE']
['USER_EXPERIENCE', 'INTERFACE']
