In [1]:
import re
import torch
import spacy
from torch import nn
from torch.utils.data import Dataset
from torch.optim import AdamW
from transformers import (
    BertTokenizer,
    BertModel,
)
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

In [2]:
class MultiClassClassifier(nn.Module):
    """BERT encoder topped by a two-layer linear head that emits class probabilities.

    The submodule attribute names (``bert``, ``linear1``, ``linear2``) become
    the state-dict keys, so they have to stay as they are for previously saved
    checkpoints to keep loading.
    """

    def __init__(self, bert_model_name: str, hidden_size: int, num_classes: int):
        """Create the encoder and the classification head.

        Args:
            bert_model_name: Name or path handed to ``BertModel.from_pretrained``.
            hidden_size: Width of the intermediate linear layer.
            num_classes: Size of the final linear layer (one unit per class).
        """
        super().__init__()
        self.bert = BertModel.from_pretrained(bert_model_name)
        self.dropout = nn.Dropout(0.1)
        # Head: encoder width -> hidden_size -> num_classes (no activation in between).
        self.linear1 = nn.Linear(self.bert.config.hidden_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, input_ids, attention_mask):
        """Return the softmax (over dim 1) of the head applied to BERT's pooled output.

        Args:
            input_ids: Token ids forwarded to the BERT encoder.
            attention_mask: Attention mask forwarded to the BERT encoder.

        Returns:
            Probabilities, not raw logits: each row sums to 1.
        """
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        hidden = self.linear1(self.dropout(encoded.pooler_output))
        return self.softmax(self.linear2(hidden))

In [3]:
# spaCy pipelines that have already been loaded, keyed by model name, so the
# slow ``spacy.load`` call happens once instead of once per prediction.
_SPACY_PIPELINES = {}


def _get_spacy_pipeline(name: str = "en_core_web_sm"):
    """Return the spaCy pipeline ``name`` (parser and NER excluded), loading it on first use."""
    if name not in _SPACY_PIPELINES:
        _SPACY_PIPELINES[name] = spacy.load(name, exclude=["parser", "ner"])
    return _SPACY_PIPELINES[name]


def predict_tratamento_texto(text: str, lemma=True):
    """Clean a raw text before it is handed to the tokenizer.

    Steps, in order: lowercase, collapse every whitespace run to one space and
    strip the ends, delete ``<...>`` spans, tokenise with spaCy, drop stop
    words and punctuation, and join what is left with single spaces.

    Args:
        text: Raw input text.
        lemma: When truthy, emit each kept token's lemma; otherwise its
            surface text.

    Returns:
        The cleaned text as a single space-separated string.
    """
    nlp = _get_spacy_pipeline()
    text = text.lower()
    text = re.sub(r"\s+", " ", text).strip()
    # NOTE(review): tags are removed after whitespace collapsing and replaced
    # by the empty string, so "a<br />b" becomes "ab". Left unchanged because
    # the saved model was presumably trained on text cleaned this way - confirm
    # before altering.
    text = re.sub(r"<.*?>", "", text)
    doc = nlp(text)
    kept_tokens = (
        token for token in doc if not token.is_stop and not token.is_punct
    )
    return " ".join(token.lemma_ if lemma else token.text for token in kept_tokens)

In [4]:
def predict_sentiment(
    text: str,
    model: nn.Module,
    tokenizer: "BertTokenizer",
    device: "str | torch.device",
    max_length: int,
    *,
    high_threshold: float = 0.85,
    low_threshold: float = 0.6,
):
    """Classify one text as ``"negative"``, ``"positive"`` or ``"neutral"``.

    The text is cleaned with ``predict_tratamento_texto(text, lemma=False)``,
    tokenised (padded/truncated to ``max_length``) and passed through
    ``model``. Column 0 of the first output row is read as the negative
    probability and column 1 as the positive one; both are printed.

    Args:
        text: Raw text to classify.
        model: Callable module taking ``input_ids`` and ``attention_mask`` and
            returning per-class probabilities; it is switched to eval mode.
        tokenizer: Tokenizer called with ``return_tensors="pt"``.
        device: Device the encoded tensors are moved to (a device string or a
            ``torch.device``).
        max_length: Padding/truncation length passed to the tokenizer.
        high_threshold: Minimum probability a class needs to be chosen.
        low_threshold: The other class must stay strictly below this value.

    Returns:
        ``"negative"`` or ``"positive"`` when that class reaches
        ``high_threshold`` while the other stays below ``low_threshold``;
        ``"neutral"`` otherwise.
    """
    model.eval()
    text = predict_tratamento_texto(text, lemma=False)
    encoding = tokenizer(
        text,
        return_tensors="pt",
        max_length=max_length,
        padding="max_length",
        truncation=True,
    )
    input_ids = encoding["input_ids"].to(device)
    attention_mask = encoding["attention_mask"].to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        prob = model(input_ids=input_ids, attention_mask=attention_mask)

    prob_neg = prob[0, 0].item()
    prob_pos = prob[0, 1].item()
    print(f"prob negativa: {prob_neg:.5f}")
    print(f"prob positiva: {prob_pos:.5f}")

    if prob_neg >= high_threshold and prob_pos < low_threshold:
        return "negative"
    if prob_pos >= high_threshold and prob_neg < low_threshold:
        return "positive"
    return "neutral"

In [5]:
# Set up parameters
BERT_MODEL_NAME = "bert-base-uncased"  # pretrained checkpoint used for both the tokenizer and the BERT encoder
hidden_state = 10  # passed as `hidden_size` (width of the intermediate linear layer) to MultiClassClassifier
num_classes = 2  # number of output neurons
max_length = 128  # padding/truncation length given to the tokenizer in predict_sentiment
batch_size = 16  # not referenced by any cell shown in this notebook
num_epochs = 4  # not referenced by any cell shown in this notebook
learning_rate = 2e-5  # learning rate handed to AdamW

In [6]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [7]:
# Tokenizer for the pretrained checkpoint named by BERT_MODEL_NAME.
tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_NAME)

In [8]:
# NOTE(review): absolute, machine-specific path - consider making it configurable.
PATH_MODEL_SAVE = "D:/tcc2/guilherme/saved_models/sentiment_classifier_Bert_IMDB_Dataset_sem_lemma.pth"
model = MultiClassClassifier(BERT_MODEL_NAME, hidden_state, num_classes).to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was saved from a CUDA run also loads on a CPU-only machine.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source.
model.load_state_dict(torch.load(PATH_MODEL_SAVE, map_location=device))

<All keys matched successfully>

In [9]:
# NOTE(review): no cell shown in this notebook uses `optimizer`; it looks like
# a leftover from the training notebook - confirm before removing.
optimizer = AdamW(model.parameters(), lr=learning_rate)

In [10]:
# Captain Marvel (2019) review that its author rates 5/10.
text = """Plot

Carol Danvers     becomes one of the universe's most powerful heroes when Earth is caught in the middle of a galactic war between two alien races.

Cast

Brie Larson, Samuel L. Jackson (Because duh), Jude "Just consistently dreadful" Law, Annette Bening, Djimon Hounsou, Clark Gregg and blink and you'll miss him Lee Pace who returns as Ronan but looks so different I didn't even think it was him.

Verdict

I watched this back when it was initially released, I watched it a second time a few days ago as the missus is wanting us to binge watch the entire MCU as she's very behind. My opinion has changed on the 2nd viewing and not in a good way, my rating has shifted from a 6/10 to 5/10.

You see straight out of the gate the first thing you notice about Carol Danvers is she's not really a character you can get behind. She's not funny, she's not entertaining, she comes across as a surly teenage girl who is just upset at the slightest thing and just doesn't want to be there. This is not a character you can build a movie around, like trying to make a teenage Groot movie! It wouldn't work, but he's okay as a side character.

Supporting cast are also hit and miss, Jackson and Gregg are great, but Lynch and Law just stink up every scene they're in.

I'm a Marvel fan but I recognize where it's weak, this is a distinctly average film that serves as a standalone origin story and doesn't contribute much to the universe as a whole.

Rants

I remember when the movie came out all the controversy with Brie Larson, I just had to Google what the controversy even was as I don't remember due to not focusing on such things. Now I can't really get a definitive answer. From what I see it's a combination of people not liking her attitude and her comments on feminism. So I Googled further to see what she said, she came across arrogant in them and a smidge out of touch but none of it explained the overwhelming hate I've seen aimed at her. Then I remembered that people talk about all the different types of bigotry but misogyny rarely comes up, I remembered that it's visibly increased over the past decade and appreciated why she's been targeted. News flash, the outspoken loud brash man hating femnists you likely thing of when you hear that word make up a very small percentage. Feminism is good, if you disagree I hope you simply don't know the meaning of the word.

The Good

Jackson and Gregg Has a couple of decent moments Not a bad soundtrack Goose!

The Bad

Larson isn't great Law and Lynch are terrible Lead just comes across unlikable.

Overall just a weak entry to the MC
"""

In [11]:
# Test sentiment prediction on the review stored in `text`.
# predict_sentiment prints both class probabilities before returning the label.
sentiment = predict_sentiment(text, model, tokenizer, device, max_length)

# Print the cleaned text (the same lemma=False cleaning predict_sentiment
# applies internally), followed by the predicted label.
print(predict_tratamento_texto(text, lemma=False))
print(f"Predicted sentiment: {sentiment}")

prob negativa: 0.83986
prob positiva: 0.16014
plot carol danvers universe powerful heroes earth caught middle galactic war alien races cast brie larson samuel l. jackson duh jude consistently dreadful law annette bening djimon hounsou clark gregg blink miss lee pace returns ronan looks different think verdict watched initially released watched second time days ago missus wanting binge watch entire mcu opinion changed 2nd viewing good way rating shifted 6/10 5/10 straight gate thing notice carol danvers character funny entertaining comes surly teenage girl upset slightest thing want character build movie like trying teenage groot movie work okay character supporting cast hit miss jackson gregg great lynch law stink scene marvel fan recognize weak distinctly average film serves standalone origin story contribute universe rants remember movie came controversy brie larson google controversy remember focusing things definitive answer combination people liking attitude comments feminism goog

In [12]:
# Captain Marvel (2019) review, rated 3/10.
# The review has two paragraphs. It is written as adjacent string literals with
# an explicit "\n" because a single-quoted literal cannot span physical lines;
# the former f-prefix was dropped since the text has no placeholders.
test_text = (
    "I am a Marvel fan. I go see all the movies, then buy the blu-rays. I have character statues, posters and have read the comics including Captain Marvel. There is a reason people become fans of certain things, and those things done well over time generate more fans until it becomes a \"Juggernaut\" like the MCU has. I fear that it's not unstoppable however, as the sub-par Captain Marvel has painfully illustrated. The movie is a pastiche of poorly written scenes with a bit of uninspired action sprinkled here and there with barely any plot at all. Carol Danvers has no character development whatsoever, no adversity to overcome besides amnesia and no villain to fight. I know there are people saying they like this movie, but I feel like they either aren't being honest or they like it for personal reasons that are not present in the movie. Objectively speaking, in terms of the technical aspects of storytelling, Captain Marvel just isn't effective. Subjectively, I was bored the entire time and the nostalgia parts made me feel as though JJ Abrams was poking me in the ribs asking me if I 'member the 90's. Yes. Yes I do. And for the record, people are criticizing this movie because it was bad, not because they \"haaaate the wooomans!\" Dismissing legitimate criticisms using this tactic will not produce better films, which is the only thing most people want. The problem is that they built the movie around the idea of \"strong woman\" and promoted it as an identity film, so when the movie ends up being bad some people rush to defend it because they don't want the IDEA to fail. The movie itself is secondary - just a vehicle to slap their bumper stickers on, which is a real shame because this could have been a great addition to the MCU. I hope they learn something from this, but from what I've heard it appears that Marvel actually plans to escalate the identity politics in the next phase which would be an absolute disaster for them. \n"
    "When pointing to the box office as a defense for how \"good\" Captain Marvel was, just remember that tons of paying customers didn't like it at all."
)
sentiment = predict_sentiment(test_text, model, tokenizer, device, max_length)
print(test_text)
print(f"Predicted sentiment: {sentiment}")

prob negativa: 0.99997
prob positiva: 0.00003
I am a Marvel fan. I go see all the movies, then buy the blu-rays. I have character statues, posters and have read the comics including Captain Marvel. There is a reason people become fans of certain things, and those things done well over time generate more fans until it becomes a "Juggernaut" like the MCU has. I fear that it's not unstoppable however, as the sub-par Captain Marvel has painfully illustrated. The movie is a pastiche of poorly written scenes with a bit of uninspired action sprinkled here and there with barely any plot at all. Carol Danvers has no character development whatsoever, no adversity to overcome besides amnesia and no villain to fight. I know there are people saying they like this movie, but I feel like they either aren't being honest or they like it for personal reasons that are not present in the movie. Objectively speaking, in terms of the technical aspects of storytelling, Captain Marvel just isn't effective. Su

In [13]:
# Captain Marvel (2019) review, rated 10/10.
test_text = """I loved every moment of this film. We speak so much about representation and here it is in a beautifully wrapped box. Where was this hitter when I was growing up in southwest Virginia? Not in my library I assure you. And yes, I know this isn't the real origin story. But I needed this movie. I am 41 years old. It shouldn't have taken this long."""
# Classify the raw review, then echo it next to the predicted label.
sentiment = predict_sentiment(test_text, model, tokenizer, device, max_length)
print(test_text)
print(f"Predicted sentiment: {sentiment}")

prob negativa: 0.00002
prob positiva: 0.99998
I loved every moment of this film. We speak so much about representation and here it is in a beautifully wrapped box. Where was this hitter when I was growing up in southwest Virginia? Not in my library I assure you. And yes, I know this isn't the real origin story. But I needed this movie. I am 41 years old. It shouldn't have taken this long.
Predicted sentiment: positive
