In [5]:
from transformers import (
    AutoTokenizer,
    DataCollatorWithPadding,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    pipeline,
)
import os
import torch
import requests
from bs4 import BeautifulSoup
import re

from datasets import load_dataset, Dataset
import pandas as pd
import numpy as np
import evaluate
import glob

# Working directory of the kernel at the time this cell runs; it is printed
# by the evaluation cell further down.
CWD_PATH = os.getcwd()


In [18]:
# Checkpoint directory that both the tokenizer and the classifier are loaded from.
OUT_DIR = "/home/pr_admin/ak_ws/story-vibe/source/output/checkpoint-08_07_2024"
# Prefer the first GPU, but fall back to CPU so the notebook still runs on a
# machine without CUDA instead of failing at `model.to(device)`.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
tokenizer = AutoTokenizer.from_pretrained(OUT_DIR)
model = AutoModelForSequenceClassification.from_pretrained(OUT_DIR)
# Last expression of the cell: moves the weights to `device` and displays the
# module tree as the cell output.
model.to(device)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

# Testing out the fine-tuned BERT model

In [15]:
# Hand-written probe sentences; the trailing comment on each line is the
# emotion the author expects the classifier to choose.
sentences = [
    "I am feeling really down today. Nothing seems to be going right.",  # sad
    "Why did you do that? I'm so angry right now!",  # angry
    "This is the best day of my life! I'm so happy!",  # happy
    "I heard a strange noise outside my window. I'm scared.",  # afraid/scared
    "My brothers goat is an animal.",  # neutral
    "Harry Potter is a hero.",  # happy
    "You really came?",  # surprise
    "How are you?",  # neutral
]

# Encode the whole list as a single padded batch and move it to the model's device.
inputs = tokenizer(
    sentences,
    padding=True,
    truncation=True,
    return_tensors="pt",
).to(device)

# Forward pass only -- gradient tracking is switched off.
with torch.no_grad():
    outputs = model(**inputs)

# Index of the highest-scoring class for every sentence.
predictions = outputs.logits.argmax(dim=1)

# Lookup table from class index to label name, taken from the model config.
labels = model.config.id2label

# Report each sentence next to the label name the model picked for it.
for sentence, label_id in zip(sentences, predictions.tolist()):
    print(f"Sentence: {sentence}")
    print(f"Predicted label: {labels[label_id]}\n")

Sentence: I am feeling really down today. Nothing seems to be going right.
Predicted label: sadness

Sentence: Why did you do that? I'm so angry right now!
Predicted label: anger

Sentence: This is the best day of my life! I'm so happy!
Predicted label: happiness

Sentence: I heard a strange noise outside my window. I'm scared.
Predicted label: fear

Sentence: My brothers goat is an animal.
Predicted label: neutral

Sentence: Harry Potter is a hero.
Predicted label: happiness

Sentence: You really came?
Predicted label: surprise

Sentence: How are you?
Predicted label: neutral



In [35]:
print(CWD_PATH)
# NOTE(review): the file is read as one column per label name, each column
# holding sentences that should be classified as that label -- confirm against the CSV.
eval_data = pd.read_csv("/home/pr_admin/ak_ws/story-vibe/data/texts/emotions_dataset_personal_eval.csv")

# Maps each true label id to the list of label ids predicted for that label's sentences.
results = {}

with torch.no_grad():
    for label, label_id in model.config.label2id.items():
        # pandas pads shorter columns with NaN; drop the padding so the
        # tokenizer only ever receives strings. A separate name is used so the
        # `sentences` list from the sample cell is not overwritten.
        label_texts = eval_data[label].dropna().astype(str).tolist()
        if not label_texts:
            # Nothing to classify for this label; the tokenizer cannot take an empty batch.
            results[label_id] = []
            continue

        inputs = tokenizer(label_texts, padding=True, truncation=True, return_tensors="pt").to(device)

        # Perform inference
        outputs = model(**inputs)

        # Get the predicted label ids and bring them back to plain Python ints
        predictions = torch.argmax(outputs.logits, dim=1)
        results[label_id] = predictions.to('cpu').tolist()

# A prediction is correct when it equals the id of the column it came from.
correct_preds = sum(pred == true_id for true_id, preds in results.items() for pred in preds)
all_preds = sum(len(preds) for preds in results.values())

if all_preds:
    print(f"{(correct_preds/all_preds) * 100}% correct.")
else:
    # Avoid ZeroDivisionError when the CSV yielded no sentences at all.
    print("No evaluation sentences found; accuracy is undefined.")



/home/pr_admin/ak_ws/story-vibe/source
62.637362637362635% correct.
