In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from transformers import BertModel, BertTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
model_weights = torch.load("model/socmed_sentiment_model.pkl", map_location="cpu")

In [4]:
bert_pretrained = BertModel.from_pretrained(
    "indobenchmark/indobert-large-p2",
    cache_dir="model",
    device_map="cpu",
    local_files_only=True,
)
bert_tokenizer = BertTokenizer.from_pretrained(
    "indobenchmark/indobert-large-p2",
    cache_dir="model",
    device_map="cpu",
    local_files_only=True,
)

In [5]:
for name, param in model_weights.items():
    if "out." in name:
        print(name, param.size())

out.weight torch.Size([3, 1024])
out.bias torch.Size([3])


In [6]:
num_class = 3
dropout_layer = nn.Dropout(p=.2)
classifier_layer = nn.Linear(bert_pretrained.config.hidden_size, num_class)
bert_pretrained.load_state_dict({k.replace("bert.", ""): v for k, v in model_weights.items() if "bert." in k})
classifier_layer.load_state_dict({k.replace("out.", ""): v for k, v in model_weights.items() if "out." in k})

for param in bert_pretrained.parameters():
    param.requires_grad = False

In [8]:
texts = ["hadeh biznet lama2 makin jelek"]
encoding = bert_tokenizer.encode_plus(
    texts,
    add_special_tokens=True,
    # max_length=bert_tokenizer.vocab_size,
    max_length=512,
    return_token_type_ids=False,
    padding='max_length',
    truncation=True,
    return_attention_mask=True,
    return_tensors='pt',
)

In [9]:
bert_pretrained = bert_pretrained.eval()
predictions = []
prediction_probs = []
with torch.no_grad():
    input_ids = encoding["input_ids"].to("cpu")
    attention_mask = encoding["attention_mask"].to("cpu")

    pooled_output = bert_pretrained(
        input_ids=input_ids, 
        attention_mask=attention_mask
    )
    last_hidden_states = pooled_output[0]
    outputs = dropout_layer(pooled_output[1])
    outputs = classifier_layer(outputs)

    # Process the output 
    _, preds = torch.max(outputs, dim=1)
    probs = F.softmax(outputs, dim=1)

    predictions.extend(preds)
    prediction_probs.extend(probs)

In [11]:
predictions, prediction_probs

([tensor(2)], [tensor([2.8022e-04, 2.4270e-04, 9.9948e-01])])

In [12]:
class TselDataset(Dataset):
    def __init__(self, texts, targets, tokenizer, max_len):
        self.texts = texts
        self.targets = targets
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, item):
        review = str(self.texts[item])
        target = self.targets[item]

        encoding = self.tokenizer.encode_plus(
            review,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            #pad_to_max_length=True,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )

        return {
            'review_text': review,
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'targets': torch.tensor(target, dtype=torch.long)
        }