In [28]:
from transformers import BertTokenizer, BertModel

import torch
import numpy as np

In [3]:
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [29]:
class BERTClass(torch.nn.Module):
    def __init__(self):
        super(BERTClass, self).__init__()
        self.l1 = BertModel.from_pretrained('bert-base-uncased')
        self.l2 = torch.nn.Dropout(0.3)
        self.l3 = torch.nn.Linear(768, 6)
        
    def forward(self, ids, mask, token_type_ids):
        _, output_1 = self.l1(ids, attention_mask = mask, token_type_ids = token_type_ids)
        output_2 = self.l2(output_1)
        output = self.l3(output_2)
        return output

In [30]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = torch.load('bert_model.pth')
model.to(device)

BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

In [56]:
tokenized_sentence = tokenizer.encode_plus(
    'He is such a jerk',
    None,
    add_special_tokens=True,
    max_length=200,
    pad_to_max_length=True,
    return_token_type_ids=True
)

In [57]:
input_ids = tokenized_sentence['input_ids']
token_type_ids = tokenized_sentence['token_type_ids']
attention_masks = tokenized_sentence['attention_mask']

input_ids = torch.tensor(input_ids).to(device, dtype=torch.long).unsqueeze(0)
token_type_ids = torch.tensor(token_type_ids).to(device, dtype=torch.long).unsqueeze(0)
attention_masks = torch.tensor(attention_masks).to(device, dtype=torch.long).unsqueeze(0)

In [58]:
logits = model(input_ids, attention_masks, token_type_ids)

In [59]:
outputs = torch.sigmoid(logits.squeeze(0)).cpu().detach().numpy().tolist()

In [60]:
labels = ["toxic","severe_toxic","obscene","threat","insult","identity_hate"]

In [61]:
for index, label in enumerate(labels):
    print('{} : {}%'.format(label, round(outputs[index], 2)))

toxic : 0.96%
severe_toxic : 0.01%
obscene : 0.43%
threat : 0.0%
insult : 0.9%
identity_hate : 0.0%
