In [19]:
import torch
import torch.nn as nn

# Cross-entropy criterion, used below with targets given as class indices.
loss = nn.CrossEntropyLoss()

In [20]:
# Random scores: 3 rows, 5 columns (one column per class); gradients are tracked.
inputs = torch.randn((3, 5), requires_grad=True)
inputs

tensor([[ 0.2974,  0.8668,  0.4024,  0.9320, -0.4867],
        [ 1.7119, -1.9887,  0.0033, -0.3764,  1.1037],
        [ 0.8845,  1.6104,  0.5904,  0.1680, -1.3059]], requires_grad=True)

In [21]:
# Fixed class-index targets, one per row of `inputs`; each index must lie in [0, 5).
# (The earlier random initialisation was overwritten immediately, so it is dropped.)
target = torch.tensor([3, 0, 1])
target

tensor([3, 0, 1])

In [22]:
# Apply the criterion to the scores and their class-index targets.
output = loss(input=inputs, target=target)
output

tensor(0.8600, grad_fn=<NllLossBackward0>)

In [74]:
from transformers import BertTokenizer, BertForMultipleChoice
import torch

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertForMultipleChoice.from_pretrained("bert-base-uncased")

prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
choice0 = "It is eaten with a fork and a knife."
choice1 = "It is eaten while held in the hand."

# Index of the correct choice: choice0 (according to Wikipedia ;)). Shape (1,) because the batch size is 1.
labels = torch.tensor([0])

# Encode the prompt once per candidate answer, as (prompt, choice) pairs.
encoding = tokenizer([prompt] * 2, [choice0, choice1], return_tensors="pt", padding=True)

# Prepend a batch axis of size 1 to every encoded tensor before the forward pass.
outputs = model(
    **{name: tensor.unsqueeze(0) for name, tensor in encoding.items()},
    labels=labels,
)

# the linear classifier still needs to be trained
loss = outputs.loss
logits = outputs.logits

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMultipleChoice: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertForMultipleChoice from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMultipleChoice from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForMultipleChoice were not initialized from the model checkpoint at bert-base-uncased and are newly

In [75]:
# Loss returned by the model for the two-choice example. For a cross-entropy loss,
# a uniform guess over 2 choices gives ln(2), about 0.693.
loss

tensor(0.7047, grad_fn=<NllLossBackward0>)

In [76]:
prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
choice0 = "It is eaten with a fork and a knife."
choice1 = "It is eaten while held in the hand."
choice2 = "It is not a food."
choice3 = "Maybe Italians hate pizza."

# Index of the correct choice: choice0 (according to Wikipedia ;)). Shape (1,) because the batch size is 1.
labels = torch.tensor([0])

# Same prompt paired with each of the four candidate answers.
encoding = tokenizer(
    [prompt] * 4,
    [choice0, choice1, choice2, choice3],
    return_tensors="pt",
    padding=True,
)

# Prepend a batch axis of size 1 to every encoded tensor before the forward pass.
outputs = model(
    **{name: tensor.unsqueeze(0) for name, tensor in encoding.items()},
    labels=labels,
)

# the linear classifier still needs to be trained
loss = outputs.loss
logits = outputs.logits

In [77]:
# Loss returned by the model for the four-choice example. For a cross-entropy loss,
# a uniform guess over 4 choices gives ln(4), about 1.386.
loss

tensor(1.4111, grad_fn=<NllLossBackward0>)

In [37]:
from transformers import CharacterBertTokenizer,CharacterBertModel

# Raw string literal: the Windows path contains backslash pairs (\D, \C, \H, \c) that
# are invalid escape sequences in a normal string and raise a warning on newer
# Python versions. The resulting path value is unchanged.
# NOTE(review): this is a machine-specific absolute path; consider a configurable directory.
# This rebinds `model`, replacing the BertForMultipleChoice instance created earlier.
model = CharacterBertModel.from_pretrained(r"E:\Documents\Character Bert\Hate Speech\character-bert-hindi")

Some weights of the model checkpoint at E:\Documents\Character Bert\Hate Speech\character-bert-hindi were not used when initializing CharacterBertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing CharacterBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing CharacterBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [56]:
# Character-level tokenizer; both normalisation options are passed explicitly as None.
tokenizer2 = CharacterBertTokenizer(
    do_lower_case=None,
    strip_accents=None,
)

In [57]:
# Pair the prompt with each of the four candidate answers and encode them as one padded batch.
encoding = tokenizer2(
    [prompt] * 4,
    [choice0, choice1, choice2, choice3],
    return_tensors="pt",
    padding=True,
)

In [58]:
# Pull the id tensor out of the encoding and inspect its shape.
input_ids = encoding["input_ids"]
input_ids.shape

torch.Size([4, 32, 50])

In [59]:
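# Left disabled: on the (4, 32, 50) tensor shown above, view(-1, input_ids.size(-1)) would produce shape (128, 50).
# input_ids = input_ids.view(-1, input_ids.size(-1)) if input_ids is not None else None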

In [60]:
# input_ids.size()

In [61]:
# Attention mask produced by the tokenizer for the encoded pairs.
attention_mask = encoding["attention_mask"]

In [62]:
# Collapse every leading axis so the mask is 2-D, keeping the last axis as-is.
if attention_mask is not None:
    attention_mask = attention_mask.view(-1, attention_mask.shape[-1])
print(attention_mask.shape)

torch.Size([4, 32])


In [63]:
# Segment ids from the tokenizer, collapsed to 2-D with the last axis kept as-is.
token_type_ids = encoding["token_type_ids"]
if token_type_ids is not None:
    token_type_ids = token_type_ids.view(-1, token_type_ids.shape[-1])
print(token_type_ids.shape)

torch.Size([4, 32])


In [64]:
# Optional forward-pass arguments, all left unset. Note that `labels` is reset to None here.
position_ids = head_mask = inputs_embeds = None
labels = None
output_attentions = output_hidden_states = return_dict = None

In [65]:
# Shape left after taking index 0 along the last axis of the 3-D id tensor.
input_ids[..., 0].shape

torch.Size([4, 32])

In [66]:
# Forward pass through the encoder. The arguments from `position_ids` onward
# were all set to None in the cell above.
outputs = model(
    input_ids,
    attention_mask=attention_mask,
    token_type_ids=token_type_ids,
    position_ids=position_ids,
    head_mask=head_mask,
    inputs_embeds=inputs_embeds,
    output_attentions=output_attentions,
    output_hidden_states=output_hidden_states,
    return_dict=return_dict,
)

In [67]:
# Take the second element of the model output. Presumably this is the pooled
# per-sequence representation (one row per choice) - confirm against the model's return type.
pooled_output = outputs[1]
pooled_output

tensor([[ 0.2370, -0.5007,  0.4099,  ...,  0.4214, -0.2825,  0.2255],
        [ 0.2024, -0.5325,  0.4142,  ...,  0.4162, -0.1954,  0.1529],
        [ 0.3460, -0.5662,  0.4103,  ...,  0.3715, -0.2643,  0.2628],
        [ 0.3098, -0.6016,  0.3524,  ...,  0.4059, -0.3249,  0.2686]],
       grad_fn=<TanhBackward0>)

In [68]:
import torch.nn as nn

# Scoring head: maps each 768-dimensional pooled vector to a single logit.
# NOTE(review): 768 presumably equals the encoder's hidden size - confirm against the model config.
classifier = nn.Linear(in_features=768, out_features=1)

In [69]:
# One raw score per pooled vector.
logits = classifier(input=pooled_output)
logits

tensor([[0.0327],
        [0.0407],
        [0.0368],
        [0.0129]], grad_fn=<AddmmBackward0>)

In [70]:
# Regroup the per-choice scores into rows of 4 (the number of candidate answers).
reshaped_logits = logits.view((-1, 4))
reshaped_logits

tensor([[0.0327, 0.0407, 0.0368, 0.0129]], grad_fn=<ViewBackward0>)

In [71]:
# Gold label for the manual loss computation: index 1, as a batch of size 1.
labels = torch.tensor([1])
# Reset `loss`; it is filled in by the next cell when labels are available.
loss = None
labels

tensor([1])

In [72]:
from torch.nn import CrossEntropyLoss

# Compute the multiple-choice loss by hand: cross-entropy of the per-choice
# scores against the gold index. Skipped when no labels are set.
if labels is not None:
    loss_fct = CrossEntropyLoss()
    loss = loss_fct(input=reshaped_logits, target=labels)

In [73]:
# Manually computed cross-entropy over the 4 choices. A uniform guess gives ln(4),
# about 1.386, so a value near that reflects the untrained linear head.
loss

tensor(1.3764, grad_fn=<NllLossBackward0>)