In [1]:
from core.dataloaders.focus_dataloader import PytorchFoCusDatasetV2, FoCusDatasetV1
from core.tokenizers.bart_tokenizers import BartFoCusTokenizerV1
from core.hyperparameters.bart_hyperparameters import BartHyperparametersV1

# Location of the FoCus JSON file loaded by this cell.
focus_json_path = "./datasets/FoCus/valid_focus.json"

hyper = BartHyperparametersV1()
focus_dataset = FoCusDatasetV1(input_dataset_path=focus_json_path)

# The tokenizer is created from the "facebook/bart-base" checkpoint and is given
# the same hyperparameter object that the dataset wrapper below receives.
tokenizer = BartFoCusTokenizerV1.from_pretrained(
    "facebook/bart-base", hyperparameters=hyper
)

# Bundle the raw dataset, tokenizer and hyperparameters into one indexable object;
# a later cell reads a single sample from it with pytorch_foc[11].
pytorch_foc = PytorchFoCusDatasetV2(
    hyperparameters=hyper,
    tokenizer=tokenizer,
    dataset=focus_dataset,
)

  from .autonotebook import tqdm as notebook_tqdm


In [1]:
from core.dataloaders.focus_dataloader import FoCusLightningDataModuleV2
from core.tokenizers.bart_tokenizers import BartFoCusTokenizerV1
from core.hyperparameters.bart_hyperparameters import BartHyperparametersV2
from core.base_models.bart_models import BartLMV2
from transformers import BartConfig

# FoCus JSON files handed to the Lightning data module.
train_json_path = "./datasets/FoCus/train_focus.json"
valid_json_path = "./datasets/FoCus/valid_focus.json"

hyper = BartHyperparametersV2()
tokenizer = BartFoCusTokenizerV1.from_pretrained(hyper.model_name, hyperparameters=hyper)

data_module = FoCusLightningDataModuleV2(
    train_path_dataset=train_json_path,
    valid_path_dataset=valid_json_path,
    hyperparameters=hyper,
    tokenizer=tokenizer,
    is_debug=True,
)
data_module.setup()

# Only the first training batch is needed; it is unpacked into forward() below
# as keyword arguments.
train_loader = data_module.train_dataloader()
input_item = next(iter(train_loader))

bart_config = BartConfig.from_pretrained(hyper.model_name)
model = BartLMV2(config=bart_config, hyperparameters=hyper, tokenizer=tokenizer)

# Resize the embeddings to the tokenizer's length before running the forward pass.
model.resize_token_embeddings(len(tokenizer))
# Smoke test: the forward output is intentionally discarded, reaching the print
# means the call did not raise.
model.forward(**input_item)
print("Done")

  from .autonotebook import tqdm as notebook_tqdm


Done


In [4]:
import torch
from torch import nn

# head = nn.Linear(3, 4, bias=False)
# input = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.float32)
# logits = head(input)
# targets = torch.tensor([1, 2], dtype=torch.long)
# loss = nn.BCELoss()(logits, targets)

# Random scores with gradient tracking, plus one integer class index per row.
# Note: the name `input` shadows the Python builtin inside this notebook.
n_score_rows, n_score_classes = 3, 5
input = torch.randn(n_score_rows, n_score_classes, requires_grad=True)
target = torch.empty(n_score_rows, dtype=torch.long).random_(n_score_classes)
(input, target)


(tensor([[-0.3922,  1.7638,  0.5222,  0.3016, -0.2778],
         [ 1.3271, -1.2603, -0.0327, -0.4648, -1.5070],
         [-1.0322,  0.2424, -1.2268,  1.4694, -1.6350]], requires_grad=True),
 tensor([3, 1, 1]))

In [7]:
from torch.nn import Sigmoid
import torch
# Squash random 2x10 logits through a sigmoid, threshold at 0.5, and flatten the
# result into a 1-D vector of 0/1 int32 values.
random_logits = torch.randn([2, 10])
probabilities = Sigmoid()(random_logits)
(probabilities > 0.5).int().view(-1)

tensor([0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1],
       dtype=torch.int32)

In [29]:
import torch
# Toy accuracy computation: two random integer labels in [0, 5) compared against
# the arg-max over random 5-way logits.
target = torch.randint(0, 5, (2, 1)).flatten()
logits = torch.randn(2, 5)
preds = logits.argmax(dim=1)
print(preds)
print(target)
# Fraction of positions where the predicted class equals the label, as a Python float.
is_correct = preds.eq(target)
is_correct.float().mean().cpu().item()

tensor([1, 3])
tensor([0, 4])


0.0

In [6]:
# 2x1 tensor of random integers drawn from the half-open range [0, 10).
torch.randint(low=0, high=10, size=[2, 1])

tensor([[9],
        [8]])

In [13]:
import torch

# Demonstrates that BCELoss applied to sigmoid outputs matches BCEWithLogitsLoss
# applied to the raw logits, for a multi-label batch.
batch_size = 2
num_classes = 11

loss_fn = torch.nn.BCELoss()

outputs_before_sigmoid = torch.randn(batch_size, num_classes, dtype=torch.float32)
sigmoid_outputs = torch.sigmoid(outputs_before_sigmoid)
# Binary cross-entropy targets must lie in [0, 1]. Draw hard 0/1 labels and cast
# them to float (BCELoss needs a floating-point target); sampling them with
# torch.randn, as before, yields values outside that range and a meaningless loss.
target_classes = torch.randint(0, 2, (batch_size, num_classes)).float()  # randints in [0, 2).

loss = loss_fn(sigmoid_outputs, target_classes)

# alternatively, use BCE with logits, on outputs before sigmoid.
loss_fn_2 = torch.nn.BCEWithLogitsLoss()
loss2 = loss_fn_2(outputs_before_sigmoid, target_classes)
print(loss, loss2)

tensor(1.1145) tensor(1.1145)


In [7]:
# 2x5 tensor of samples from a standard normal distribution.
torch.randn(2, 5)

tensor([[ 0.5799,  0.4033,  0.1488, -1.3507,  1.3772],
        [ 0.2756,  0.4271,  0.2537,  0.6392, -1.0257]])

In [5]:
# Two rows of five binary targets, all set to the positive class.
n_target_rows, n_labels = 2, 5
target = torch.ones([n_target_rows, n_labels], dtype=torch.float32)
# The prediction is a constant logit of 1.5 for every element.
output = torch.full([n_target_rows, n_labels], 1.5)
# One positive-class weight per label; all ones leaves the loss unweighted.
pos_weight = torch.ones([n_labels])
criterion = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight)
# Evaluates to -log(sigmoid(1.5)); the value is not stored, the cell displays
# the two tensors instead.
criterion(output, target)
(output, target)

(tensor([[1.5000, 1.5000, 1.5000, 1.5000, 1.5000],
         [1.5000, 1.5000, 1.5000, 1.5000, 1.5000]]),
 tensor([[1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.]]))

In [4]:
# Sanity check on sample 11: the stored dialog BOS/EOS indices must equal the
# position of the first occurrence of the corresponding special-token id in
# the sample's input_ids.
sample = pytorch_foc[11]
input_ids = sample["input_ids"]

print(sample["dialog_bos_index"])
dialog_bos = tokenizer.get_vocab().get(hyper.dialog_bos_token)
bos_position = input_ids.index(dialog_bos)
assert sample["dialog_bos_index"] == bos_position, (
    f"dialog_bos_index {sample['dialog_bos_index']} {bos_position}"
)

print(sample["dialog_eos_index"])
dialog_eos = tokenizer.get_vocab().get(hyper.dialog_eos_token)
eos_position = input_ids.index(dialog_eos)
assert sample["dialog_eos_index"] == eos_position, (
    f"dialog_eos_index {sample['dialog_eos_index']} {eos_position}"
)

249
272
