In [1]:
# Load IPython's autoreload extension (reload_ext keeps this cell safe to re-run).
%reload_ext autoreload
# Mode 2: re-import every module before each cell runs, so edits to imported
# .py files are picked up without restarting the kernel.
%autoreload 2

In [2]:
import torch

In [49]:
from argparse import Namespace

# Single bag of settings for this notebook. It is handed whole to
# build_fdb_data(args) and build_models(num_classes, args), and read directly
# by the optimizer, DataLoader and training-loop cells.
args = Namespace(
    num_queries=20,
    hidden_dim=10,
    random_state=42,  # also used below to seed PyTorch's global RNG
    preprocessing=True,
    test_size=0.2,
    batch_size=5,  # batch size of both DataLoaders
    input_path="./input/feedback-prize-2021/",
    device="cpu",  # turned into a torch.device at the end of this cell
    set_cost_class=1,
    set_cost_bbox=1,
    set_cost_giou=1,
    bbox_loss_coef=0.5,
    giou_loss_coef=0.1,
    eos_coef=0.1,
    num_workers=2,  # worker processes per DataLoader
    start_epoch=0,  # the training loop runs range(start_epoch, epochs)
    epochs=10,
    lr=0.001,  # AdamW learning rate
    weight_decay=0.1,  # AdamW weight decay
    lr_drop=1,  # StepLR step_size (scheduler is stepped once per epoch)
    clip_max_norm=0,  # forwarded to train_one_epoch as max_norm
)

# Seed PyTorch's global RNG so torch-side randomness (e.g. the shuffling done
# by the training DataLoader) is repeatable after Restart & Run All.
# NOTE(review): if the project modules also draw from `random` or NumPy, those
# generators need seeding too -- confirm against datasets/models.
torch.manual_seed(args.random_state)

device = torch.device(args.device)

In [33]:
from datasets import build_fdb_data, collate_fn

# build_fdb_data(args) hands back four objects: the two datasets wrapped by the
# DataLoaders, the postprocessor passed to evaluate(), and the class count
# passed to build_models().
(
    dataset_train,
    dataset_val,
    postprocessor,
    num_classes,
) = build_fdb_data(args)

100%|██████████| 15594/15594 [00:04<00:00, 3791.39it/s]


In [34]:
from models import build_models

# Build the tokenizer, the model and its training criterion from the class
# count and the shared settings, then place the model on the chosen device.
tokenizer, model, criterion = build_models(num_classes, args)
model.to(device)

# Count only parameters that will receive gradients (requires_grad=True).
n_parameters = sum(
    weight.numel()
    for weight in model.parameters()
    if weight.requires_grad
)
print("number of params:", n_parameters)

Some weights of the model checkpoint at allenai/led-base-16384 were not used when initializing LEDModel: ['lm_head.weight', 'final_logits_bias']
- This IS expected if you are initializing LEDModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LEDModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


number of params: 161950614


In [52]:
import time
import datetime
from torch.utils.data import DataLoader

# Training batches: reshuffled on every pass over the dataset.
data_loader_train = DataLoader(
    dataset_train,
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=args.num_workers,
    collate_fn=collate_fn,
)
# Validation batches: fixed order, and the final partial batch is kept.
data_loader_val = DataLoader(
    dataset_val,
    batch_size=args.batch_size,
    shuffle=False,
    drop_last=False,
    num_workers=args.num_workers,
    collate_fn=collate_fn,
)

# AdamW over every model parameter, using the learning rate and weight decay
# from the shared settings.
optimizer = torch.optim.AdamW(
    params=model.parameters(),
    lr=args.lr,
    weight_decay=args.weight_decay,
)
# StepLR scales the LR by its gamma (left at the library default here) once
# every `args.lr_drop` scheduler steps.
# NOTE(review): with lr_drop=1 and one scheduler step per epoch, the LR is
# decayed after every epoch -- confirm this is intended.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_drop)

In [None]:
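# NOTE: this whole cell is commented out and never runs. It spells out one
# training pass by hand (per-document forward pass, weighted loss, optimizer
# step, per-batch timing); the next cell trains via train_one_epoch instead.
# from tqdm import tqdm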

# model.train()
# criterion.train()

# loss_list = []
# data_bar = tqdm(data_loader_train, desc=f"Train Epoch {0:4d}")
# for samples, targets, info in data_bar:
#     st = time.time()

#     targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

#     outputs = []
#     for doc in samples:
#         inputs = tokenizer([doc]).to(device)
#         outputs.append(model(inputs))
    
#     batch_outputs = {
#         key: torch.cat([o[key] for o in outputs])
#         for key in outputs[0].keys()
#     }
    
#     loss_dict = criterion(batch_outputs, targets)  # type: Dict[str, torch.Tensor]
    
#     mt = time.time()
    
#     weight_dict = criterion.weight_dict
#     losses = sum(loss_dict[k] * weight_dict[k] for k in loss_dict.keys() if k in weight_dict)  # type: ignore

#     loss_dict_unscaled = {f"{k}_unscaled": v for k, v in loss_dict.items()}
#     loss_dict_scaled = {
#         k: v * weight_dict[k] for k, v in loss_dict.items() if k in weight_dict
#     }
#     losses_scaled = sum(loss_dict_scaled.values())  # type: ignore

#     loss_value = losses_scaled.item()  # type: ignore

#     optimizer.zero_grad()
#     losses.backward()  # type: ignore
#     optimizer.step()
    
#     ot = time.time()
    
#     loss_list.append(losses.item())  # type: ignore
#     data_bar.set_postfix(
#         {
#             "lr": optimizer.param_groups[0]["lr"],
#             "mean_loss": sum(loss_list) / len(loss_list),
#             "model time": f'{mt - st:.2f} s',
#             "optim time": f'{ot - mt:.2f} s'
#         }
#     )


In [None]:
from engine import train_one_epoch, evaluate

print("Start training")
start_time = time.time()

for epoch in range(args.start_epoch, args.epochs):
    # Train for one epoch on the training loader.
    train_one_epoch(
        model=model,
        criterion=criterion,
        tokenizer=tokenizer,
        optimizer=optimizer,
        data_loader=data_loader_train,
        device=device,
        epoch=epoch,
        max_norm=args.clip_max_norm,
    )
    # Advance the learning-rate schedule once per epoch.
    lr_scheduler.step()

    # Reset the postprocessor before each validation run
    # (presumably clears results kept from the previous epoch -- TODO confirm).
    postprocessor.reset_results()
    evaluate(
        model=model,
        criterion=criterion,
        tokenizer=tokenizer,
        postprocessor=postprocessor,
        data_loader=data_loader_val,
        device=device,
        epoch=epoch,
        tag="Validation",
    )

# Report wall-clock duration, truncated to whole seconds, as H:MM:SS.
total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print(f"Training time {total_time_str}")