In [1]:
# Clone the entire repo.
!git clone -l -s git://github.com/Zumo09/Feedback-Prize.git cloned-repo
%cd cloned-repo
!ls

Cloning into 'cloned-repo'...
remote: Enumerating objects: 16151, done.[K
remote: Counting objects: 100% (16151/16151), done.[K
remote: Compressing objects: 100% (16019/16019), done.[K
remote: Total 16151 (delta 295), reused 15884 (delta 108), pack-reused 0[K
Receiving objects: 100% (16151/16151), 143.47 MiB | 15.22 MiB/s, done.
Resolving deltas: 100% (295/295), done.
Checking out files: 100% (15640/15640), done.
/content/cloned-repo
class_correlation.ipynb  engine.py  main.py    prova.ipynb	 util
csv			 input	    models     prova_main.ipynb
datasets		 LICENSE    old_utils  README.md


In [2]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.16.2-py3-none-any.whl (3.5 MB)
[K     |████████████████████████████████| 3.5 MB 8.7 MB/s 
[?25hCollecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 60.4 MB/s 
[?25hCollecting tokenizers!=0.11.3,>=0.10.1
  Downloading tokenizers-0.11.4-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.8 MB)
[K     |████████████████████████████████| 6.8 MB 51.3 MB/s 
[?25hCollecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.4.0-py3-none-any.whl (67 kB)
[K     |████████████████████████████████| 67 kB 6.5 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.47-py2.py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 62.8 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers
  Attempting uninstall: pyy

In [3]:
# Load (or reload, if already loaded) IPython's autoreload extension.
%reload_ext autoreload
# Mode 2: re-import all modules before each cell executes, so edits to the
# cloned project's .py files are picked up without restarting the kernel.
%autoreload 2

In [4]:
import torch

In [5]:
from argparse import Namespace

# Run configuration, bundled in a Namespace so it can be handed to the
# project's builder functions (build_fdb_data, build_models) in later cells.
# NOTE(review): the option names look like DETR-style matcher costs and loss
# coefficients — confirm their exact meaning against the project's models code.
args = Namespace(
    # Model size options.
    num_queries=20,
    hidden_dim=10,
    # Data loading / splitting options.
    random_state=42,
    preprocessing=True,
    test_size=0.2,
    batch_size=5,
    input_path="./input/feedback-prize-2021/",
    num_workers=2,
    # Use the GPU when one is present; fall back to CPU otherwise. The recorded
    # CPU-only run took roughly a minute per training step.
    device="cuda" if torch.cuda.is_available() else "cpu",
    # Matching costs and loss coefficients.
    set_cost_class=1,
    set_cost_bbox=1,
    set_cost_giou=1,
    bbox_loss_coef=0.5,
    giou_loss_coef=0.1,
    eos_coef=0.1,
    # Optimisation schedule.
    start_epoch=0,
    epochs=10,
    lr=0.001,
    weight_decay=0.1,
    # Passed to StepLR as step_size in a later cell. With StepLR's default
    # gamma of 0.1 and one scheduler step per epoch, a value of 1 divides the
    # learning rate by 10 after every epoch.
    lr_drop=1,
    clip_max_norm=0,
)

device = torch.device(args.device)

In [6]:
from datasets import build_fdb_data, collate_fn

# Build the datasets from the options in `args`. The helper returns four values:
# the two datasets wrapped by the train/validation DataLoaders below, a
# postprocessor (later passed to evaluate()), and the class count (later passed
# to build_models()).
dataset_train, dataset_val, postprocessor, num_classes = build_fdb_data(args)

100%|██████████| 15594/15594 [00:01<00:00, 11899.52it/s]


In [7]:
from models import build_models

# Create the tokenizer, the network and the loss from the project builder,
# then place the network on the selected device.
tokenizer, model, criterion = build_models(num_classes, args)
model.to(device)

# Report the number of trainable scalars: only parameters that require
# gradients are counted.
trainable_sizes = [p.numel() for p in model.parameters() if p.requires_grad]
n_parameters = sum(trainable_sizes)
print("number of params:", n_parameters)

Downloading:   0%|          | 0.00/1.07k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/618M [00:00<?, ?B/s]

Some weights of the model checkpoint at allenai/led-base-16384 were not used when initializing LEDModel: ['final_logits_bias', 'lm_head.weight']
- This IS expected if you are initializing LEDModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LEDModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Downloading:   0%|          | 0.00/27.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/772 [00:00<?, ?B/s]

number of params: 161950614


In [9]:
import time
import datetime
from torch.utils.data import DataLoader

# AdamW over every model parameter, with a step-wise learning-rate decay
# applied every `args.lr_drop` scheduler steps.
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=args.lr,
    weight_decay=args.weight_decay,
)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_drop)

# Settings shared by both loaders; only shuffling differs between the two.
loader_kwargs = dict(
    batch_size=args.batch_size,
    collate_fn=collate_fn,
    num_workers=args.num_workers,
)
data_loader_train = DataLoader(dataset_train, shuffle=True, **loader_kwargs)
data_loader_val = DataLoader(
    dataset_val, shuffle=False, drop_last=False, **loader_kwargs
)

In [None]:
from tqdm import tqdm

# Manual pass over the training loader with per-step timing shown in the
# progress bar. Each document of a batch is tokenized and fed through the model
# on its own; the per-document outputs are then concatenated key by key before
# the loss is computed against the batch targets.
model.train()
criterion.train()

loss_list = []      # per-step weighted loss, kept for later inspection
running_loss = 0.0  # running total so the mean is O(1) per step
data_bar = tqdm(data_loader_train, desc=f"Train Epoch {0:4d}")
for samples, targets, info in data_bar:
    st = time.time()

    # Move every tensor of every target dict to the training device.
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

    outputs = []
    for doc in samples:
        inputs = tokenizer([doc]).to(device)
        outputs.append(model(inputs))

    batch_outputs = {
        key: torch.cat([o[key] for o in outputs])
        for key in outputs[0].keys()
    }

    # Mapping from loss name to loss tensor.
    loss_dict = criterion(batch_outputs, targets)

    mt = time.time()

    # Weighted sum of the loss terms; terms without an entry in the
    # criterion's weight_dict do not contribute.
    weight_dict = criterion.weight_dict
    losses = sum(
        loss_dict[k] * weight_dict[k] for k in loss_dict.keys() if k in weight_dict
    )

    optimizer.zero_grad()
    losses.backward()
    optimizer.step()

    ot = time.time()

    loss_value = losses.item()
    loss_list.append(loss_value)
    running_loss += loss_value
    data_bar.set_postfix(
        {
            "lr": optimizer.param_groups[0]["lr"],
            "mean_loss": running_loss / len(loss_list),
            # st -> mt covers target transfer, forward passes and the criterion.
            "model time": f'{mt - st:.2f} s',
            # mt -> ot covers loss weighting, backward pass and optimizer step.
            "optim time": f'{ot - mt:.2f} s'
        }
    )


Train Epoch    0:   0%|          | 2/2495 [02:19<47:24:13, 68.45s/it, lr=0.001, mean_loss=4.04, model time=14.98 s, optim time=46.78 s]

In [10]:
from engine import train_one_epoch, evaluate

# Full training driver: for each epoch run one training pass, advance the
# learning-rate schedule, clear the postprocessor's accumulated results and
# evaluate on the validation loader. Total wall-clock time is printed at the end.
# NOTE(review): the recorded run of this cell stopped with a TypeError during
# the first training epoch; the traceback was not preserved, so the cause has
# to be investigated in engine.train_one_epoch.
print("Start training")
start_time = time.time()

# Keyword arguments common to both the training and the evaluation call.
shared_kwargs = dict(
    tokenizer=tokenizer,
    model=model,
    criterion=criterion,
    device=device,
)

for epoch in range(args.start_epoch, args.epochs):
    train_one_epoch(
        data_loader=data_loader_train,
        optimizer=optimizer,
        epoch=epoch,
        max_norm=args.clip_max_norm,
        **shared_kwargs,
    )
    lr_scheduler.step()

    postprocessor.reset_results()
    evaluate(
        postprocessor=postprocessor,
        data_loader=data_loader_val,
        epoch=epoch,
        tag="Validation",
        **shared_kwargs,
    )

total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print("Training time {}".format(total_time_str))

Start training


Train Epoch    0:   0%|          | 0/2495 [00:19<?, ?it/s]


TypeError: ignored