In [1]:
import os
from argparse import Namespace

In [None]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

In [None]:
import pytorch_lightning as pl

In [None]:
from src.data.make_conll2003 import get_example_sets, InputExample

In [None]:
from src.models.modeling_t5ner import T5ForNER
from src.models.modeling_t5 import WeightedT5
from src.models.modeling_conll2003 import T5ForConll2003

In [None]:
# Hyperparameters for a small overfitting sanity run of T5 on CoNLL-2003.
# Built directly as a Namespace so the name `hparams` only ever refers to one
# kind of object; values are read as attributes (e.g. hparams.pretrained_model).
hparams = Namespace(
    experiment_name="Overfit T5 on CoNLL2003",
    batch_size=1,
    num_workers=2,
    optimizer="Adam",
    lr=5e-3,
    datapath="../data/conll2003",
    source_max_length=128,
    target_max_length=256,
    labels_mode='tokens',
    shuffle_train=False,
    pretrained_model='t5-small',
)

In [None]:
# Build the CoNLL-2003 T5 model from the checkpoint named in hparams
# ('t5-small'); the full hparams Namespace is handed to the model as well.
model = T5ForConll2003.from_pretrained(hparams.pretrained_model, hparams=hparams)

In [None]:
# Sanity check: display the source length limit stored on the model.
# NOTE(review): presumably mirrors hparams.source_max_length (128) — confirm in T5ForConll2003.
model.source_max_length

In [None]:
# Show how the model's tokenizer splits the tag-like string '<O>'.
# NOTE(review): presumably checking whether the tag survives as a single token — confirm intent.
model.tokenizer.tokenize('<O>')

In [None]:
# fast_dev_run=True turns the run into a quick smoke test of the train/val
# loops rather than a full training run (see the Lightning Trainer docs).
trainer = pl.Trainer(fast_dev_run=True)

In [11]:
# Run the training/validation loop. No dataloaders are passed here, so the
# model itself must supply them (it exposes train_dataloader(), used further down).
trainer.fit(model)


    | Name                                                            | Type                  | Params
------------------------------------------------------------------------------------------------------
0   | shared                                                          | Embedding             | 16 M  
1   | encoder                                                         | T5Stack               | 35 M  
2   | encoder.block                                                   | ModuleList            | 18 M  
3   | encoder.block.0                                                 | T5Block               | 3 M   
4   | encoder.block.0.layer                                           | ModuleList            | 3 M   
5   | encoder.block.0.layer.0                                         | T5LayerSelfAttention  | 1 M   
6   | encoder.block.0.layer.0.SelfAttention                           | T5Attention           | 1 M   
7   | encoder.block.0.layer.0.SelfAttention.q                         | 

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

	nonzero(Tensor input, *, Tensor out)
Consider using one of the following signatures instead:
	nonzero(Tensor input, *, bool as_tuple)


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…




1

In [None]:
# Run the test loop on the same model with the same (fast_dev_run) trainer.
trainer.test(model)

In [None]:
# Call the data-preparation hook by hand so the datasets can be inspected
# outside the Trainer.
# NOTE(review): presumably this is what populates model.train_dataset — confirm.
model.prepare_data()

In [None]:
# Display the training dataset object held by the model.
model.train_dataset

In [None]:
# Pull only the first batch from the training dataloader for manual inspection.
# The iterator is deliberately not kept in a variable, so it is released
# as soon as the batch has been fetched.
batch = next(iter(model.train_dataloader()))

In [None]:
# List every submodule of the model, one line each: qualified name + class.
# The previous loop iterated model.modules(), which yields module objects (not
# names); printing them dumped each module's full nested repr, so every
# submodule was repeated once per ancestor. named_modules() yields
# (name, module) pairs; the first pair is the root model with an empty name.
for name, module in model.named_modules():
    print(name, type(module).__name__)