In [4]:
import torch
from torch.nn.utils.rnn import pad_sequence
from transformers import T5ForConditionalGeneration
from tokenization_enc_dec import EncDecTokenizer

In [2]:
# Locations of the converted checkpoint directory and the BPE vocabulary file.
MODEL_DIR = './torch_eva/'
VOCAB_PATH = './EVA/src/bpe_dialog_new/vocab.txt'

# Both objects are used as notebook-level globals by the cells below.
tokenizer = EncDecTokenizer(VOCAB_PATH)
model = T5ForConditionalGeneration.from_pretrained(MODEL_DIR)

In [52]:
def data_flow():
    """Yield an endless stream of synthetic (source, target) text pairs.

    The source is the character '你' repeated 1-5 times and the target is
    the character '我' repeated 3-15 times; both repeat counts are drawn
    from the global ``random`` generator, source first.

    The generator never terminates, so callers must stop consuming it
    themselves.
    """
    from random import randint

    source_char, target_char = '你', '我'
    while True:
        source_len = randint(1, 5)
        target_len = randint(3, 15)
        yield source_char * source_len, target_char * target_len

def get_single_data(x='你好', y='你也好'):
    """Turn one (source, target) text pair into three 1-D LongTensors.

    Args:
        x: Source text.
        y: Target text.

    Returns:
        A tuple ``(input_ids, decoder_input_ids, labels)`` where

        * ``input_ids`` is the encoded source followed by the separator id
          and sentinel id 0,
        * ``decoder_input_ids`` is sentinel id 0 followed by the encoded
          target,
        * ``labels`` is the encoded target followed by the separator id,
          i.e. the decoder input shifted left by one position.
    """
    # Uses the notebook-level `tokenizer`.
    sentinel = tokenizer.get_sentinel_id(0)
    source_ids = tokenizer.encode(x)
    target_ids = tokenizer.encode(y)

    encoder_seq = source_ids + [tokenizer.sep_id, sentinel]
    decoder_seq = [sentinel] + target_ids
    label_seq = target_ids + [tokenizer.sep_id]

    return tuple(
        torch.LongTensor(seq)
        for seq in (encoder_seq, decoder_seq, label_seq)
    )


def get_batch_data(batch_size=16):
    """Yield padded training batches built from ``data_flow()`` forever.

    Args:
        batch_size: Number of examples collected before a batch is emitted.

    Yields:
        A tuple ``(input_ids, mask, decoder_input_ids, decoder_mask, labels)``
        of batch-first tensors. The id tensors are padded with
        ``tokenizer.pad_id``; each mask is 1 where the matching id tensor
        differs from the pad id and 0 elsewhere; ``labels`` is padded
        with -100.
    """
    def _pad(sequences, fill):
        # Right-pad variable-length 1-D tensors into one (batch, max_len) tensor.
        return pad_sequence(list(sequences), batch_first=True, padding_value=fill)

    pending = []
    for source_text, target_text in data_flow():
        pending.append(get_single_data(source_text, target_text))
        if len(pending) < batch_size:
            continue

        # Transpose the list of per-example triples into three columns.
        encoder_seqs, decoder_seqs, label_seqs = zip(*pending)

        input_ids = _pad(encoder_seqs, tokenizer.pad_id)
        mask = (input_ids != tokenizer.pad_id).to(input_ids.dtype)

        decoder_input_ids = _pad(decoder_seqs, tokenizer.pad_id)
        decoder_mask = (decoder_input_ids != tokenizer.pad_id).to(input_ids.dtype)

        # The -100 padding value is a magic number taken from the upstream
        # model source code; see:
        # https://github.com/huggingface/transformers/blob/1c06240e1b3477728129bb58e7b6c7734bb5074e/src/transformers/models/t5/modeling_t5.py#L1580
        labels = _pad(label_seqs, -100)

        yield input_ids, mask, decoder_input_ids, decoder_mask, labels
        pending = []

In [55]:
# Runtime switches read by the training cell below.
fp16 = False
cuda = False

# Adam is always built with lr=1e-4. When fp16 is on, eps is raised from the
# library default to 1e-4 (presumably for half-precision numerical
# stability -- TODO confirm the intent).
adam_options = {'lr': 1e-4}
if fp16:
    adam_options['eps'] = 1e-4

optimizer = torch.optim.Adam(model.parameters(), **adam_options)

In [1]:
# Optimisation-step counter, incremented once per batch by the training loop.
# It is initialised outside the training cell, presumably so that re-running
# that cell keeps counting instead of restarting from zero -- TODO confirm.
step = 0

In [2]:
# Training loop.
#
# Relies on notebook-level state defined in earlier cells: `model`,
# `optimizer`, `fp16`, `cuda`, `step` and `get_batch_data`. Run those cells
# first on a fresh kernel, otherwise this cell fails with a NameError.
#
# NOTE: `get_batch_data` never stops yielding, so this loop runs until the
# kernel is interrupted.
# NOTE(review): only the batch tensors are moved to the GPU here; the model
# itself must already be on the GPU when `cuda` is True -- confirm.

# Sliding window of the most recent per-step losses (plain Python floats).
losses = []
optimizer.zero_grad()
for x, m0, y, m1, z in get_batch_data(3):
    if cuda:
        x, m0, y, m1, z = (t.cuda() for t in (x, m0, y, m1, z))

    # Mixed precision is only meaningful on the GPU and only when requested;
    # with enabled=False the context manager is a no-op.
    with torch.cuda.amp.autocast(enabled=fp16 and cuda):
        out = model(
            input_ids=x,
            attention_mask=m0,
            decoder_input_ids=y,
            decoder_attention_mask=m1,
            labels=z
        )
        loss = out.loss

    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    # .item() copies the scalar to the host as a float, so no graph or
    # device memory is kept alive by the history list.
    losses.append(loss.item())
    losses = losses[-1000:]  # keep at most the last 1000 steps

    # Report the running mean on a single, continuously overwritten line.
    mean_loss = sum(losses) / len(losses)
    print(f'step: {step} loss: {mean_loss:.4f}', end='\r')
    step += 1

NameError: name 'optimizer' is not defined