In [1]:
from transformers import AutoTokenizer, T5EncoderModel

tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")
model = T5EncoderModel.from_pretrained("google/flan-t5-large")
input_ids = tokenizer(
    "Studies have been shown that owning a dog is good for you", return_tensors="pt"
).input_ids  # Batch size 1
outputs = model(input_ids=input_ids)
last_hidden_states = outputs.last_hidden_state

[2024-01-10 21:50:14,178] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)


In [8]:
outputs.last_hidden_state.size(), input_ids.size()

(torch.Size([1, 15, 1024]), torch.Size([1, 15]))

In [2]:
from torch import nn

l = nn.AvgPool2d((15, 1))
l1 = nn.Flatten()
l1(l(last_hidden_states)).size(), last_hidden_states.size()

(torch.Size([1, 1024]), torch.Size([1, 15, 1024]))

In [3]:
from transformers import PreTrainedModel, T5EncoderModel
from torch import nn

class CustomT5Model(PreTrainedModel):
    def __init__(self, config, base_model):
        super(CustomT5Model, self).__init__(config)
        self.t5 = T5EncoderModel.from_pretrained(
            base_model,
            config=config
        )
        ### New layers:
        self.regression_layer = nn.Sequential(
            nn.AvgPool2d((15, 1)),
            nn.Flatten(),
            nn.Linear(config.hidden_size, 512),
            nn.GELU(),
            nn.Linear(512, 256),
            nn.GELU(),
            nn.Linear(256, 128),
            nn.GELU(),
            nn.Linear(128, 64),
            nn.GELU(),
            nn.Linear(64, 32),
            nn.GELU(),
            nn.Linear(32, 16),
            nn.GELU(),
            nn.Linear(16, 6),
            nn.Sigmoid(),
        )
    def forward(self, **inputs):
        t5_outputs = self.t5(**inputs)
        logits = self.regression_layer(t5_outputs.last_hidden_state)
        return logits

    def _init_weights(self, module):
        self.t5._init_weights(module)

In [4]:
from transformers import AutoConfig, AutoTokenizer
config = AutoConfig.from_pretrained('google/flan-t5-large')
t5_model = CustomT5Model(
    config=config,
    base_model='google/flan-t5-large'
)
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")

In [5]:
input_ids = tokenizer(
    "Studies have been shown that owning a dog is good for you", return_tensors="pt"
).input_ids  # Batch size 1
outputs = t5_model(input_ids=input_ids)
outputs.size()

torch.Size([1, 6])

In [13]:
import torch
loss_fct = nn.MSELoss()
labels = torch.Tensor([[0.5] * 6])
loss = loss_fct(outputs, labels[0])
loss, outputs, labels

  return F.mse_loss(input, target, reduction=self.reduction)


(tensor(0.0008, grad_fn=<MseLossBackward0>),
 tensor([[0.5520, 0.5265, 0.5085, 0.4967, 0.5344, 0.5015]],
        grad_fn=<SigmoidBackward0>),
 tensor([[0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000]]))

In [None]:
from transformers import Trainer

class CustomTrainer(Trainer):
    def __int__(self, *args, **kwargs):
        super().__int__(*args, **kwargs)
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs['labels']
        # forward pass
        logits = model(
            input_ids=inputs['input_ids'].to(model.device),
            attention_mask=inputs['attention_mask'].to(model.device),
        )
        loss_fct = nn.MSELoss()
        loss = loss_fct(logits, labels)
        return (loss, logits) if return_outputs else loss