In [3]:
from T5FineTuner import T5FineTuner, RPDataset
from utils import get_folds
import torch
import argparse
from transformers import T5Tokenizer
from torch.utils.data import Dataset, DataLoader

# --- Configuration -----------------------------------------------------------
DATASET = "RP-Crowd-3"
MODEL_NAME_OR_PATH = "GermanT5/t5-efficient-oscar-german-small-el32"
WANDB_PROJECT_NAME = "rp-crowd-3-folds-t5-efficient-small-el32"
OUTPUT_DIR = "./GermanT5-RP-Mod/t5-efficient-oscar-german-small-el32/"
SOURCE = f"./Datasets/{DATASET}-folds.csv"

tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME_OR_PATH)
#TODO: can I automatically pick the best checkpoint path?
# Built from OUTPUT_DIR (which already ends with "/") so the directory is spelled
# out in one place only; the resulting path is unchanged.
checkpoint_path = f"{OUTPUT_DIR}lr-0.0004-wd-0.1/epoch=3-val_accuracy=0.74-val_loss=0.28.ckpt"
train_inputs, train_targets, val_inputs, val_targets = get_folds(SOURCE)

train_dataset = RPDataset(tokenizer, train_inputs, train_targets)
valid_dataset = RPDataset(tokenizer, val_inputs, val_targets)

# The checkpoint is read here only to recover the stored hyper-parameters.
# map_location="cpu" keeps its tensors on the CPU, so this also works on a machine
# without the device the checkpoint was saved from and does not occupy accelerator memory.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
checkpoint = torch.load(checkpoint_path, map_location="cpu")
hparams = checkpoint["hyper_parameters"]
# The datasets are injected into the hyper-parameters before the Namespace is built.
hparams["train_dataset"] = train_dataset
hparams["val_dataset"] = valid_dataset
new_args = argparse.Namespace(**hparams)

pretrained = T5FineTuner.load_from_checkpoint(checkpoint_path, hparams=new_args)

In [4]:
import torch
import torch.nn as nn
# from torchvision.models.utils import load_state_dict_from_url
from typing import Type, Any, Callable, Union, List, Dict, Optional, cast
from torch import Tensor
from collections import OrderedDict 

In [5]:
class ForwardHook():
    """Record the inputs and output of a module's most recent forward pass.

    A forward hook is registered on ``module`` at construction time. After each
    forward pass of that module, ``input`` holds the positional inputs it
    received and ``output`` holds what it returned. Both are ``None`` until the
    module has run at least once.

    The instance can be used as a context manager, in which case the hook is
    removed on exit.

    Args:
        module: The ``torch.nn.Module`` to observe.
    """
    def __init__(self, module):
        # Define the attributes before registering the hook so they can be read
        # safely even when no forward pass has happened yet.
        self.input = None
        self.output = None
        self.hook = module.register_forward_hook(self.hook_fn)

    def hook_fn(self, module, input, output):
        """Store the latest inputs/output; returns None so the output is not altered."""
        self.input = input
        self.output = output

    def close(self):
        """Remove the hook; previously captured values stay available."""
        self.hook.remove()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

In [42]:
# Fetch the "decoder" child of the wrapped model by name and attach a forward hook to it.
layer = pretrained.model._modules["decoder"]
hook = ForwardHook(layer)

In [43]:
from transformers import T5Tokenizer
# Single example to push through the model.
data = "classification Ich stelle mal folgende Vermutung an so wie es auch bei Tätern mit Migrationshintergrund hier immer dargestellt wird Einzeltäter verwirrt traumatisiert hat nix mit den Rechtsextremen zu tun"

# The tokenizer is re-created here from the same model name as in the setup cell.
tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME_OR_PATH)

# Encode the text as a PyTorch tensor of token ids.
tokenized = tokenizer.encode(
    data,
    max_length=512,
    padding=True,
    truncation=True,
    return_tensors="pt",
)
print(tokenized)

# No attention mask is passed; max_length=2 limits the length of the generated sequence.
out = pretrained.model.generate(input_ids=tokenized, max_length=2)


tensor([[23574, 22822,   107,  6496,   141,  2192, 18247,    33,    60,    48,
            27,    35,    41,  4519,    21,    15, 14489, 15256,   116,   101,
          3655,    53,  1453, 13920, 23224, 12106,  3702,  3050,    56,  7007,
            15,    16,   973, 17693,     9,    14,   503,     1]])


In [44]:
# Show the output stored on `hook`, i.e. what the hooked module returned on its most recent forward pass.
hook.output

BaseModelOutputWithPastAndCrossAttentions(last_hidden_state=tensor([[[-1.5888e-01,  1.6675e-01, -5.0194e-02, -3.4081e-01, -9.0620e-02,
           8.8041e-02, -1.6488e-01,  3.0255e-02,  2.4732e-01,  1.9800e-01,
          -1.4810e-01,  1.7747e-01, -7.2410e-02, -2.7423e-01,  1.3715e-01,
          -2.7753e-02,  8.3768e-02, -1.0054e-01, -6.0705e-02,  3.5369e-02,
          -2.0839e-01, -9.6032e-02, -3.7310e-02, -6.7081e-03, -1.3011e-03,
          -2.7690e-02,  2.0287e-02, -1.2359e-02, -1.4545e-01, -8.7353e-02,
          -1.7021e-01,  3.1517e-03,  1.8281e-02, -8.2371e-02,  2.0901e-01,
          -1.9613e-01,  6.4268e-02, -6.9938e-02,  6.1749e-02, -1.0153e-01,
          -1.2193e-02, -1.4682e-01, -2.7348e-01,  1.4211e-01,  2.6999e-01,
          -9.8812e-02, -7.5829e-02,  1.0441e-02,  4.6193e-03, -8.6598e-02,
           5.7329e-02, -5.7058e-04,  2.0230e-02,  2.2517e-01, -2.9329e-01,
          -1.8443e-01,  1.8172e-01, -7.8949e-02,  6.3828e-02, -1.1897e-01,
          -1.1417e+00,  1.1916e-01,  1.4

In [23]:
import transformers
# Display the installed transformers version for reference.
transformers.__version__

'4.19.2'

In [7]:
class GetLayerModel(nn.Module):
    """Wrap a model and record the outputs of selected direct child modules.

    Forward hooks are registered on the direct children of ``pretrained`` whose
    position (in ``pretrained._modules`` order) is contained in
    ``output_layers``. Each hook stores the child's latest output in
    ``selected_out`` under the child's name.

    Args:
        output_layers: Container of integer positions of the children to record.
        pretrained: The ``nn.Module`` to wrap and run.
        *args: Forwarded unchanged to ``nn.Module.__init__``.
    """
    def __init__(self, output_layers, pretrained, *args):
        super().__init__(*args)
        self.output_layers = output_layers
        # Latest output of every hooked child, keyed by child name.
        self.selected_out = OrderedDict()
        # The wrapped model.
        self.pretrained = pretrained
        # Handles of the registered hooks, kept so they can be removed again.
        self.fhooks = []

        for index, (name, child) in enumerate(self.pretrained._modules.items()):
            if index in self.output_layers:
                self.fhooks.append(child.register_forward_hook(self.forward_hook(name)))

    def forward_hook(self, layer_name):
        """Build a hook that stores a module's output under ``layer_name``."""
        def hook(module, input, output):
            self.selected_out[layer_name] = output
        return hook

    def remove_hooks(self):
        """Detach every registered hook; already captured outputs are kept."""
        for handle in self.fhooks:
            handle.remove()
        self.fhooks.clear()

    def forward(self, x, *args, **kwargs):
        """Run the wrapped model and return its output with the captured outputs.

        Args:
            x: First positional input of the wrapped model.
            *args, **kwargs: Any further inputs, passed through to the wrapped model.

        Returns:
            A tuple ``(out, selected)`` where ``out`` is the wrapped model's
            output and ``selected`` is an ``OrderedDict`` mapping child names to
            their captured outputs.
        """
        out = self.pretrained(x, *args, **kwargs)
        # Hand back a shallow copy: returning the internal dict itself would let
        # the next forward pass overwrite results the caller is still holding.
        return out, OrderedDict(self.selected_out)

In [13]:
# Number of entries yielded by named_modules().
sum(1 for _ in pretrained.named_modules())

709

In [20]:
# Collect every (qualified name, module) pair, then list the names one per line.
lst = [*pretrained.named_modules()]
for name, layer in lst:
    # Only the name is printed; the module object itself is not shown.
    print(name)


model
model.shared
model.encoder
model.encoder.block
model.encoder.block.0
model.encoder.block.0.layer
model.encoder.block.0.layer.0
model.encoder.block.0.layer.0.SelfAttention
model.encoder.block.0.layer.0.SelfAttention.q
model.encoder.block.0.layer.0.SelfAttention.k
model.encoder.block.0.layer.0.SelfAttention.v
model.encoder.block.0.layer.0.SelfAttention.o
model.encoder.block.0.layer.0.SelfAttention.relative_attention_bias
model.encoder.block.0.layer.0.layer_norm
model.encoder.block.0.layer.0.dropout
model.encoder.block.0.layer.1
model.encoder.block.0.layer.1.DenseReluDense
model.encoder.block.0.layer.1.DenseReluDense.wi
model.encoder.block.0.layer.1.DenseReluDense.wo
model.encoder.block.0.layer.1.DenseReluDense.dropout
model.encoder.block.0.layer.1.DenseReluDense.relu_act
model.encoder.block.0.layer.1.layer_norm
model.encoder.block.0.layer.1.dropout
model.encoder.block.1
model.encoder.block.1.layer
model.encoder.block.1.layer.0
model.encoder.block.1.layer.0.SelfAttention
model.enco

In [None]:
# GetLayerModel needs the positions of the children to record and the model to wrap;
# calling it without arguments raises TypeError.
# NOTE(review): recording the "decoder" child of pretrained.model mirrors the ForwardHook
# cell above — confirm this is the layer that was intended here.
decoder_index = list(pretrained.model._modules).index("decoder")
model = GetLayerModel([decoder_index], pretrained.model)

In [21]:
# NOTE(review): the recorded output of this cell shows pip found no distribution named
# "torchknickknacks", so the import in the following cell cannot succeed as-is.
# TODO: confirm the correct install source for this package and pin a version.
%pip install torchknickknacks

Defaulting to user installation because normal site-packages is not writeable
[31mERROR: Could not find a version that satisfies the requirement torchknickknacks (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for torchknickknacks[0m[31m
[0mNote: you may need to restart the kernel to use updated packages.


In [None]:
from torchknickknacks import modelutils

# Name of the layer to look at next.
# NOTE(review): this rebinds `layer`, which an earlier cell binds to the decoder module
# object passed to ForwardHook; from here on `layer` is a str.
layer = "lm_head"