In [2]:
import torch
import torch.nn as nn
from modules.TransformerModule import TransformerModule
from config.core import config
import altair as alt
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Define the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

Using device: cuda


In [43]:
print(config.model_transformer.TR_model)

{'src_vocab_size': 1024, 'tgt_vocab_size': 100, 'src_seq_len': 350, 'tgt_seq_len': 500, 'seq_len': 350, 'lang_src': 'en', 'lang_tgt': 'it', 'd_model': 512, 'num_layer': 0, 'num_neads': 8, 'dropout': 0.0, 'd_ff': 256, 'lr': 0.0001, 'batch_size': 16, 'epochs': 20, 'ckpt_file': 'None', 'tokenizer_file': 'tokenizer_{0}.json'}


In [5]:
# Load model
from pathlib import Path
model_ckpt_file = Path(
    r"D:\ML_AI_DL_Projects\projects_repo\transformer\test_dir\test_01\version_1\checkpoints\epoch=19-step=36380.ckpt"
)

model = TransformerModule.load_from_checkpoint(model_ckpt_file)
model.to(device)

DS SIZE: 32332
Max lengh of src seq: 309
Max lengh of src tgt: 309


TransformerModule(
  (src_embeded): InputEmbeddings(
    (embedding): Embedding(15698, 512)
  )
  (tgt_embeded): InputEmbeddings(
    (embedding): Embedding(22463, 512)
  )
  (src_pos_enc): PositionalEncoding(
    (dropout): Dropout(p=0.0, inplace=False)
  )
  (tgt_pos_enc): PositionalEncoding(
    (dropout): Dropout(p=0.0, inplace=False)
  )
  (encoder_blocks): ModuleList()
  (decoder_blocks): ModuleList()
  (encoder): Encoder(
    (layers): ModuleList()
    (norm_layer): LayerNormalization()
  )
  (decoder): Decoder(
    (layers): ModuleList()
    (norm_layer): LayerNormalization()
  )
  (proj_layer): ProjectionLayer(
    (proj_layer): Linear(in_features=512, out_features=22463, bias=True)
  )
  (transformer_model): Transformer(
    (encoder): Encoder(
      (layers): ModuleList()
      (norm_layer): LayerNormalization()
    )
    (decoder): Decoder(
      (layers): ModuleList()
      (norm_layer): LayerNormalization()
    )
    (src_embed): InputEmbeddings(
      (embedding): Embedd

In [6]:
val_ds = model.val_dataloader()

In [47]:
def load_next_batch():
    # Load a sample batch from the validation set
    batch = next(iter(val_ds))
    encoder_input = batch["encoder_input"]
    encoder_mask = batch["encoder_mask"]
    decoder_input = batch["decoder_input"]
    decoder_mask = batch["decoder_mask"]
    print(f"encoder_input:{encoder_input.shape}")
    encoder_input_tokens = [model.tokenizer_src.id_to_token(idx) for idx in encoder_input[0].cpu().numpy()]
    decoder_input_tokens = [model.tokenizer_tgt.id_to_token(idx) for idx in decoder_input[0].cpu().numpy()]

    # check that the batch size is 1
    assert encoder_input.size(
        0) == config.model_transformer.TR_model["batch_size"], "Batch size must be 1 for validation"

    model_out = model._greedy_decode(
        encoder_input, encoder_mask, model.tokenizer_src, model.tokenizer_tgt, config.model_transformer.TR_model['seq_len'], device)
    
    return batch, encoder_input_tokens, decoder_input_tokens

In [8]:
# Visualize data, takeover fun
def mtx2df(m, max_row, max_col, row_tokens, col_tokens):
    return pd.DataFrame(
        [
            (
                r,
                c,
                float(m[r, c]),
                "%.3d %s" % (r, row_tokens[r] if len(row_tokens) > r else "<blank>"),
                "%.3d %s" % (c, col_tokens[c] if len(col_tokens) > c else "<blank>"),
            )
            for r in range(m.shape[0])
            for c in range(m.shape[1])
            if r < max_row and c < max_col
        ],
        columns=["row", "column", "value", "row_token", "col_token"],
    )

In [9]:
#Threee attention encoder, decoder,cross-attn
def get_attn_map(attn_type: str, layer: int, head: int):
    if attn_type == "encoder":
        attn = model.encoder.layers[layer].self_attention_block.attention_scores
    elif attn_type == "decoder":
        attn = model.decoder.layers[layer].self_attention_block.attention_scores
    elif attn_type == "encoder-decoder":
        attn = model.decoder.layers[layer].cross_attention_block.attention_scores
    return attn[0, head].data

In [10]:
#Visualize attn map/ takeover fun
def attn_map(attn_type, layer, head, row_tokens, col_tokens, max_sentence_len):
    df = mtx2df(
        get_attn_map(attn_type, layer, head),
        max_sentence_len,
        max_sentence_len,
        row_tokens,
        col_tokens,
    )
    return (
        alt.Chart(data=df)
        .mark_rect()
        .encode(
            x=alt.X("col_token", axis=alt.Axis(title="")),
            y=alt.Y("row_token", axis=alt.Axis(title="")),
            color="value",
            tooltip=["row", "column", "value", "row_token", "col_token"],
        )
        #.title(f"Layer {layer} Head {head}")
        .properties(height=400, width=400, title=f"Layer {layer} Head {head}")
        .interactive()
    )

In [11]:
def get_all_attention_maps(attn_type: str, layers: list[int], heads: list[int], row_tokens: list, col_tokens, max_sentence_len: int):
    charts = []
    for layer in layers:
        rowCharts = []
        for head in heads:
            rowCharts.append(attn_map(attn_type, layer, head, row_tokens, col_tokens, max_sentence_len))
        charts.append(alt.hconcat(*rowCharts))
    return alt.vconcat(*charts)

In [50]:
batch, encoder_input_tokens, decoder_input_tokens = load_next_batch()
# print(f'Source: {batch[0]["src_text"][0]}')
# print(f'Target: {batch[0]["tgt_text"][0]}')
# sentence_len = encoder_input_tokens.index("[PAD]")

encoder_input:torch.Size([16, 350])
Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "D:\envs\pytorch_113_gpu\lib\site-packages\IPython\core\interactiveshell.py", line 3460, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\dusan\AppData\Local\Temp\ipykernel_8692\723601448.py", line 1, in <module>
    batch, encoder_input_tokens, decoder_input_tokens = load_next_batch()
  File "C:\Users\dusan\AppData\Local\Temp\ipykernel_8692\3615831309.py", line 16, in load_next_batch
    model_out = model._greedy_decode(
  File "D:\ML_AI_DL_Projects\projects_repo\transformer\transformer_app\modules\TransformerModule.py", line 192, in _greedy_decode
    # Precompute the encoder output and reuse it for every step
  File "D:\ML_AI_DL_Projects\projects_repo\transformer\transformer_app\models\Transformer.py", line 32, in encode
    x = self.src_embed(x)
  File "D:\envs\pytorch_113_gpu\lib\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "D:\ML_AI_DL

In [None]:
layers = [0, 1, 2]
heads = [0, 1, 2, 3, 4, 5, 6, 7]

# Encoder Self-Attention
get_all_attention_maps("encoder", layers, heads, encoder_input_tokens, encoder_input_tokens, min(20, sentence_len))
