In [123]:
5+5

10

In [124]:
import sys
import os
sys.path.append(os.path.abspath(".."))  # لو الـ Notebooks جوه المشروع


In [125]:
import torch 
import torch.nn as nn 
import sentencepiece as spm
from pathlib import Path
from src.config import get_config , get_weights_file_path
from src.transformer_model import get_model 
from src.data_loader import get_data_loader
from src.utils.decoding import greedy_decode
from src.utils.text import ids_to_text , text_to_ids

import altair as alt
import pandas as pd
import numpy as np 
import warnings

warnings.filterwarnings('ignore')


In [126]:
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = "cpu"
print(f"Using device: {device}")

Using device: cpu


In [127]:
config = get_config()

train_loader , valid_loader  = get_data_loader(config)

✅ DataLoader ready


In [128]:
model =  get_model(config).to(device)

model_filename = get_weights_file_path(config , f"02")
state = torch.load(model_filename)

model.load_state_dict(state['model_state_dict'])


<All keys matched successfully>

In [131]:
# Load tokenizers
proj_root = Path("__file__").resolve().parent.parent
tok_dir = proj_root / "tokenizer_models"
tokenizer_src = spm.SentencePieceProcessor(model_file=str(tok_dir / "spm_ar_unigram.model"))
tokenizer_trg = spm.SentencePieceProcessor(model_file=str(tok_dir / "spm_en_unigram.model"))

# cal model parameters

In [98]:
# بعد ما تبني الموديل وتحمل ال weights
from torchsummary import summary

# 1) الطريقة الأساسية: حساب كل البراميتار
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"Total parameters: {total_params}")
print(f"Trainable parameters: {trainable_params}")

Total parameters: 72543632
Trainable parameters: 72543632


# try the model

In [130]:
input_text = "اهلا بيك في مصر"

In [132]:
src_ids = text_to_ids(tokenizer_src, input_text, config["seq_len"]).unsqueeze(0).to(device)  # (1, S)
src_mask = (src_ids != 0).unsqueeze(1).unsqueeze(2)  # PAD_ID == 0

In [133]:
src_ids

tensor([[    1,  1155, 13867,     6,  2518,     2]])

In [134]:
src_mask

tensor([[[[True, True, True, True, True, True]]]])

In [None]:
model_out = greedy_decode(model , encoder_input , encoder_mask , config['seq_len'] , device)

Transformer(
  (encoder): Encoder(
    (layers): ModuleList(
      (0-3): 4 x EncoderBlock(
        (self_attention_block): MultiHeadAttentionBlock(
          (w_q): Linear(in_features=512, out_features=512, bias=True)
          (w_k): Linear(in_features=512, out_features=512, bias=True)
          (w_v): Linear(in_features=512, out_features=512, bias=True)
          (w_o): Linear(in_features=512, out_features=512, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (feed_forward_block): FeedForward(
          (fc1): Linear(in_features=512, out_features=2048, bias=True)
          (fc2): Linear(in_features=2048, out_features=512, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (activation): ReLU()
        )
        (residual_connections): ModuleList(
          (0-1): 2 x ResidualConnection(
            (dropout): Dropout(p=0.1, inplace=False)
            (norm): LayerNormalization()
          )
        )
      )
    )
    (norm): LayerN

# visulaize the attentions

In [None]:


def load_next_batch():
    batch = next(iter(valid_loader))
    encoder_input = batch['src_input'].to(device)        # (B, S)
    decoder_input = batch['decoder_input'].to(device)    # (B, T)
    encoder_mask = batch['src_mask'].to(device)          # (B, 1, 1, S)
    decoder_mask = batch['trg_mask'].to(device)          # (B, 1, T, T
    
    # check that the batch size is 1
    assert encoder_input.size(
        0) == 1, "Batch size must be 1 for validation"
    
    model_out = greedy_decode(model , encoder_input , encoder_mask , config['seq_len'] , device)
    src_text , trg_text = ids_to_text(tokenizer_src ,encoder_input) , ids_to_text(tokenizer_trg , decoder_input)
    
    return batch , src_text  , trg_text
    

In [120]:
model.encoder.layers[0].self_attention_block.attention_score

AttributeError: 'MultiHeadAttentionBlock' object has no attribute 'attention_score'

In [118]:
def mtx2df(m, max_row, max_col, row_tokens, col_tokens):
    return pd.DataFrame(
        [
            (
                r,
                c,
                float(m[r, c]),
                "%.3d %s" % (r, row_tokens[r] if len(row_tokens) > r else "<blank>"),
                "%.3d %s" % (c, col_tokens[c] if len(col_tokens) > c else "<blank>"),
            )
            for r in range(m.shape[0])
            for c in range(m.shape[1])
            if r < max_row and c < max_col
        ],
        columns=["row", "column", "value", "row_token", "col_token"],
    )

def get_attn_map(attn_type: str, layer: int, head: int):
    if attn_type == "encoder":
        attn = model.encoder.layers[layer].self_attention_block.attention_score
    elif attn_type == "decoder":
        attn = model.decoder.layers[layer].self_attention_block.attention_score
    elif attn_type == "encoder-decoder":
        attn = model.decoder.layers[layer].cross_attention_block.attention_score
    return attn[0, head].data

def attn_map(attn_type, layer, head, row_tokens, col_tokens, max_sentence_len):
    df = mtx2df(
        get_attn_map(attn_type, layer, head),
        max_sentence_len,
        max_sentence_len,
        row_tokens,
        col_tokens,
    )
    return (
        alt.Chart(data=df)
        .mark_rect()
        .encode(
            x=alt.X("col_token", axis=alt.Axis(title="")),
            y=alt.Y("row_token", axis=alt.Axis(title="")),
            color="value",
            tooltip=["row", "column", "value", "row_token", "col_token"],
        )
        #.title(f"Layer {layer} Head {head}")
        .properties(height=400, width=400, title=f"Layer {layer} Head {head}")
        .interactive()
    )

def get_all_attention_maps(attn_type: str, layers: list[int], heads: list[int], row_tokens: list, col_tokens, max_sentence_len: int):
    charts = []
    for layer in layers:
        rowCharts = []
        for head in heads:
            rowCharts.append(attn_map(attn_type, layer, head, row_tokens, col_tokens, max_sentence_len))
        charts.append(alt.hconcat(*rowCharts))
    return alt.vconcat(*charts)


In [119]:
batch, encoder_input_tokens, decoder_input_tokens = load_next_batch(num = 1)
print(f'Source: {batch["src_input"][0]}')
print(f'Target: {batch["decoder_input"][0]}')

TypeError: 'DataLoader' object is not subscriptable

In [114]:
batch["src_input"][0].tolist()

[1,
 184,
 7843,
 4035,
 397,
 8,
 4276,
 144,
 210,
 6,
 10104,
 8451,
 715,
 4442,
 2717,
 306,
 1486,
 5,
 2]

In [121]:
encoder_input_tokens

'ومن المامول فيه ان تستمر الدول الاطراف في ابداء الارادة السياسية الضرورية لتحسين عملية الاستعراض.'

In [122]:
decoder_input_tokens

'It was hoped that States parties would continue to show the political will necessary to create better conditions for the review process.'

In [93]:

sentence_len = batch["src_input"][0].size(0)
layers = [0, 1, 2]
heads = [0, 1, 2]

# Encoder Self-Attention
get_all_attention_maps("encoder", layers, heads, encoder_input_tokens.split(), encoder_input_tokens.split(), min(20, sentence_len))


In [94]:
# Encoder Self-Attention
get_all_attention_maps("decoder", layers, heads, decoder_input_tokens.split(), decoder_input_tokens.split(), min(20, sentence_len))


In [95]:
# Encoder Self-Attention
get_all_attention_maps("encoder-decoder", layers, heads, encoder_input_tokens.split(), decoder_input_tokens.split(), min(20, sentence_len))