### **Evaluation**
داریم generate برای ارزیابی مدل نیاز به متد

این متد را تعریف نکرده بودیم MiniBART در کلاس

ارث بری میکند MiniBART اما اکنون مدلی جدید تعریف میکنیم که از کلاس

را دارد generate است و متد MiniBARTwithGen این کلاس جدید نامش

مدل حاصل از لابراتوری 13 را دوباره لود میکنیم ولی کلاس ایم مدل را کلاس جدید تعریف شده میگیریم

🌑 ارزیابی روی دو دیتاست انجام شده، درحقیقت ارزیابی برای دو منظور انجام میشود:
1. wikitext -> ارزیابی مدل برای اینکه ببینیم چقدر زبان را یاد گرفته
2. cnn/daily-mail -> ارزیابی مدل برای اینکه ببینیم چقدر توانایی خلاصه سازی دارد



🔽  **eval-results** 🔽

| dataset | test_size | rouge1 | rouge2 | rougeL | rougeLsum |
|:---:|:---:|:---:|:---:|:---:|:---:|
| wikitext | 3000 | 0.1121 | 0.0142 | 0.0890 | 0.0769 |
| cnn/dailymail | 2000 | 0.0934 | 0.0087 | 0.0775 | 0.0893 |


In [None]:
!pip install lightning evaluate rouge_score

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from datasets import load_dataset
from transformers import AutoTokenizer
import lightning.pytorch as pl
from lightning.pytorch.loggers import CSVLogger
import evaluate
import random
import math

In [3]:
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-base")
MASK_ID = tokenizer.mask_token_id
PAD_ID = tokenizer.pad_token_id
BOS_ID = tokenizer.bos_token_id
EOS_ID = tokenizer.eos_token_id

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

### **BART-Model**

In [4]:
class FFN(nn.Module):
  def __init__(self,d_model,d_ff,dropout=0.1):
    super().__init__()
    self.lin1 = nn.Linear(d_model,d_ff)
    self.lin2 = nn.Linear(d_ff,d_model)
    self.dropout = nn.Dropout(dropout)
  def forward(self,x):
    return self.lin2(self.dropout(F.gelu(self.lin1(x))))

class SelfAttn(nn.Module):
  def __init__(self,d_model,n_heads,dropout=0.1):
    super().__init__()
    self.attn = nn.MultiheadAttention(d_model,n_heads,dropout=dropout,batch_first=True)
    self.ln = nn.LayerNorm(d_model)
  def forward(self,x,attn_mask=None,key_padding_mask=None):
    h,_ = self.attn(x,x,x,attn_mask=attn_mask,key_padding_mask=key_padding_mask,need_weights=False)
    return self.ln(x+h)

class CrossAttn(nn.Module):
  def __init__(self,d_model,n_heads,dropout=0.1):
    super().__init__()
    self.attn = nn.MultiheadAttention(d_model,n_heads,dropout=dropout,batch_first=True)
    self.ln = nn.LayerNorm(d_model)
  def forward(self,x,mem,attn_mask=None,key_padding_mask=None,mem_padding_mask=None):
    h,_ = self.attn(x,mem,mem,attn_mask=attn_mask,key_padding_mask=key_padding_mask)
    return self.ln(x+h)

class EncoderLayer(nn.Module):
  def __init__(self,d_model,n_heads,d_ff,dropout=0.1):
    super().__init__()
    self.self_attn = SelfAttn(d_model,n_heads,dropout)
    self.ffn = FFN(d_model,d_ff,dropout)
    self.ln = nn.LayerNorm(d_model)
  def forward(self,x,key_padding_mask=None):
    x = self.self_attn(x,key_padding_mask=key_padding_mask)
    return self.ln(x+self.ffn(x))

class DecoderLayer(nn.Module):
  def __init__(self,d_model,n_heads,d_ff,dropout=0.1):
    super().__init__()
    self.self_attn = SelfAttn(d_model,n_heads,dropout)
    self.cross_attn = CrossAttn(d_model,n_heads,dropout)
    self.ffn = FFN(d_model,d_ff,dropout)
    self.ln = nn.LayerNorm(d_model)
  def forward(self,x,mem,tgt_key_padding_mask=None,mem_key_padding_mask=None,causal_mask=None):
    x = self.self_attn(x,attn_mask=causal_mask,key_padding_mask=tgt_key_padding_mask)
    x = self.cross_attn(x,mem,mem_padding_mask=mem_key_padding_mask)
    return self.ln(x+self.ffn(x))

In [5]:
class MiniBART(nn.Module):
  def __init__(self,vocab_size,d_model=256,n_heads=4,d_ff=1024,num_enc=3,num_dec=3,max_len=512):
    super().__init__()
    self.tok_emb = nn.Embedding(vocab_size,d_model)
    self.pos_emb = nn.Embedding(max_len,d_model)
    self.enc_layers = nn.ModuleList([EncoderLayer(d_model,n_heads,d_ff) for _ in range(num_enc)])
    self.dec_layers = nn.ModuleList([DecoderLayer(d_model,n_heads,d_ff) for _ in range(num_dec)])
    self.lm_head = nn.Linear(d_model,vocab_size,bias=False)

  def forward(self,src_ids,tgt_ids):

    def add_pos(x):
      b,L = x.shape
      pos = torch.arange(L,device=x.device).unsqueeze(0).expand(b,L)
      return self.tok_emb(x) + self.pos_emb(pos)

    src = add_pos(src_ids)
    tgt = add_pos(tgt_ids)

    # encoder
    mem = src
    for layer in self.enc_layers:
      mem = layer(mem)

    # causal mask for decoder self-attn
    L = tgt.size(1)
    causal = torch.triu(torch.ones(L,L,device=tgt.device)*float("-inf"),diagonal=1)

    out = tgt
    for layer in self.dec_layers:
      out = layer(out,mem,causal_mask=causal)

    logits = self.lm_head(out)
    return logits

⏬ new-model : MiniBARTwithGen

In [6]:
class MiniBARTwithGen(MiniBART):
  def generate(self, src_ids, max_len=128, bos_id=BOS_ID, eos_id=EOS_ID):
    self.eval()
    device = src_ids.device
    batch_size = src_ids.size(0)

    tgt_ids = torch.full((batch_size,1),bos_id, dtype=torch.long, device=device)

    for _ in range(max_len):
      logits = self(src_ids, tgt_ids)
      next_token = logits[:, -1, :].argmax(-1).unsqueeze(-1)
      tgt_ids = torch.cat([tgt_ids,next_token],dim=1)
      if (next_token == eos_id).all():
        break

    return tgt_ids

🔽  لود یک مدل ذخیره شده با کلاس جدید

In [7]:
vocab_size = tokenizer.vocab_size
d_model = 256
n_heads = 4
d_ff = 1024
num_enc = 3
num_dec = 3

model = MiniBARTwithGen(vocab_size, d_model, n_heads, d_ff, num_enc, num_dec)

# Load the state dictionary
model.load_state_dict(torch.load("/content/drive/MyDrive/Labratory_13_miniBAR_pretrain.pth"))

# Set the model to evaluation mode
model.eval()

MiniBARTwithGen(
  (tok_emb): Embedding(50265, 256)
  (pos_emb): Embedding(512, 256)
  (enc_layers): ModuleList(
    (0-2): 3 x EncoderLayer(
      (self_attn): SelfAttn(
        (attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
        )
        (ln): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
      )
      (ffn): FFN(
        (lin1): Linear(in_features=256, out_features=1024, bias=True)
        (lin2): Linear(in_features=1024, out_features=256, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (ln): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
    )
  )
  (dec_layers): ModuleList(
    (0-2): 3 x DecoderLayer(
      (self_attn): SelfAttn(
        (attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
        )
        (ln): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
      )
      (c

In [8]:
# sample
sample_txt = "The quick brown fox jumps over the lazy dog."
inputs = tokenizer([sample_txt],return_tensors="pt",padding=True,truncation=True)

gen_ids = model.generate(inputs["input_ids"],max_len=50)
print(tokenizer.decode(gen_ids[0],skip_special_tokens=True))

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


The company's first-degree murder of the first-degree murder .
The company is the second-degree murder in the U.S.
The company is the second-degree murder of the U.S.
The company is the second


## **PRETRAIN-evaluation**

#### dataset

In [25]:
MAX_INPUT = 512
MAX_TARGET = 128

# دیتاست،متن خام
dataset = load_dataset("wikitext", "wikitext-103-v1")
test_txt = dataset["test"]["text"]

#### methods

In [26]:
# حذف خطوط خالی
def clean_lines(lines):
  out = []
  for line in lines:
    line = line.strip()
    if len(line) > 0:
      out.append(line)
  return out

# ابزار جمله بندی خیلی ساده
def split_sentences(x):
  out = []
  s = []
  for tok in x.split():
    s.append(tok)
    if tok.endswith(('.','!','?','."',"!'","?'")):
      out.append(" ".join(s))
      s = []
  if s:
    out.append(" ".join(s))
  return out if out else [x]

# نمونه گیری طول با توزیع پواسون
def simple_span_len(lam=3):
  L=0
  p = math.exp(-lam)
  F = p
  u = random.random()
  while u > F:
    L += 1
    p *= lam/L
    F += p
  return max(L,1)


def text_infilling_token_ids(ids,mask_ratio=0.3,lam=3):
  L = len(ids)
  num_to_mask = max(1,int(mask_ratio*L))
  masked = ids[:]
  covered = set()
  while len(covered) < num_to_mask:
    start = random.randrange(0,L)
    if start in covered:
      continue
    span_len = simple_span_len(lam)
    end = min(L,start+span_len)
    for i in range(start,end):
      if i not in covered:
        masked[i] = MASK_ID
        covered.add(i)
  return masked

def sentence_permute(text):
  sents = split_sentences(text)
  random.shuffle(sents)
  return " ".join(sents)

def apply_noise(text,do_sentperm=True,mask_ratio = 0.3,lam=3):
  if do_sentperm:
    text = sentence_permute(text)

  ids = tokenizer(text,truncation=True,max_length=MAX_INPUT,add_special_tokens=False)["input_ids"]
  if len(ids) == 0:
    return None

  noisy_ids = text_infilling_token_ids(ids,mask_ratio=mask_ratio,lam=lam)

  def pack(arr,max_len):
    arr = [BOS_ID] + arr[:max_len-2] + [EOS_ID]
    attn = [1]*len(arr)
    if len(arr) < max_len:
      pad_len = max_len - len(arr)
      arr += [PAD_ID]*pad_len
      attn += [0]*pad_len
    return arr, attn

  noisy_imp, noisy_attn = pack(noisy_ids,MAX_INPUT)
  clean_tgt, _  = pack(ids,MAX_TARGET)

  return {
      "input_ids":noisy_imp,
      "attention_mask":noisy_attn,
      "labels":clean_tgt
  }


def make_tensor_dataset(texts, n_limits=None):
  rows = []
  count = 0
  for t in texts:
    ex = apply_noise(t,do_sentperm=True,mask_ratio=0.3,lam=3)
    if ex is not None:
      rows.append(ex)
      count += 1
      if n_limits is not None and count >= n_limits:
          break

  input_ids = torch.tensor([r["input_ids"] for r in rows], dtype=torch.long)
  attention_mask = torch.tensor([r["attention_mask"] for r in rows], dtype=torch.long)
  labels = torch.tensor([r["labels"] for r in rows], dtype=torch.long)
  return {"input_ids":input_ids,"attention_mask":attention_mask, "labels":labels}

In [27]:
test_ids = make_tensor_dataset(test_txt[:3000],n_limits=3000)

In [28]:
def to_dataset_obj(d):
  return TensorDataset(d["input_ids"],d["attention_mask"],d["labels"])

class WrappedSet(torch.utils.data.Dataset):
  def __init__(self,d):
    self.input_ids = d["input_ids"]
    self.attention_mask = d["attention_mask"]
    self.labels = d["labels"]

  def __len__(self): return self.input_ids.size(0)
  def __getitem__(self,idx):
    return {
        "input_ids":self.input_ids[idx],
        "attention_mask":self.attention_mask[idx],
        "labels":self.labels[idx]
    }


🔽 **evaluation-model** 🔽

In [31]:
test_dataset_wiki = WrappedSet(test_ids)
test_loader_wiki = DataLoader(
    test_dataset_wiki,
    batch_size=4,
    shuffle=False,
    num_workers=2
)

device = "cuda" if torch.cuda.is_available() else "cpu"

model.eval()
model.to(device)

wiki_test_preds = []
wiki_test_labels = []

with torch.no_grad():
    for batch in test_loader_wiki:
        input_ids = batch["input_ids"].to(device)
        labels = batch["labels"].to(device)

        generated_ids = model.generate(input_ids, max_len=MAX_TARGET)

        # Decode the generated and true labels
        preds_batch = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
        labels_batch = tokenizer.batch_decode(labels, skip_special_tokens=True)

        wiki_test_preds.extend(preds_batch)
        wiki_test_labels.extend(labels_batch)

# Compute ROUGE scores
rouge = evaluate.load("rouge")
wiki_results = rouge.compute(
    predictions=wiki_test_preds,
    references=wiki_test_labels,
    use_stemmer=True
)

print("Evaluation results on Wikitext test set:")
print(wiki_results)

Evaluation results on Wikitext test set:
{'rouge1': np.float64(0.11217163100231278), 'rouge2': np.float64(0.014261643632793254), 'rougeL': np.float64(0.08903827454187113), 'rougeLsum': np.float64(0.07691927671715536)}


🔽  **eval-results** 🔽

| dataset | test_size | rouge1 | rouge2 | rougeL | rougeLsum |
|:---:|:---:|:---:|:---:|:---:|:---:|
| wikitext | 3000 | 0.1121 | 0.0142 | 0.0890 | 0.0769 |


## **sumarize-evaluation**

🔽 **Dataset** 🔽

In [9]:
cnn_dmail = load_dataset("cnn_dailymail","3.0.0")

README.md: 0.00B [00:00, ?B/s]

train-00000-of-00003.parquet:   0%|          | 0.00/257M [00:00<?, ?B/s]

train-00001-of-00003.parquet:   0%|          | 0.00/257M [00:00<?, ?B/s]

train-00002-of-00003.parquet:   0%|          | 0.00/259M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/34.7M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/30.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/287113 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/13368 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/11490 [00:00<?, ? examples/s]

In [11]:
MAX_INPUT = 512
MAX_TARGET = 128

def prepprocess(batch):
  inputs = tokenizer(
      batch["article"],max_length=MAX_INPUT,padding="max_length",truncation=True
  )
  targets = tokenizer(
      batch["highlights"],max_length=MAX_TARGET,padding="max_length",truncation=True
  )
  inputs["labels"] = targets["input_ids"]
  return inputs

### **Test**

In [22]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

test_cnn_dmail = cnn_dmail["test"].select(range(2000))
tokenized_test_cnn_dmail = test_cnn_dmail.map(
    prepprocess,
    batched=True,
    remove_columns=test_cnn_dmail.column_names
)
test_cnn_dmail = tokenized_test_cnn_dmail.with_format("torch")

test_loader_cnn_dmail = DataLoader(
    test_cnn_dmail,
    batch_size=4,
    shuffle=False,
    num_workers=2
)

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

🔽 **evaluation** 🔽

In [23]:
# Evaluate the model on the test set
model.to(device)
model.eval()

test_preds = []
test_labels = []

with torch.no_grad():
    for batch in test_loader_cnn_dmail:
        input_ids = batch["input_ids"].to(device)
        labels = batch["labels"].to(device)

        # Use the generate method of the MiniBARTwithGen model
        generated_ids = model.generate(input_ids, max_len=MAX_TARGET)

        # Decode the generated and true labels
        preds_batch = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
        labels_batch = tokenizer.batch_decode(labels, skip_special_tokens=True)

        test_preds.extend(preds_batch)
        test_labels.extend(labels_batch)

# Compute ROUGE scores
rouge = evaluate.load("rouge")
results = rouge.compute(
    predictions=test_preds,
    references=test_labels,
    use_stemmer=True
)

print(results)

{'rouge1': np.float64(0.09341155441423496), 'rouge2': np.float64(0.008701909700815441), 'rougeL': np.float64(0.07751997793752094), 'rougeLsum': np.float64(0.08933795377417134)}


🔽  **eval-results** 🔽

| dataset | test_size | rouge1 | rouge2 | rougeL | rougeLsum |
|:---:|:---:|:---:|:---:|:---:|:---:|
| cnn/dailymail | 2000 | 0.0934 | 0.0087 | 0.0775 | 0.0893 |
