### For k merging

In [30]:
import torch, numpy as np
from transformers import GPT2Tokenizer, GPT2LMHeadModel, GPT2Config
from datasets import load_dataset
from scipy.optimize import linear_sum_assignment

# -------------------------
# 1. Load model & tokenizer
# -------------------------
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

model = GPT2LMHeadModel.from_pretrained("gpt2").to(device).eval()
model.config.pad_token_id = tokenizer.eos_token_id

# -------------------------
# 2. Extract FFN pre-activation features
# -------------------------
def extract_ffn_features(model, texts, max_tokens=4000):
    features = {i: [] for i in range(len(model.transformer.h))}
    hooks = []

    def create_hook(layer):
        def hook(module, inp, out):
            features[layer].append(out.detach().cpu())
        return hook

    for i, block in enumerate(model.transformer.h):
        hooks.append(block.mlp.c_fc.register_forward_hook(create_hook(i)))

    enc = tokenizer(texts, return_tensors='pt', padding=True, truncation=True, max_length=64).to(device)
    with torch.no_grad():
        model(**enc)

    for h in hooks: h.remove()

    X = {}
    for i in features:
        x = torch.cat(features[i], dim=0)
        x = x.view(-1, x.shape[-1])
        if x.shape[0] > max_tokens:
            x = x[:max_tokens]
        X[i] = x.numpy()
    return X

dataset = load_dataset("wikitext", "wikitext-2-raw-v1", split="test[:1%]")
texts = [t for t in dataset['text'] if len(t.strip()) > 0][:64]
feature_mats = extract_ffn_features(model, texts)

# -------------------------
# 3. Get neuron alignment between layers using Hungarian algorithm
# -------------------------
def get_alignment(X_a, X_b):
    C = np.corrcoef(X_a, X_b, rowvar=False)
    C = C[:X_a.shape[1], X_a.shape[1]:]
    row_ind, col_ind = linear_sum_assignment(-C)
    return col_ind, float(C[row_ind, col_ind].mean())

# -------------------------
# 4. Permute FFN Weights for Alignment
# -------------------------
def permute_ffn(model, layer, perm):
    block = model.transformer.h[layer].mlp
    perm = torch.tensor(perm, dtype=torch.long, device=device)

    # GPT-2 weight shapes are transposed in your setup:
    # c_fc:   [768, 3072]
    # c_proj: [3072, 768]
    with torch.no_grad():
        block.c_fc.weight[:] = block.c_fc.weight[:, perm]
        block.c_fc.bias[:]   = block.c_fc.bias[perm]
        block.c_proj.weight[:] = block.c_proj.weight[perm, :]

# -------------------------
# 5. Merge k FFN layers: average aligned weights into the first layer
# -------------------------
def merge_k_layers(model, layers, perms):
    anchor = layers[0]
    blk_anchor = model.transformer.h[anchor].mlp

    W1 = blk_anchor.c_fc.weight.data.clone()
    b1 = blk_anchor.c_fc.bias.data.clone()
    W2 = blk_anchor.c_proj.weight.data.clone()
    b2 = blk_anchor.c_proj.bias.data.clone()

    for layer in layers[1:]:
        permute_ffn(model, layer, perms[layer])
        blk = model.transformer.h[layer].mlp
        W1 += blk.c_fc.weight
        b1 += blk.c_fc.bias
        W2 += blk.c_proj.weight
        b2 += blk.c_proj.bias

    k = len(layers)
    blk_anchor.c_fc.weight.data = W1 / k
    blk_anchor.c_fc.bias.data   = b1 / k
    blk_anchor.c_proj.weight.data = W2 / k
    blk_anchor.c_proj.bias.data   = b2 / k

# -------------------------
# 6. Compress Model by keeping only merged layers
# -------------------------
import copy

def compress_model(model, keep_layers, device="cuda" if torch.cuda.is_available() else "cpu"):
    # 1) build a full config dict from the original and change n_layer
    cfg_dict = model.config.to_dict()
    cfg_dict["n_layer"] = len(keep_layers)
    new_cfg = GPT2Config(**cfg_dict)

    # 2) instantiate compressed model
    compressed = GPT2LMHeadModel(new_cfg).to(device)

    with torch.no_grad():
        # 3) copy embeddings (handle positional length mismatch safely)
        compressed.transformer.wte.weight.copy_(model.transformer.wte.weight)
        # if compressed's wpe is smaller, slice original; if same, copy whole
        compressed.transformer.wpe.weight.copy_(
            model.transformer.wpe.weight[: compressed.transformer.wpe.weight.shape[0], :]
        )

        # 4) copy each kept layer's state_dict into the new model's layer
        for new_i, old_i in enumerate(keep_layers):
            compressed.transformer.h[new_i].load_state_dict(
                copy.deepcopy(model.transformer.h[old_i].state_dict())
            )

        # 5) copy final layernorm and lm_head
        compressed.transformer.ln_f.weight.copy_(model.transformer.ln_f.weight)
        compressed.transformer.ln_f.bias.copy_(model.transformer.ln_f.bias)
        compressed.lm_head.weight.copy_(model.lm_head.weight)

        # 6) ensure config tokens are consistent
        if hasattr(model.config, "pad_token_id"):
            compressed.config.pad_token_id = model.config.pad_token_id
        compressed.config.vocab_size = model.config.vocab_size

    return compressed


# -------------------------
# 7. RUN MERGING FOR ANY k
# -------------------------
k = 8  # change to 4,6,8,10 etc.

layer_groups = [list(range(i, min(i+k, len(model.transformer.h)))) for i in range(0, 12, k)]
perms = {}
# [[0,1,2,3], [4,5,6,7], [8,9,10,11]] for k=4
for group in layer_groups:
    anchor = group[0]
    for layer in group[1:]:
        perm, score = get_alignment(feature_mats[anchor], feature_mats[layer])
        perms[layer] = perm
        print(f"Aligned Layer {anchor} ‚Üî Layer {layer}, mean corr = {score:.4f}")
    merge_k_layers(model, group, perms)

# -------------------------
#  Save merged (full depth) model before compression
merged_dir = f"gpt2_merged_full_model{k}"
model.save_pretrained(merged_dir)
tokenizer.save_pretrained(merged_dir)
print(f" Saved merged full model to {merged_dir}")

compressed_model = compress_model(model, [g[0] for g in layer_groups])
# torch.save(compressed_model, f"compressed_model{k}.pth")
save_dir = f"gpt2_compressed_half_model{k}"   # save to a directory, not a file
compressed_model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)  # also save tokenizer for safety


Aligned Layer 0 ‚Üî Layer 1, mean corr = 0.3761
Aligned Layer 0 ‚Üî Layer 2, mean corr = 0.3878
Aligned Layer 0 ‚Üî Layer 3, mean corr = 0.3948
Aligned Layer 0 ‚Üî Layer 4, mean corr = 0.3776
Aligned Layer 0 ‚Üî Layer 5, mean corr = 0.3726
Aligned Layer 0 ‚Üî Layer 6, mean corr = 0.3724
Aligned Layer 0 ‚Üî Layer 7, mean corr = 0.3653
Aligned Layer 8 ‚Üî Layer 9, mean corr = 0.5122
Aligned Layer 8 ‚Üî Layer 10, mean corr = 0.4984
Aligned Layer 8 ‚Üî Layer 11, mean corr = 0.4677
 Saved merged full model to gpt2_merged_full_model8


('gpt2_compressed_half_model8/tokenizer_config.json',
 'gpt2_compressed_half_model8/special_tokens_map.json',
 'gpt2_compressed_half_model8/vocab.json',
 'gpt2_compressed_half_model8/merges.txt',
 'gpt2_compressed_half_model8/added_tokens.json')

In [31]:
# -------------------------
# 8. Compare parameter counts + test generation
# -------------------------
def count_params(m): return sum(p.numel() for p in m.parameters())

orig = GPT2LMHeadModel.from_pretrained("gpt2").to(device)
print("\nOriginal Params:", count_params(orig))

prompt = "India will become global leader in AI because"
inputs = tokenizer(prompt, return_tensors="pt").to(device)
with torch.no_grad():
    sample_out = orig.generate(**inputs, max_length=40)
print("\nGenerated output:")
print(tokenizer.decode(sample_out[0], skip_special_tokens=True))

# Iterate over compressed models
for k in [4, 6, 8, 10]:
    print(f"\n{'='*20}  Testing compressed_model{k}  {'='*20}")
    try:
        merged_model = GPT2LMHeadModel.from_pretrained(f"gpt2_merged_full_model{k}").to(device)
    except Exception as e:
        print(f"‚ùå Failed to load gpt2_merged_full_model{k}:", e)
        continue
    try:
        compressed_model_test = GPT2LMHeadModel.from_pretrained(f"gpt2_compressed_half_model{k}").to(device)
    except Exception as e:
        print(f"‚ùå Failed to load compressed_model{k}.pth:", e)
        continue

    print("Merged Params:", count_params(merged_model))
    print("Compressed Params:", count_params(compressed_model_test))

    # Generate text
    with torch.no_grad():
        out = compressed_model_test.generate(**inputs, max_length=40)
    print("Generated output:")
    print(tokenizer.decode(out[0], skip_special_tokens=True))


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Original Params: 124439808

Generated output:
India will become global leader in AI because of its ability to solve problems in a way that is not just a matter of human ingenuity but also of the human spirit.

The AI revolution is a

Merged Params: 124439808
Compressed Params: 60648960
Generated output:
India will become global leader in AI becausecsvcsvcsv databases databases databases databases databases databases databases databases databases databases databases databases databases databases databases databases databases databases databases databases databases databases databases databases databases databases databases databases databases

Merged Params: 124439808
Compressed Params: 53561088
Generated output:
India will become global leader in AI because Secondly Secondly Secondly Secondly Secondly Secondly Secondly Secondly Secondly Secondly Secondly Secondly Secondly Secondly Secondly Secondly Secondly Secondly Secondly Secondly Secondly Secondly Secondly Secondly Secondly Second

### perplexity

In [32]:
%pip install transformers datasets accelerate evaluate


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [None]:
### perplexity with out training 

In [34]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from datasets import load_dataset
import math

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

# Load dataset
dataset = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")
texts = [t for t in dataset['text'] if len(t.strip()) > 0][:100]  # use 100 lines for speed


def compute_perplexity(model_or_name, texts, batch_size=4):
    """Compute perplexity for either a model name or a preloaded model object."""
    if isinstance(model_or_name, str):
        model = GPT2LMHeadModel.from_pretrained(model_or_name).to(device).eval()
    else:
        model = model_or_name.to(device).eval()

    total_loss, total_tokens = 0.0, 0

    for i in range(0, len(texts), batch_size):
        batch = texts[i:i + batch_size]
        inputs = tokenizer(
            batch,
            return_tensors='pt',
            padding=True,
            truncation=True,
            max_length=128
        ).to(device)

        with torch.no_grad():
            loss = model(**inputs, labels=inputs["input_ids"]).loss

        num_tokens = inputs["input_ids"].numel()
        total_loss += loss.item() * num_tokens
        total_tokens += num_tokens

    avg_loss = total_loss / total_tokens
    return math.exp(avg_loss)


# --- Original model ---
orig_ppl = compute_perplexity("gpt2", texts)
print(f"Original GPT-2 Perplexity: {orig_ppl:.2f}")

# --- Compressed models ---
for k in [4, 6, 8, 10]:
    print(f"\n=== Evaluating compressed_model{k}.pth ===")
    try:
        compressed_model_test = GPT2LMHeadModel.from_pretrained(f"gpt2_compressed_half_model{k}").to(device)
    except Exception as e:
        print(f"‚ùå Failed to load gpt2_compressed_half_model{k}:", e)
        continue
    try:
        merged_model = GPT2LMHeadModel.from_pretrained(f"gpt2_merged_full_model{k}").to(device)
    except Exception as e:
        print(f"‚ùå Failed to load gpt2_merged_full_model{k}:", e)
        continue

    compressed_ppl = compute_perplexity(compressed_model_test, texts)
    merged_ppl = compute_perplexity(merged_model, texts)
    print(f"Merged GPT-2 (k={k}) Perplexity: {merged_ppl:.2f}")
    print(f"Compressed GPT-2 (k={k}) Perplexity: {compressed_ppl:.2f}")


Original GPT-2 Perplexity: 851.83

=== Evaluating compressed_model4.pth ===
Merged GPT-2 (k=4) Perplexity: 11748.72
Compressed GPT-2 (k=4) Perplexity: 398065.80

=== Evaluating compressed_model6.pth ===
Merged GPT-2 (k=6) Perplexity: 8714.57
Compressed GPT-2 (k=6) Perplexity: 247155.87

=== Evaluating compressed_model8.pth ===
Merged GPT-2 (k=8) Perplexity: 27535.84
Compressed GPT-2 (k=8) Perplexity: 2133693.73

=== Evaluating compressed_model10.pth ===
Merged GPT-2 (k=10) Perplexity: 4069.90
Compressed GPT-2 (k=10) Perplexity: 99429.67


### perplexity with training 

In [38]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments
from datasets import load_dataset
import evaluate
import math

device = "cuda" if torch.cuda.is_available() else "cpu"
# 1. Tokenizer
# --------------------------
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

# --------------------------
# 2. Load + preprocess WikiText-2 properly
# --------------------------
dataset = load_dataset("wikitext", "wikitext-2-raw-v1")

def tokenize_function(examples):
    out = tokenizer(
        examples["text"], 
        padding="max_length", 
        truncation=True, 
        max_length=128
    )
    out["labels"] = out["input_ids"].copy()
    return out

tokenized = dataset.map(tokenize_function, batched=True, remove_columns=["text"])
tokenized.set_format(type="torch")
train_data = tokenized["train"]
test_data = tokenized["test"]

# --------------------------
# 3. Training Function
# --------------------------
def train_model(model, save_dir):
    # Make sure gradients are enabled and model is in training mode
    torch.set_grad_enabled(True)
    model.train()

    args = TrainingArguments(
        output_dir=save_dir,
        eval_strategy="no",
        per_device_train_batch_size=2,
        num_train_epochs=2,
        logging_steps=2000,
        learning_rate=5e-5,
        weight_decay=0.01,
        report_to="none",
        save_strategy="no",    # or "epoch" or "no"
        save_steps=20000,         # save every 50k steps
        save_total_limit=1,       # keep only the last checkpoint
    )

    trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_data,
    tokenizer=tokenizer
    )
    trainer.train()

    model.save_pretrained(save_dir)

# --------------------------
# 4. Load & Train All Models
# --------------------------
models_to_train = {}

# Original GPT-2
models_to_train["gpt2_orig"] = "gpt2"

# Merged models
for k in [4, 6, 8, 10]:
    models_to_train[f"merged_model_{k}"] = f"gpt2_merged_full_model{k}"

# Compressed models
for k in [4, 6, 8, 10]:
    models_to_train[f"compressed_model_{k}"] = f"gpt2_compressed_half_model{k}"

for name, pa.0th_ in models_to_train.items():
    print(f"\n=== Training {name} ===")
    
    # Load model from directory
    try:
        model = GPT2LMHeadModel.from_pretrained(path_)
    except Exception as e:
        print(f"‚ùå Failed to load {path_}:", e)
        continue

    # Fine-tune
    train_model(model, save_dir=f"{name}_finetuned")



=== Training gpt2_orig ===


  trainer = Trainer(
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 50256}.


Step,Training Loss
2000,1.3683
4000,1.3345
6000,1.2699
8000,1.3044
10000,1.2747
12000,1.2286
14000,1.3107
16000,1.2457
18000,1.2631
20000,1.1309


The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 50256}.



=== Training merged_model_4 ===


Step,Training Loss
2000,1.7026
4000,1.4942
6000,1.3989
8000,1.4238
10000,1.3777
12000,1.3263
14000,1.4073
16000,1.332
18000,1.3491
20000,1.223


The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 50256}.



=== Training merged_model_6 ===


Step,Training Loss
2000,1.7142
4000,1.4885
6000,1.3909
8000,1.4121
10000,1.3688
12000,1.3161
14000,1.3971
16000,1.3222
18000,1.3393
20000,1.2151


The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 50256}.



=== Training merged_model_8 ===


Step,Training Loss
2000,1.7161
4000,1.4978
6000,1.3993
8000,1.4208
10000,1.375
12000,1.3219
14000,1.4024
16000,1.3263
18000,1.3431
20000,1.22


The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 50256}.



=== Training merged_model_10 ===


Step,Training Loss
2000,1.7021
4000,1.4988
6000,1.4006
8000,1.4224
10000,1.3759
12000,1.323
14000,1.4038
16000,1.3275
18000,1.3435
20000,1.2207



=== Training compressed_model_4 ===


Step,Training Loss
2000,2.2235
4000,2.046
6000,1.9095
8000,1.9281
10000,1.8549
12000,1.7865
14000,1.8857
16000,1.7825
18000,1.7964
20000,1.6905



=== Training compressed_model_6 ===


Step,Training Loss
2000,2.2761
4000,2.0986
6000,1.9647
8000,1.9852
10000,1.9117
12000,1.8433
14000,1.9489
16000,1.8418
18000,1.8574
20000,1.7529



=== Training compressed_model_8 ===


Step,Training Loss
2000,2.2841
4000,2.0962
6000,1.9624
8000,1.9825
10000,1.9082
12000,1.8386
14000,1.9455
16000,1.8396
18000,1.8543
20000,1.7507



=== Training compressed_model_10 ===


Step,Training Loss
2000,2.3276
4000,2.1198
6000,1.9802
8000,1.9995
10000,1.9227
12000,1.8529
14000,1.9587
16000,1.8511
18000,1.8665
20000,1.7681


In [39]:
import torch
import math
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from datasets import load_dataset

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load tokenizer again
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

# Load WikiText-2 Raw
dataset = load_dataset("wikitext", "wikitext-2-raw-v1")
test_texts = dataset["test"]   # we use raw text here.


# ----------------------------------------------------------------------
# 1. Perplexity Function (correct!)
# ----------------------------------------------------------------------
def compute_perplexity(model, data, tokenizer, max_samples=200, batch_size=2):
    model.eval()
    model.to(device)
    
    # Filter non-empty strings
    texts = [t for t in data["text"] if isinstance(t, str) and len(t.strip()) > 0][:max_samples]

    total_loss = 0.0
    total_tokens = 0

    for i in tqdm(range(0, len(texts), batch_size), desc="Computing perplexity"):
        batch = texts[i:i + batch_size]
        enc = tokenizer(batch, return_tensors="pt", padding=True, truncation=True, max_length=128).to(device)

        with torch.no_grad():
            out = model(**enc, labels=enc["input_ids"])
            loss = out.loss

            # Weighted loss
            total_loss += loss.item() * enc["input_ids"].numel()
            total_tokens += enc["input_ids"].numel()

    avg_loss = total_loss / total_tokens
    perplexity = math.exp(avg_loss)
    return perplexity


In [40]:
# Dictionary of saved model directories
model_dirs = {
    "gpt2_orig": "gpt2_orig_finetuned",
    "merged_4": "merged_model_4_finetuned",
    "merged_6": "merged_model_6_finetuned",
    "merged_8": "merged_model_8_finetuned",
    "merged_10": "merged_model_10_finetuned",
    "compressed_4": "compressed_model_4_finetuned",
    "compressed_6": "compressed_model_6_finetuned",
    "compressed_8": "compressed_model_8_finetuned",
    "compressed_10": "compressed_model_10_finetuned",
}


In [41]:
results = {}

for name, path in model_dirs.items():
    print(f"\n=== Evaluating perplexity for: {name} ===")

    try:
        model = GPT2LMHeadModel.from_pretrained(path)
    except Exception as e:
        print(f"‚ùå Could not load {path}: {e}")
        continue

    ppl = compute_perplexity(model, dataset["test"], tokenizer)
    results[name] = ppl

    print(f"‚û°Ô∏è Perplexity of {name}: {ppl:.4f}")



=== Evaluating perplexity for: gpt2_orig ===


Computing perplexity: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:15<00:00,  6.66it/s]


‚û°Ô∏è Perplexity of gpt2_orig: 12.2247

=== Evaluating perplexity for: merged_4 ===


Computing perplexity: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:14<00:00,  6.75it/s]


‚û°Ô∏è Perplexity of merged_4: 13.3651

=== Evaluating perplexity for: merged_6 ===


Computing perplexity: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:15<00:00,  6.27it/s]


‚û°Ô∏è Perplexity of merged_6: 13.1584

=== Evaluating perplexity for: merged_8 ===


Computing perplexity: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:16<00:00,  6.14it/s]


‚û°Ô∏è Perplexity of merged_8: 13.1134

=== Evaluating perplexity for: merged_10 ===


Computing perplexity: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:16<00:00,  6.05it/s]


‚û°Ô∏è Perplexity of merged_10: 13.1680

=== Evaluating perplexity for: compressed_4 ===


Computing perplexity: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:07<00:00, 13.20it/s]


‚û°Ô∏è Perplexity of compressed_4: 28.4862

=== Evaluating perplexity for: compressed_6 ===


Computing perplexity: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:06<00:00, 14.95it/s]


‚û°Ô∏è Perplexity of compressed_6: 33.2190

=== Evaluating perplexity for: compressed_8 ===


Computing perplexity: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:06<00:00, 15.13it/s]


‚û°Ô∏è Perplexity of compressed_8: 32.0007

=== Evaluating perplexity for: compressed_10 ===


Computing perplexity: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:06<00:00, 15.28it/s]

‚û°Ô∏è Perplexity of compressed_10: 31.8289





In [42]:
print("\n\n========== FINAL PERPLEXITY RESULTS ==========")
for name, ppl in results.items():
    print(f"{name:20s} : {ppl:.4f}")




gpt2_orig            : 12.2247
merged_4             : 13.3651
merged_6             : 13.1584
merged_8             : 13.1134
merged_10            : 13.1680
compressed_4         : 28.4862
compressed_6         : 33.2190
compressed_8         : 32.0007
compressed_10        : 31.8289


In [43]:
import time
import torch

def measure_latency(model, tokenizer, device="cuda", num_samples=20, max_length=64):
    model.eval()
    model.to(device)

    texts = ["The quick brown fox jumps over the lazy dog."] * num_samples

    # Warm-up rounds
    for _ in range(3):
        _ = model.generate(
            **tokenizer(texts[0], return_tensors="pt").to(device),
            max_length=max_length
        )

    if device == "cuda":
        torch.cuda.synchronize()

    start = time.time()
    for text in texts:
        inputs = tokenizer(text, return_tensors="pt").to(device)
        with torch.no_grad():
            _ = model.generate(**inputs, max_length=max_length)

    if device == "cuda":
        torch.cuda.synchronize()

    total = time.time() - start
    return total / num_samples  # average per sample


In [44]:
model_dirs = {
    "gpt2_orig": "gpt2_orig_finetuned",
    "merged_4": "merged_model_4_finetuned",
    "merged_6": "merged_model_6_finetuned",
    "merged_8": "merged_model_8_finetuned",
    "merged_10": "merged_model_10_finetuned",
    "compressed_4": "compressed_model_4_finetuned",
    "compressed_6": "compressed_model_6_finetuned",
    "compressed_8": "compressed_model_8_finetuned",
    "compressed_10": "compressed_model_10_finetuned"
}


In [47]:
from transformers import GPT2LMHeadModel

latency_results = {}

for name, path in model_dirs.items():
    print(f"\n=== Measuring latency for: {name} ===")
    
    try:
        model = GPT2LMHeadModel.from_pretrained(path)
    except Exception as e:
        print(f"‚ùå Failed to load {path}: {e}")
        continue

    latency = measure_latency(model, tokenizer, device=device)
    latency_results[name] = latency

    print(f"‚û°Ô∏è {name} Latency: {latency*1000:.2f} ms/sample")
 


=== Measuring latency for: gpt2_orig ===
‚û°Ô∏è gpt2_orig Latency: 73.90 ms/sample

=== Measuring latency for: merged_4 ===
‚û°Ô∏è merged_4 Latency: 71.28 ms/sample

=== Measuring latency for: merged_6 ===
‚û°Ô∏è merged_6 Latency: 84.85 ms/sample

=== Measuring latency for: merged_8 ===
‚û°Ô∏è merged_8 Latency: 83.57 ms/sample

=== Measuring latency for: merged_10 ===
‚û°Ô∏è merged_10 Latency: 71.61 ms/sample

=== Measuring latency for: compressed_4 ===
‚û°Ô∏è compressed_4 Latency: 590.37 ms/sample

=== Measuring latency for: compressed_6 ===
‚û°Ô∏è compressed_6 Latency: 30.50 ms/sample

=== Measuring latency for: compressed_8 ===
‚û°Ô∏è compressed_8 Latency: 502.06 ms/sample

=== Measuring latency for: compressed_10 ===
‚û°Ô∏è compressed_10 Latency: 31.12 ms/sample


In [48]:
orig_lat = latency_results["gpt2_orig"]

print("\n=========== LATENCY COMPARISON ===========")
for name, lat in latency_results.items():
    speed = orig_lat / lat
    print(f"{name:20s} : {lat*1000:.2f} ms   | speedup = {speed:.2f}x")



gpt2_orig            : 73.90 ms   | speedup = 1.00x
merged_4             : 71.28 ms   | speedup = 1.04x
merged_6             : 84.85 ms   | speedup = 0.87x
merged_8             : 83.57 ms   | speedup = 0.88x
merged_10            : 71.61 ms   | speedup = 1.03x
compressed_4         : 590.37 ms   | speedup = 0.13x
compressed_6         : 30.50 ms   | speedup = 2.42x
compressed_8         : 502.06 ms   | speedup = 0.15x
compressed_10        : 31.12 ms   | speedup = 2.37x


### IMDB

In [1]:
import torch
import torch.nn as nn
from transformers import GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments, DataCollatorWithPadding
from datasets import load_dataset

device = "cuda" if torch.cuda.is_available() else "cpu"

# ==================================================
# 1. Load Tokenizer
# ==================================================
tok = GPT2Tokenizer.from_pretrained("gpt2")
tok.pad_token = tok.eos_token

# ==================================================
# 2. Load & Tokenize IMDB
# ==================================================
ds = load_dataset("imdb")

def tok_fn(e):
    return tok(e["text"], truncation=True, padding="max_length", max_length=128)

ds = ds.map(tok_fn, batched=True, remove_columns=["text"])
ds.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

collator = DataCollatorWithPadding(tokenizer=tok)

# ==================================================
# 3. GPT-2 Sentiment Classification Wrapper
# ==================================================
class GPT2ForSentiment(nn.Module):
    def __init__(self, gpt_model):
        super().__init__()
        self.gpt2 = gpt_model
        self.classifier = nn.Linear(self.gpt2.config.n_embd, 2)

    def forward(self, input_ids, attention_mask=None, labels=None):
        out = self.gpt2(
            input_ids=input_ids,
            attention_mask=attention_mask,
            output_hidden_states=True
        )
        # Take last non-padding token
        lengths = attention_mask.sum(dim=1) - 1
        last_hidden = out.hidden_states[-1][torch.arange(input_ids.size(0)), lengths, :]
        logits = self.classifier(last_hidden)

        loss = None
        if labels is not None:
            loss = nn.functional.cross_entropy(logits, labels)
        return {"loss": loss, "logits": logits}

# ==================================================
# 4. Training Arguments
# ==================================================
args = TrainingArguments(
    output_dir="./imdb_results",
    overwrite_output_dir=True,
    num_train_epochs=2,
    per_device_train_batch_size=2,
    learning_rate=5e-5,
    logging_steps=50,
    save_strategy="no",
    fp16=torch.cuda.is_available(),
    report_to="none"
)

# ==================================================
# 5. Training & Evaluation Function
# ==================================================
def train_and_eval(model, name):
    print(f"\n==============================")
    print(f"üîπ Training {name}")
    print(f"==============================")

    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=ds["train"],
        eval_dataset=ds["test"],
        tokenizer=tok,
        data_collator=collator
    )

    trainer.train()

    # Accuracy Evaluation
    model.eval()
    correct, total = 0, 0
    dl = torch.utils.data.DataLoader(ds["test"], batch_size=16)

    for batch in dl:
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            out = model(batch["input_ids"], batch["attention_mask"])
        preds = out["logits"].argmax(dim=-1)
        correct += (preds == batch["label"]).sum().item()
        total += batch["label"].size(0)

    acc = correct / total
    print(f"‚úÖ {name} IMDb Accuracy: {acc*100:.2f}%")

    # Save model
    save_dir = f"{name}_imdb_finetuned"
    model.gpt2.save_pretrained(save_dir)
    print(f"üíæ Saved to {save_dir}")

    return acc

# ==================================================
# 6. Model Paths (Same as your perplexity setup)
# ==================================================
model_paths = {
    "gpt2_orig": "gpt2",
    "merged_4": "gpt2_merged_full_model4",
    "merged_6": "gpt2_merged_full_model6",
    "merged_8": "gpt2_merged_full_model8",
    "merged_10": "gpt2_merged_full_model10",
    "compressed_4": "gpt2_compressed_half_model4",
    "compressed_6": "gpt2_compressed_half_model6",
    "compressed_8": "gpt2_compressed_half_model8",
    "compressed_10": "gpt2_compressed_half_model10"
}

# ==================================================
# 7. Run Training for All Models
# ==================================================
results = {}

for name, path in model_paths.items():
    print(f"\n=== Loading base model: {name} ===")
    try:
        base = GPT2LMHeadModel.from_pretrained(path)
    except Exception as e:
        print(f"‚ùå Failed to load {path}: {e}")
        continue

    model = GPT2ForSentiment(base.to(device))
    acc = train_and_eval(model, name)
    results[name] = acc

# ==================================================
# 8. Print Summary Table
# ==================================================
print("\n================ FINAL IMDb Accuracies ================")
for name, acc in results.items():
    print(f"{name:20s} : {acc*100:.2f}%")


  from .autonotebook import tqdm as notebook_tqdm
2025-11-14 22:24:42.898957: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-14 22:24:43.246139: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-11-14 22:24:44.572883: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.



=== Loading base model: gpt2_orig ===

üîπ Training gpt2_orig


  trainer = Trainer(


Step,Training Loss
50,1.3711
100,0.7998
150,0.7377
200,0.7223
250,1.4197
300,1.2618
350,1.0684
400,1.2132
450,0.7543
500,1.248


‚úÖ gpt2_orig IMDb Accuracy: 87.45%
üíæ Saved to gpt2_orig_imdb_finetuned

=== Loading base model: merged_4 ===

üîπ Training merged_4


Step,Training Loss
50,1.7407
100,1.2555
150,0.8482
200,0.6726
250,0.8302
300,0.7161
350,0.7623
400,0.8079
450,0.7309
500,0.8296


‚úÖ merged_4 IMDb Accuracy: 82.99%
üíæ Saved to merged_4_imdb_finetuned

=== Loading base model: merged_6 ===

üîπ Training merged_6


Step,Training Loss
50,1.2084
100,0.8098
150,0.8204
200,0.7316
250,0.7399
300,0.73
350,0.7021
400,0.7254
450,0.7369
500,0.7473


‚úÖ merged_6 IMDb Accuracy: 83.34%
üíæ Saved to merged_6_imdb_finetuned

=== Loading base model: merged_8 ===

üîπ Training merged_8


Step,Training Loss
50,1.1709
100,0.7097
150,0.7142
200,0.6781
250,0.9764
300,0.7238
350,0.7291
400,0.8275
450,0.7294
500,0.8096


‚úÖ merged_8 IMDb Accuracy: 82.94%
üíæ Saved to merged_8_imdb_finetuned

=== Loading base model: merged_10 ===

üîπ Training merged_10


Step,Training Loss
50,1.3188
100,1.2791
150,0.905
200,0.7908
250,0.8571
300,0.8121
350,0.7081
400,0.7751
450,0.726
500,0.8336


‚úÖ merged_10 IMDb Accuracy: 83.07%
üíæ Saved to merged_10_imdb_finetuned

=== Loading base model: compressed_4 ===

üîπ Training compressed_4


Step,Training Loss
50,0.8779
100,0.7789
150,0.7747
200,0.7124
250,0.7383
300,0.7383
350,0.706
400,0.7636
450,0.7002
500,0.7545


‚úÖ compressed_4 IMDb Accuracy: 83.46%
üíæ Saved to compressed_4_imdb_finetuned

=== Loading base model: compressed_6 ===

üîπ Training compressed_6


Step,Training Loss
50,0.9764
100,0.7923
150,0.7838
200,0.7033
250,0.7658
300,0.7284
350,0.7143
400,0.8046
450,0.6822
500,0.7584


‚úÖ compressed_6 IMDb Accuracy: 83.34%
üíæ Saved to compressed_6_imdb_finetuned

=== Loading base model: compressed_8 ===

üîπ Training compressed_8


Step,Training Loss
50,1.1277
100,0.7679
150,0.7484
200,0.7131
250,0.7651
300,0.7164
350,0.7229
400,0.8119
450,0.7049
500,0.7686


‚úÖ compressed_8 IMDb Accuracy: 82.95%
üíæ Saved to compressed_8_imdb_finetuned

=== Loading base model: compressed_10 ===

üîπ Training compressed_10


Step,Training Loss
50,1.4091
100,0.7563
150,0.8163
200,0.7475
250,0.6813
300,0.7181
350,0.6977
400,0.7666
450,0.7093
500,0.7729


‚úÖ compressed_10 IMDb Accuracy: 83.14%
üíæ Saved to compressed_10_imdb_finetuned

gpt2_orig            : 87.45%
merged_4             : 82.99%
merged_6             : 83.34%
merged_8             : 82.94%
merged_10            : 83.07%
compressed_4         : 83.46%
compressed_6         : 83.34%
compressed_8         : 82.95%
compressed_10        : 83.14%


In [2]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from datasets import load_dataset
import evaluate
import numpy as np
from tqdm import tqdm

device = "cuda" if torch.cuda.is_available() else "cpu"

# --------------------------
# 1. Dataset: CNN/DailyMail (first 100 test samples)
# --------------------------
data = load_dataset("cnn_dailymail", "3.0.0", split="test[:100]")  

# --------------------------
# 2. ROUGE Metric
# --------------------------
rouge = evaluate.load("rouge")

# --------------------------
# 3. Tokenizer (same for all GPT-based models)
# --------------------------
tok = GPT2Tokenizer.from_pretrained("gpt2")
tok.pad_token = tok.eos_token

# --------------------------
# 4. Models to Evaluate
# --------------------------
models = {   
    "gpt2_orig": "gpt2",
    "merged_4": "gpt2_merged_full_model4",
    "merged_6": "gpt2_merged_full_model6",
    "merged_8": "gpt2_merged_full_model8",
    "merged_10": "gpt2_merged_full_model10",
    "compressed_4": "gpt2_compressed_half_model4",
    "compressed_6": "gpt2_compressed_half_model6",
    "compressed_8": "gpt2_compressed_half_model8",
    "compressed_10": "gpt2_compressed_half_model10"
}

# --------------------------
# 5. Summarization Function
# --------------------------
def generate_summary(model, text, max_new_tokens=60):
    inp = tok(
        text, return_tensors="pt",
        truncation=True, padding=True, max_length=128
    ).to(device)

    output = model.generate(
        **inp,
        max_new_tokens=max_new_tokens,
        num_beams=3,
        early_stopping=True,
        no_repeat_ngram_size=2
    )

    return tok.decode(output[0], skip_special_tokens=True)

# --------------------------
# 6. Evaluation Loop
# --------------------------
results = {}

for name, path in models.items():
    print(f"\nüîπ Evaluating ROUGE for: {name}")
    
    model = GPT2LMHeadModel.from_pretrained(path).to(device).eval()
    predictions, references = [], []

    for i in tqdm(range(len(data))):
        article = data[i]["article"]
        ref = data[i]["highlights"]
        pred = generate_summary(model, article)
        predictions.append(pred)
        references.append(ref)

    score = rouge.compute(predictions=predictions, references=references)
    results[name] = score
    print(f"‚úÖ ROUGE for {name}:", score)

# --------------------------
# 7. Final Comparison
# --------------------------
print("\nüìä Final ROUGE-L Comparison:")
for model_name, score in results.items():
    print(model_name, "‚Üí ROUGE-L:", score["rougeL"])


Generating train split: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 287113/287113 [00:02<00:00, 96662.27 examples/s] 
Generating validation split: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 13368/13368 [00:00<00:00, 89015.81 examples/s]
Generating test split: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 11490/11490 [00:00<00:00, 100469.18 examples/s]
Downloading builder script: 6.14kB [00:00, 3.87MB/s]



üîπ Evaluating ROUGE for: gpt2_orig


  0%|          | 0/100 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  1%|          | 1/100 [00:00<01:02,  1.60it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  2%|‚ñè         | 2/100 [00:00<00:42,  2.29it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  3%|‚ñé         | 3/100 [00:01<00:36,  2.66it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  4%|‚ñç         | 4/100 [00:01<00:33,  2.85it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  5%|‚ñå         | 5/100 [00:01<00:31,  3.01it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  6%|‚ñå         | 6/100 [00:02<00:30,  3.10it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  7%|‚ñã         | 7/100 [00:02<00:29,  3.15it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  8%|‚ñä         | 8/100 [00:02<00:23,  3.97

‚úÖ ROUGE for gpt2_orig: {'rouge1': np.float64(0.23686015230383223), 'rouge2': np.float64(0.10447509226414367), 'rougeL': np.float64(0.1703181031849859), 'rougeLsum': np.float64(0.19939702492949057)}

üîπ Evaluating ROUGE for: merged_4


  0%|          | 0/100 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  1%|          | 1/100 [00:00<00:29,  3.30it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  2%|‚ñè         | 2/100 [00:00<00:29,  3.30it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  3%|‚ñé         | 3/100 [00:00<00:29,  3.30it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  4%|‚ñç         | 4/100 [00:01<00:28,  3.31it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  5%|‚ñå         | 5/100 [00:01<00:28,  3.31it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  6%|‚ñå         | 6/100 [00:01<00:28,  3.26it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  7%|‚ñã         | 7/100 [00:02<00:28,  3.28it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  8%|‚ñä         | 8/100 [00:02<00:27,  3.30

‚úÖ ROUGE for merged_4: {'rouge1': np.float64(0.23067422557445744), 'rouge2': np.float64(0.10050793644136888), 'rougeL': np.float64(0.16237754719411127), 'rougeLsum': np.float64(0.18785776549549799)}

üîπ Evaluating ROUGE for: merged_6


  0%|          | 0/100 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  2%|‚ñè         | 2/100 [00:00<00:18,  5.20it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  3%|‚ñé         | 3/100 [00:00<00:23,  4.21it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  4%|‚ñç         | 4/100 [00:00<00:25,  3.84it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  5%|‚ñå         | 5/100 [00:01<00:26,  3.58it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  6%|‚ñå         | 6/100 [00:01<00:26,  3.49it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  7%|‚ñã         | 7/100 [00:01<00:27,  3.43it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  8%|‚ñä         | 8/100 [00:02<00:27,  3.39it/s]Setting `pad_token_id` to `eos_token_id`:5

‚úÖ ROUGE for merged_6: {'rouge1': np.float64(0.22685958091945163), 'rouge2': np.float64(0.0997681947941437), 'rougeL': np.float64(0.16029673426510804), 'rougeLsum': np.float64(0.18613871479890431)}

üîπ Evaluating ROUGE for: merged_8


  0%|          | 0/100 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  1%|          | 1/100 [00:00<00:29,  3.33it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  2%|‚ñè         | 2/100 [00:00<00:29,  3.32it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  3%|‚ñé         | 3/100 [00:00<00:29,  3.30it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  4%|‚ñç         | 4/100 [00:01<00:28,  3.31it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  5%|‚ñå         | 5/100 [00:01<00:28,  3.32it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  6%|‚ñå         | 6/100 [00:01<00:28,  3.31it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  7%|‚ñã         | 7/100 [00:02<00:28,  3.30it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  8%|‚ñä         | 8/100 [00:02<00:27,  3.30

‚úÖ ROUGE for merged_8: {'rouge1': np.float64(0.22821710659247205), 'rouge2': np.float64(0.1000957985010239), 'rougeL': np.float64(0.15954930806658818), 'rougeLsum': np.float64(0.18670918610603174)}

üîπ Evaluating ROUGE for: merged_10


  0%|          | 0/100 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  1%|          | 1/100 [00:00<00:30,  3.27it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  2%|‚ñè         | 2/100 [00:00<00:29,  3.30it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  3%|‚ñé         | 3/100 [00:00<00:29,  3.30it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  4%|‚ñç         | 4/100 [00:01<00:29,  3.29it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  5%|‚ñå         | 5/100 [00:01<00:28,  3.28it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  6%|‚ñå         | 6/100 [00:01<00:28,  3.28it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  7%|‚ñã         | 7/100 [00:02<00:28,  3.28it/s]Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
  8%|‚ñä         | 8/100 [00:02<00:27,  3.29

‚úÖ ROUGE for merged_10: {'rouge1': np.float64(0.2656927442882041), 'rouge2': np.float64(0.11734741880856367), 'rougeL': np.float64(0.18603887143869474), 'rougeLsum': np.float64(0.21734952824632464)}

üîπ Evaluating ROUGE for: compressed_4


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:15<00:00,  6.28it/s]


‚úÖ ROUGE for compressed_4: {'rouge1': np.float64(0.1959989554125225), 'rouge2': np.float64(0.08627431150794562), 'rougeL': np.float64(0.13732425935189357), 'rougeLsum': np.float64(0.16028942504433635)}

üîπ Evaluating ROUGE for: compressed_6


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:14<00:00,  7.03it/s]


‚úÖ ROUGE for compressed_6: {'rouge1': np.float64(0.2010496471219669), 'rouge2': np.float64(0.08842919377043754), 'rougeL': np.float64(0.14086239841847736), 'rougeLsum': np.float64(0.16460996729544008)}

üîπ Evaluating ROUGE for: compressed_8


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:14<00:00,  6.98it/s]


‚úÖ ROUGE for compressed_8: {'rouge1': np.float64(0.19911675148005076), 'rouge2': np.float64(0.08762151187074499), 'rougeL': np.float64(0.13940229051094813), 'rougeLsum': np.float64(0.16288697892987475)}

üîπ Evaluating ROUGE for: compressed_10


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [00:14<00:00,  6.98it/s]


‚úÖ ROUGE for compressed_10: {'rouge1': np.float64(0.21008277531349), 'rouge2': np.float64(0.09152118560110611), 'rougeL': np.float64(0.1466985625357335), 'rougeLsum': np.float64(0.1712538564874653)}

üìä Final ROUGE-L Comparison:
gpt2_orig ‚Üí ROUGE-L: 0.1703181031849859
merged_4 ‚Üí ROUGE-L: 0.16237754719411127
merged_6 ‚Üí ROUGE-L: 0.16029673426510804
merged_8 ‚Üí ROUGE-L: 0.15954930806658818
merged_10 ‚Üí ROUGE-L: 0.18603887143869474
compressed_4 ‚Üí ROUGE-L: 0.13732425935189357
compressed_6 ‚Üí ROUGE-L: 0.14086239841847736
compressed_8 ‚Üí ROUGE-L: 0.13940229051094813
compressed_10 ‚Üí ROUGE-L: 0.1466985625357335
