In [10]:
# Now, install the dependencies
!pip install datasets
!pip install torch




In [11]:
import os
from datasets import load_dataset
from transformers import AutoTokenizer
from huggingface_hub import notebook_login
from tqdm.auto import tqdm

# --- Step 1: Login to Hugging Face ---
# A login box will appear. Paste your Hugging Face access token here.
# Get a token at: https://huggingface.co/settings/tokens
print("Please log in to your Hugging Face account.")
notebook_login()


Please log in to your Hugging Face account.


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv‚Ä¶

In [6]:
import os
from datasets import load_dataset
from transformers import AutoTokenizer
from tqdm import tqdm

# --- Step 2: Configuration ---
PROJECT_PATH = ""
OUTPUT_DIR = os.path.join(PROJECT_PATH, "data")
os.makedirs(OUTPUT_DIR, exist_ok=True)

# ‚úÖ Collect ~1 million tokens per language
TARGET_TOKEN_COUNT = 1_000_000
MODEL_NAME = "meta-llama/Llama-3.2-3B"
LANGUAGES = ['en', 'hi']
CONFIG_DATE = '20231101'

print(f"Output directory for text files: {OUTPUT_DIR}")

# --- Step 3: Load the Tokenizer ---
print(f"Loading tokenizer for '{MODEL_NAME}'...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
print("‚úÖ Tokenizer loaded successfully.")

# --- Step 4: Process each language ---
for lang in LANGUAGES:
    print("\n" + "=" * 60)
    print(f"üåç Processing language: {lang}")
    print("=" * 60)

    collected_documents = []
    current_token_count = 0

    dataset_config = f"{CONFIG_DATE}.{lang}"
    streaming_dataset = load_dataset(
        'wikimedia/wikipedia',
        dataset_config,
        split='train',
        streaming=True
    )

    pbar = tqdm(desc=f"Tokens for {lang}", total=TARGET_TOKEN_COUNT, unit='tok')

    for doc in streaming_dataset:
        if current_token_count >= TARGET_TOKEN_COUNT:
            break

        text = doc.get('text', '')
        if not text:
            continue

        # ‚ö° Optional: truncate very large docs for efficiency
        text = text[:5000]
        num_tokens = len(tokenizer.encode(text, add_special_tokens=False))

        if num_tokens == 0:
            continue

        collected_documents.append(text)
        current_token_count += num_tokens
        pbar.update(num_tokens)

    pbar.close()

    # --- Step 5: Save to file ---
    output_filepath = os.path.join(OUTPUT_DIR, f"wikipedia_1M_{lang}.txt")
    print(f"\nüíæ Saving {len(collected_documents)} docs ({current_token_count:,} tokens) to {output_filepath}...")

    with open(output_filepath, 'w', encoding='utf-8') as f:
        f.write("\n\n".join(collected_documents))

    print(f"‚úÖ Successfully saved file for language '{lang}'. ({current_token_count:,} tokens total)")

print("\nüéØ All languages processed successfully with 1M tokens each!")


Output directory for text files: data
Loading tokenizer for 'meta-llama/Llama-3.2-3B'...
‚úÖ Tokenizer loaded successfully.

üåç Processing language: en


Resolving data files:   0%|          | 0/41 [00:00<?, ?it/s]

Tokens for en: 1000237tok [00:05, 176553.95tok/s]



üíæ Saving 1002 docs (1,000,237 tokens) to data/wikipedia_1M_en.txt...
‚úÖ Successfully saved file for language 'en'. (1,000,237 tokens total)

üåç Processing language: hi


Tokens for hi: 1000854tok [00:04, 238108.15tok/s]                           


üíæ Saving 564 docs (1,000,854 tokens) to data/wikipedia_1M_hi.txt...
‚úÖ Successfully saved file for language 'hi'. (1,000,854 tokens total)

üéØ All languages processed successfully with 1M tokens each!





In [7]:
import torch
import os
from transformers import AutoTokenizer
from tqdm.auto import tqdm

# --- Configuration ---
PROJECT_PATH = ""
DATA_DIR = os.path.join(PROJECT_PATH, "data")
MODEL_NAME = "meta-llama/Llama-3.2-3B"
LANGUAGES = ['en', 'hi']

print("üîπ Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
print("‚úÖ Tokenizer loaded successfully.")

# --- Tokenize each 1M-token dataset ---
for lang in LANGUAGES:
    input_filepath = os.path.join(DATA_DIR, f"wikipedia_1M_{lang}.txt")
    output_filepath = os.path.join(DATA_DIR, f"id.{lang}.1M.llama.pt")

    print("\n" + "=" * 70)
    print(f"üåç Processing language: {lang}")
    print(f"üì• Reading from: {input_filepath}")
    print(f"üíæ Saving to: {output_filepath}")
    print("=" * 70)

    if not os.path.exists(input_filepath):
        print(f"‚ùå ERROR: Input file not found at {input_filepath}")
        continue

    token_ids = []
    total_tokens = 0
    flush_interval = 500_000  # flush every 0.5M tokens

    with open(input_filepath, 'r', encoding='utf-8') as f:
        for line in tqdm(f, desc=f"Tokenizing {lang}", unit="line"):
            line = line.strip()
            if not line:
                continue

            # Encode text into token IDs
            ids = tokenizer.encode(line, add_special_tokens=False)
            if not ids:
                continue

            token_ids.extend(ids)
            total_tokens += len(ids)

            # Periodically flush to disk to prevent memory overflow
            if len(token_ids) >= flush_interval:
                torch.save(torch.LongTensor(token_ids), output_filepath)
                print(f"üåÄ Flushed {len(token_ids):,} tokens to {output_filepath}")
                token_ids = []

    # --- Save remaining tokens ---
    if token_ids:
        torch.save(torch.LongTensor(token_ids), output_filepath)
        print(f"‚úÖ Final flush: saved {len(token_ids):,} remaining tokens")

    print(f"‚úÖ Token tensor for '{lang}' saved successfully.")
    print(f"üìä Total tokens processed: {total_tokens:,}")

print("\nüéØ All tokenization steps complete for 1M-token datasets!")


üîπ Loading tokenizer...
‚úÖ Tokenizer loaded successfully.

üåç Processing language: en
üì• Reading from: data/wikipedia_1M_en.txt
üíæ Saving to: data/id.en.1M.llama.pt


Tokenizing en: 0line [00:00, ?line/s]

üåÄ Flushed 500,048 tokens to data/id.en.1M.llama.pt
‚úÖ Final flush: saved 491,293 remaining tokens
‚úÖ Token tensor for 'en' saved successfully.
üìä Total tokens processed: 991,341

üåç Processing language: hi
üì• Reading from: data/wikipedia_1M_hi.txt
üíæ Saving to: data/id.hi.1M.llama.pt


Tokenizing hi: 0line [00:00, ?line/s]

üåÄ Flushed 500,025 tokens to data/id.hi.1M.llama.pt
‚úÖ Final flush: saved 494,521 remaining tokens
‚úÖ Token tensor for 'hi' saved successfully.
üìä Total tokens processed: 994,546

üéØ All tokenization steps complete for 1M-token datasets!


In [10]:
import torch
import os

# --- Configuration ---
DATA_DIR = "data"  # folder containing your original 1M-token files
LANGUAGES = ["en", "hi"]
VALIDATION_RATIO = 0.05  # 5% of tokens for validation

# Loop over each language
for lang in LANGUAGES:
    # Original training file (1M tokens)
    train_path = os.path.join(DATA_DIR, f"id.{lang}.train.1M.llama.pt")

    if not os.path.exists(train_path):
        print(f"‚ùå File not found: {train_path}")
        continue

    # Load the full training token tensor
    all_tokens = torch.load(train_path)
    total_tokens = all_tokens.size(0)

    # Compute sizes
    val_size = int(total_tokens * VALIDATION_RATIO)
    train_size = total_tokens - val_size

    # Split tokens into train and validation
    train_tokens = all_tokens[:train_size]
    valid_tokens = all_tokens[train_size:]

    # Save new train and validation files
    train_out = os.path.join(DATA_DIR, f"id.{lang}.train.1M.llama.pt")
    valid_out = os.path.join(DATA_DIR, f"id.{lang}.valid.1M.llama.pt")

    torch.save(train_tokens, train_out)
    torch.save(valid_tokens, valid_out)

    print(f"‚úÖ {lang.upper()} split:")
    print(f"   Train tokens: {train_size} ‚Üí {train_out}")
    print(f"   Validation tokens: {val_size} ‚Üí {valid_out}")


‚úÖ EN split:
   Train tokens: 443393 ‚Üí data/id.en.train.1M.llama.pt
   Validation tokens: 23336 ‚Üí data/id.en.valid.1M.llama.pt
‚úÖ HI split:
   Train tokens: 446306 ‚Üí data/id.hi.train.1M.llama.pt
   Validation tokens: 23489 ‚Üí data/id.hi.valid.1M.llama.pt


**activation.py**

In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from tqdm.auto import tqdm
import os

# --- Configuration ---
MODEL_NAME = "meta-llama/Llama-3.2-3B"
LANGUAGES = ['en', 'hi']
DATA_DIR = "data"
BATCH_SIZE = 2
MAX_LENGTH = 1024

print("üîπ Loading model and tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    torch_dtype=torch.float16
)
model.eval()
print("‚úÖ Model loaded successfully.")

# --- Extract model dimensions ---
num_layers = model.config.num_hidden_layers
intermediate_size = model.config.intermediate_size  # e.g. 8192
print(f"üìä Model has {num_layers} layers, intermediate size = {intermediate_size}")

# --- Initialize neuron activation counter ---
over_zero = torch.zeros(num_layers, intermediate_size, dtype=torch.int32, device='cuda')

# --- Hook for MLP activation tracking ---
def mlp_hook(layer_idx):
    def hook(module, input, output):
        # Compute full activation before down projection
        x = input[0]
        gate_up = module.gate_proj(x)
        up = module.up_proj(x)
        activation = (torch.nn.functional.silu(gate_up) * up).float()
        # Count how many activations > 0 per neuron
        over_zero[layer_idx, :activation.size(-1)] += (activation > 0).sum(dim=(0, 1))
    return hook

# Register hooks for all MLP layers
for i, layer in enumerate(model.model.layers):
    layer.mlp.register_forward_hook(mlp_hook(i))

# --- Process each language ---
for lang in LANGUAGES:
    tensor_path = os.path.join(DATA_DIR, f"id.{lang}.train.1M.llama.pt")
    if not os.path.exists(tensor_path):
        print(f"‚ùå ERROR: Token file not found at {tensor_path}")
        continue

    ids = torch.load(tensor_path).to('cuda')
    total_tokens = ids.size(0)
    print(f"\nüåç Processing language: {lang} ({total_tokens:,} tokens)")

    # Process tokens in batches
    for start in tqdm(range(0, total_tokens, BATCH_SIZE * MAX_LENGTH), desc=f"üîÑ Forward passes for {lang}"):
        batch_ids = []
        for b in range(BATCH_SIZE):
            s = start + b * MAX_LENGTH
            if s >= total_tokens:
                break
            e = min(s + MAX_LENGTH, total_tokens)
            chunk = ids[s:e]
            if chunk.size(0) < MAX_LENGTH:
                # Pad to full length
                pad = torch.zeros(MAX_LENGTH - chunk.size(0), dtype=chunk.dtype, device='cuda')
                chunk = torch.cat([chunk, pad])
            batch_ids.append(chunk)

        if not batch_ids:
            continue

        batch_ids = torch.stack(batch_ids, dim=0)
        attention_mask = (batch_ids != 0).long()

        with torch.no_grad():
            _ = model(input_ids=batch_ids, attention_mask=attention_mask)

    # --- Save activation statistics ---
    output_path = os.path.join(DATA_DIR, f"activation.{lang}.train.1M.llama3b.pt")
    torch.save({'over_zero': over_zero.cpu()}, output_path)
    print(f"‚úÖ Saved activations for '{lang}' ‚Üí {output_path}")

print("\nüéØ Activation collection completed for all languages (1M-token datasets).")


üîπ Loading model and tokenizer...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
`torch_dtype` is deprecated! Use `dtype` instead!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

‚úÖ Model loaded successfully.
üìä Model has 28 layers, intermediate size = 8192

üåç Processing language: en (443,393 tokens)


üîÑ Forward passes for en:   0%|          | 0/217 [00:00<?, ?it/s]

‚úÖ Saved activations for 'en' ‚Üí data/activation.en.train.1M.llama3b.pt

üåç Processing language: hi (446,306 tokens)


üîÑ Forward passes for hi:   0%|          | 0/218 [00:00<?, ?it/s]

‚úÖ Saved activations for 'hi' ‚Üí data/activation.hi.train.1M.llama3b.pt

üéØ Activation collection completed for all languages (1M-token datasets).


In [6]:
import torch
import numpy as np
import pandas as pd
import os

# --- File path ---
file_path = 'data/activation.hi.train.1M.llama3b.pt'

# --- Load the .pt file ---
print(f"Loading tensor from: {file_path}")
data = torch.load(file_path, map_location='cpu')

# --- Extract the 'over_zero' tensor ---
if 'over_zero' not in data:
    raise KeyError("‚ùå Key 'over_zero' not found in the loaded file. Available keys: " + str(list(data.keys())))

over_zero = data['over_zero'].cpu()  # Expected shape: 28 x 8192
print(f"‚úÖ Loaded 'over_zero' tensor with shape: {tuple(over_zero.shape)}")

# --- Convert to NumPy for easier handling ---
matrix = over_zero.numpy()

# --- Show summary to avoid console flooding ---
print("\nüîπ Tensor Summary:")
print(f"Shape: {matrix.shape}")
print(f"Min: {matrix.min()}, Max: {matrix.max()}, Mean: {matrix.mean()}")
print("\nüîπ Sample (first 3 rows):")
print(matrix[:3])  # print only first 3 rows for readability

# --- Save to CSV ---
csv_path = file_path.replace(".pt", ".csv")
os.makedirs(os.path.dirname(csv_path), exist_ok=True)
df = pd.DataFrame(matrix)
df.to_csv(csv_path, index=False)

print(f"\n‚úÖ over_zero tensor successfully saved to CSV:")
print(f"üìÇ {csv_path}")


Loading tensor from: data/activation.hi.train.1M.llama3b.pt
‚úÖ Loaded 'over_zero' tensor with shape: (28, 8192)

üîπ Tensor Summary:
Shape: (28, 8192)
Min: 0, Max: 890880, Mean: 445263.85991123744

üîπ Sample (first 3 rows):
[[435993 436378 434422 ... 329206 424425 438842]
 [457913 449732 440073 ... 308689 436481 321708]
 [477596 431045 451697 ... 448848 457595 498834]]

‚úÖ over_zero tensor successfully saved to CSV:
üìÇ data/activation.hi.train.1M.llama3b.csv


In [7]:

# --- File path ---
file_path = 'data/activation.en.train.1M.llama3b.pt'

# --- Load the .pt file ---
print(f"Loading tensor from: {file_path}")
data = torch.load(file_path, map_location='cpu')

# --- Extract the 'over_zero' tensor ---
if 'over_zero' not in data:
    raise KeyError("‚ùå Key 'over_zero' not found in the loaded file. Available keys: " + str(list(data.keys())))

over_zero = data['over_zero'].cpu()  # Expected shape: 28 x 8192
print(f"‚úÖ Loaded 'over_zero' tensor with shape: {tuple(over_zero.shape)}")

# --- Convert to NumPy for easier handling ---
matrix = over_zero.numpy()

# --- Show summary to avoid console flooding ---
print("\nüîπ Tensor Summary:")
print(f"Shape: {matrix.shape}")
print(f"Min: {matrix.min()}, Max: {matrix.max()}, Mean: {matrix.mean()}")
print("\nüîπ Sample (first 3 rows):")
print(matrix[:3])  # print only first 3 rows for readability

# --- Save to CSV ---
csv_path = file_path.replace(".pt", ".csv")
os.makedirs(os.path.dirname(csv_path), exist_ok=True)
df = pd.DataFrame(matrix)
df.to_csv(csv_path, index=False)

print(f"\n‚úÖ over_zero tensor successfully saved to CSV:")
print(f"üìÇ {csv_path}")


Loading tensor from: data/activation.en.train.1M.llama3b.pt
‚úÖ Loaded 'over_zero' tensor with shape: (28, 8192)

üîπ Tensor Summary:
Shape: (28, 8192)
Min: 0, Max: 444416, Mean: 222106.46555873327

üîπ Sample (first 3 rows):
[[213640 237007 202883 ... 164857 214296 238791]
 [236456 215944 212300 ... 131038 216587 139884]
 [231557 228562 232253 ... 220941 218247 229130]]

‚úÖ over_zero tensor successfully saved to CSV:
üìÇ data/activation.en.train.1M.llama3b.csv


**identify.py**

In [8]:
import torch
import os

# --- Configuration ---
languages = ['en', 'hi']  # your hi-en dataset
activation_dir = 'data'
output_dir = 'activation_mask'
os.makedirs(output_dir, exist_ok=True)

top_rate = 0.01           # fraction of neurons to select (entropy-based)
filter_rate = 0.95        # top 5% neurons threshold
activation_bar_ratio = 0.95  # top 5% activation threshold

# --- Load activation data ---
over_zero = []
for lang in languages:
    file_path = os.path.join(activation_dir, f'activation.{lang}.train.1M.llama3b.pt')
    data = torch.load(file_path)
    over_zero.append(data['over_zero'])

# shape: [layers, neurons, languages]
over_zero = torch.stack(over_zero, dim=-1)
num_layers, intermediate_size, lang_num = over_zero.size()
print(f"{lang_num} languages, {num_layers} layers, {intermediate_size} neurons per layer")

# --- Compute activation probabilities across languages ---
activation_probs = over_zero / over_zero.sum(dim=-1, keepdim=True)
activation_probs[torch.isnan(activation_probs)] = 0

# --- Compute entropy across languages ---
log_probs = torch.where(activation_probs > 0, activation_probs.log(), 0)
entropy = -torch.sum(activation_probs * log_probs, dim=-1)  # shape: [layers, neurons]

# --- Filter neurons with very low activation ---
flattened_probs = activation_probs.flatten()
top_prob_value = flattened_probs.kthvalue(round(len(flattened_probs) * filter_rate)).values.item()
active_neurons = (activation_probs > top_prob_value).sum(dim=-1)
entropy[active_neurons == 0] = torch.inf  # ignore inactive neurons

# --- Select top neurons by smallest entropy (language-specific) ---
flattened_entropy = entropy.flatten()
top_entropy_count = round(len(flattened_entropy) * top_rate)
entropy_values, index = flattened_entropy.topk(top_entropy_count, largest=False)  # smallest entropy = language-specific

row_index = index // entropy.size(1)
col_index = index % entropy.size(1)

# --- Determine which language dominates each top neuron ---
# For each selected neuron (layer,row_index,col_index),
# check which language has the higher activation probability
dominant_lang = []
for r, c in zip(row_index.tolist(), col_index.tolist()):
    probs = activation_probs[r, c]  # shape [languages]
    winner = probs.argmax().item()  # 0 -> en, 1 -> hi
    dominant_lang.append(winner)

dominant_lang = torch.tensor(dominant_lang)
counts = torch.bincount(dominant_lang, minlength=len(languages))

total = counts.sum().item()
print("\nüîç Top 1% language dominance based on entropy:")
for i, lang in enumerate(languages):
    pct = 100 * counts[i].item() / total if total > 0 else 0
    print(f"   {lang.upper():>3}: {counts[i].item()} neurons ({pct:.2f}%)")

# --- Build and save per-language masks separately (same as before) ---
for lang_id, lang_name in enumerate(languages):
    lang_probs = activation_probs[:, :, lang_id]
    selected_mask = torch.zeros_like(lang_probs, dtype=torch.bool)
    selected_mask[row_index, col_index] = True
    activation_bar = flattened_probs.kthvalue(round(len(flattened_probs) * activation_bar_ratio)).values.item()
    selected_mask &= (lang_probs > activation_bar)

    layer_indices = []
    for layer in range(num_layers):
        neurons = torch.where(selected_mask[layer])[0]
        layer_indices.append(neurons)

    output_file = os.path.join(output_dir, f"llama3b_{lang_name}_mask.pt")
    torch.save(layer_indices, output_file)
    print(f"‚úÖ Activation mask for {lang_name} saved to: {output_file}")


2 languages, 28 layers, 8192 neurons per layer

üîç Top 1% language dominance based on entropy:
    EN: 0 neurons (0.00%)
    HI: 2294 neurons (100.00%)
‚úÖ Activation mask for en saved to: activation_mask/llama3b_en_mask.pt
‚úÖ Activation mask for hi saved to: activation_mask/llama3b_hi_mask.pt


In [9]:
import torch
import os

# --- Configuration ---
output_dir = 'activation_mask'
languages = ['en', 'hi']

for lang in languages:
    file_path = os.path.join(output_dir, f'llama3b_{lang}_mask.pt')

    # Load the saved mask
    layer_masks = torch.load(file_path)

    print(f"\n--- Activation mask for {lang.upper()} ---")
    print("layer-wise neuron indices = [")

    for i, neuron_tensor in enumerate(layer_masks):
        print(f"    layer {i}: tensor({neuron_tensor.tolist()})")

    print("]\n")



--- Activation mask for EN ---
layer-wise neuron indices = [
    layer 0: tensor([])
    layer 1: tensor([])
    layer 2: tensor([])
    layer 3: tensor([])
    layer 4: tensor([])
    layer 5: tensor([])
    layer 6: tensor([])
    layer 7: tensor([])
    layer 8: tensor([])
    layer 9: tensor([])
    layer 10: tensor([])
    layer 11: tensor([])
    layer 12: tensor([])
    layer 13: tensor([])
    layer 14: tensor([])
    layer 15: tensor([])
    layer 16: tensor([])
    layer 17: tensor([])
    layer 18: tensor([])
    layer 19: tensor([])
    layer 20: tensor([])
    layer 21: tensor([])
    layer 22: tensor([])
    layer 23: tensor([])
    layer 24: tensor([])
    layer 25: tensor([])
    layer 26: tensor([])
    layer 27: tensor([])
]


--- Activation mask for HI ---
layer-wise neuron indices = [
    layer 0: tensor([268, 513, 988, 1365, 1652, 1934, 2353, 2788, 2815, 2990, 3451, 3936, 4278, 5281, 5758, 5943, 6197, 6237, 6248, 6384, 7374, 7503, 8004])
    layer 1: tensor([46, 4

**ppl.py**

In [11]:
import argparse
from types import MethodType
import numpy as np
import torch
import torch.nn.functional as F
from transformers import AutoModelForCausalLM, AutoTokenizer
import os

# -------------------- Argument Parser --------------------
parser = argparse.ArgumentParser(description="Evaluate LLaMA model with language-specific activation masks.")
parser.add_argument(
    "-m", "--model",
    type=str,
    default="meta-llama/Llama-3.2-3B",
    help="Model name or path (default: meta-llama/Llama-3.2-3B)"
)
parser.add_argument(
    "-d", "--data_dir",
    type=str,
    default="data",
    help="Directory containing tokenized validation data (id.<lang>.valid.llama.pt)"
)
parser.add_argument(
    "-a", "--activation_mask_dir",
    type=str,
    default="activation_mask",
    help="Directory containing per-language activation masks (llama3b_<lang>_mask.pt)"
)
args, unknown = parser.parse_known_args()
if unknown:
    print(f"Ignoring unknown arguments: {unknown}")

# -------------------- Load Model & Tokenizer --------------------
print("üîπ Loading model...")
tokenizer = AutoTokenizer.from_pretrained(args.model)
model = AutoModelForCausalLM.from_pretrained(
    args.model,
    torch_dtype=torch.float16,
    device_map="auto"
)
model.eval()
print("‚úÖ Model loaded.")

num_layers = model.config.num_hidden_layers
intermediate_size = model.config.intermediate_size
max_length = model.config.max_position_embeddings

print(f"üìä Layers: {num_layers}, Hidden neurons per MLP: {intermediate_size}")

# -------------------- Languages --------------------
languages = ["en", "hi"]

# Load activation masks
activation_masks = []
for lang in languages:
    mask_path = os.path.join(args.activation_mask_dir, f"llama3b_{lang}_mask.pt")
    if os.path.exists(mask_path):
        print(f"üîπ Found activation mask for {lang}: {mask_path}")
        activation_masks.append(torch.load(mask_path))
    else:
        print(f"‚ö†Ô∏è No activation mask found for {lang}, using None")
        activation_masks.append(None)

# -------------------- Helper: Forward Patch Factory --------------------
def factory(mask):
    """Custom forward method that zeroes out masked neurons."""
    def llama_forward(self, x):
        gate_up = self.gate_proj(x)
        up = self.up_proj(x)
        activation = F.silu(gate_up) * up

        # Zero out masked neurons
        if mask is not None and mask.numel() > 0:
            activation.index_fill_(2, mask.to(x.device), 0)

        return self.down_proj(activation)
    return llama_forward

# -------------------- Evaluation --------------------
final_output = []

for activation_mask, mask_lang in zip(activation_masks, languages):
    print(f"\nüîß Applying activation mask for: {mask_lang.upper()}")

    # Apply mask to each MLP layer
    if activation_mask is not None:
        for i, layer_mask in enumerate(activation_mask):
            if layer_mask.numel() == 0:
                continue
            obj = model.model.layers[i].mlp
            obj.forward = MethodType(factory(layer_mask), obj)

    # -------------------- Evaluate PPL across languages --------------------
    ppls = []
    for lang in languages:
        val_path = os.path.join(args.data_dir, f"id.{lang}.valid.1M.llama.pt")
        if not os.path.exists(val_path):
            print(f"‚ùå Missing validation file: {val_path}")
            ppls.append(float("nan"))
            continue

        ids = torch.load(val_path)
        total_len = ids.size(0)
        max_len = min(max_length, 1024)
        total_len = (total_len // max_len) * max_len
        input_ids = ids[:total_len].reshape(-1, max_len)

        print(f"   üîπ Evaluating {lang.upper()} ({input_ids.shape[0]} sequences)...")

        losses = []
        for batch in input_ids.to(model.device):
            with torch.no_grad():
                out = model(batch.unsqueeze(0), labels=batch.unsqueeze(0))
                losses.append(out.loss.item())

        mean_loss = np.mean(losses)
        ppls.append(mean_loss)

    final_output.append(ppls)

# -------------------- Print Results --------------------
print("\nüìä Average Negative Log-Likelihood (proxy for PPL):")
header = "MASK_LANG | " + " | ".join([f"{l.upper():>5}" for l in languages])
print(header)
print("-" * len(header))

for mask_lang, ppls in zip(languages, final_output):
    row = f"{mask_lang.upper():>9} | " + " | ".join([f"{ppl:.3f}" if not np.isnan(ppl) else "  N/A " for ppl in ppls])
    print(row)


Ignoring unknown arguments: ['-f', '/root/.local/share/jupyter/runtime/kernel-bd42ecb1-6f72-46bc-bc20-af423b9f2ca3.json']
üîπ Loading model...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

‚úÖ Model loaded.
üìä Layers: 28, Hidden neurons per MLP: 8192
üîπ Found activation mask for en: activation_mask/llama3b_en_mask.pt
üîπ Found activation mask for hi: activation_mask/llama3b_hi_mask.pt

üîß Applying activation mask for: EN
   üîπ Evaluating EN (22 sequences)...
   üîπ Evaluating HI (22 sequences)...

üîß Applying activation mask for: HI
   üîπ Evaluating EN (22 sequences)...
   üîπ Evaluating HI (22 sequences)...

üìä Average Negative Log-Likelihood (proxy for PPL):
MASK_LANG |    EN |    HI
-------------------------
       EN | 2.162 | 1.579
       HI | 2.317 | 1.715


**Generate.py**

In [14]:
# List of prompts
prompts = [
    "How can I improve my time management skills?",
    "What are the most effective ways to deal with stress?",
    "What are the main differences between Python and JavaScript programming languages?",
    "How can I increase my productivity while working from home?",
    "Can you explain the basics of quantum computing?",
    "What are the differences between plant-based and animal-based protein sources?",
    "How can I develop my critical thinking skills?",
    "What are the major challenges faced by the education sector today?",
    "What are the primary factors that influence consumer behavior?",
    "What are the most effective strategies for conflict resolution in the workplace?",
    "What are some potential implications of using a single-use plastic bottle versus a reusable bottle on both the environment and human health?",
    "What factors would you consider when designing an inclusive and accessible public transportation system?",
    "How can governments utilize fiscal and monetary policies to combat economic recessions?",
    "How do language and cultural barriers affect the way people communicate and form relationships in multicultural societies?",
    "Describe a scenario where artificial intelligence could be used to improve the quality and efficiency of healthcare delivery.",
    "Explain the process of gene editing using CRISPR-Cas9 technology, and discuss its potential applications and ethical implications.",
    "How do vaccinations work to protect individuals and communities from infectious diseases, and what is herd immunity?",
    "How do social media platforms influence the way people consume and share news, and what are the potential implications for the spread of misinformation?",
    "How do cultural, social, and economic factors influence people's food choices, and how can this knowledge be used to promote healthier diets?",
    "Explain the process of natural selection and how it contributes to the evolution and adaptation of species.",
    "How would you introduce yourself as a medieval knight at a royal banquet?",
    "As a pirate captain, what would you say to your crew to motivate them to search for hidden treasure?",
    "If you were a Shakespearean character, how would you declare your love for someone in a soliloquy?",
    "As a superhero, how would you explain your origin story to a curious child?",
    "Imagine you are a time traveler from the year 3000. What technological advancements would you tell people about?",
    "As a sports commentator, describe the winning play in the final seconds of a championship game.",
    "Pretend to be a world-famous chef. How would you describe your signature dish to a panel of judges?",
    "You are a mountain climber reaching the summit of Mount Everest. Describe your emotions and the view from the top.",
    "As a space colonist on Mars, describe your daily life and the challenges you face living on another planet.",
    "Pretend to be a character in a post-apocalyptic world. Describe how you survive and the allies you encounter.",
    "How can you determine if a restaurant is popular among locals or mainly attracts tourists, and why might this information be useful?",
    "What are some subtle clues that suggest someone is pretending to understand a topic or conversation when they are actually confused or uninformed?",
    "Why might someone choose to use a paper map or ask for directions instead of relying on a GPS device or smartphone app?",
    "How can you determine if a person is genuinely interested in a conversation or simply being polite?",
    "Why might someone prefer to shop at a small, locally-owned business instead of a large chain store, even if the prices are higher?",
    "How can you assess the credibility of a source of information, such as a news article or blog post, without relying solely on the reputation of the author or publisher?",
    "Why do some people enjoy the sensation of being scared, such as by watching horror movies or going on roller coasters, while others avoid these experiences?",
    "How can observing the behavior of other people in a social situation provide clues about cultural norms and expectations?",
    "Do we have a moral obligation to explore space, or should we focus on solving Earth's problems first?",
    "In a world where automation is becoming increasingly prevalent, is it more important to prioritize job creation or technological progress?",
    "How many times does the average human blink in a lifetime? Try to explain your answer step-by-step.",
    "How many atoms are in a grain of salt? Try to explain your answer step-by-step.",
    "How many lightning strikes occur on Earth each day? Try to explain your answer step-by-step.",
    "How many balloons would it take to lift a house like in the movie 'Up'? Try to explain your answer step-by-step.",
    "How many text messages are sent globally in a minute? Try to explain your answer step-by-step.",
    "How many words are spoken daily on Earth? Try to explain your answer step-by-step.",
    "How many snowflakes fall during a typical winter? Try to explain your answer step-by-step.",
    "How many pages are in all the books ever written? Try to explain your answer step-by-step.",
    "How many times has the Earth orbited the Sun since the beginning of life? Try to explain your answer step-by-step.",
    "How many songs have been recorded throughout history? Try to explain your answer step-by-step.",
    "What if the Internet had been invented during the Renaissance period?",
    "What if the Aztecs had successfully repelled the Spanish conquistadors?",
    "What if the Black Death had not occurred in the 14th century?",
    "What if Isaac Newton had focused on biology instead of physics?",
    "What if the Beatles had never formed as a band?",
    "What if Alan Turing had not cracked the Enigma code during World War II?",
    "What if the Suez Canal had never been constructed?",
    "What if the Maya civilization had never mysteriously collapsed?",
    "What if Christopher Columbus had not discovered the Americas?",
    "What if Vincent van Gogh had been a successful artist during his lifetime?",
    "Can you help me write a formal email to a potential business partner proposing a joint venture?",
    "Can you help me write a resignation letter to my current employer, while leaving on good terms and expressing gratitude for the opportunities provided?",
    "Use an appropriate format to structure a formal letter of recommendation for a student applying to a prestigious graduate program in computer science.",
    "Write a compelling product launch announcement email to inform our customers of our new software solution.",
    "Draft an apology email to a customer who experienced a delay in their order, and provide reassurance that the issue has been resolved.",
    "Write a script for a YouTube video exploring the history and cultural significance of jazz.",
    "Compose an engaging travel blog post about a recent trip to Hawaii, highlighting cultural experiences and must-see attractions.",
    "Write a captivating movie review for a recently released science fiction film, discussing its plot, characters, and special effects.",
    "Structure a podcast script for an episode discussing the influence of streaming platforms on the music industry.",
    "Write a symphony concert review, discussing the orchestra's performance and overall audience experience."
]

# Create directory if it doesn't exist
os.makedirs("dataset/mvicuna", exist_ok=True)

# Write prompts to en.txt
with open("dataset/mvicuna/en.txt", "w", encoding="utf-8") as f:
    for prompt in prompts:
        f.write(prompt + "\n")

print("‚úÖ Saved prompts to dataset/mvicuna/en.txt")


‚úÖ Saved prompts to dataset/mvicuna/en.txt


In [15]:
hi_prompts = [
    # Self-improvement
    "‡§Æ‡•à‡§Ç ‡§Ö‡§™‡§®‡•á ‡§∏‡§Æ‡§Ø ‡§™‡•ç‡§∞‡§¨‡§Ç‡§ß‡§® ‡§ï‡•å‡§∂‡§≤ ‡§ï‡•ã ‡§ï‡•à‡§∏‡•á ‡§∏‡•Å‡§ß‡§æ‡§∞ ‡§∏‡§ï‡§§‡§æ ‡§π‡•Ç‡§Å?",
    "‡§§‡§®‡§æ‡§µ ‡§∏‡•á ‡§®‡§ø‡§™‡§ü‡§®‡•á ‡§ï‡•á ‡§∏‡§¨‡§∏‡•á ‡§™‡•ç‡§∞‡§≠‡§æ‡§µ‡•Ä ‡§§‡§∞‡•Ä‡§ï‡•á ‡§ï‡•ç‡§Ø‡§æ ‡§π‡•à‡§Ç?",
    "‡§Æ‡•à‡§Ç ‡§Ö‡§™‡§®‡•Ä ‡§Ü‡§≤‡•ã‡§ö‡§®‡§æ‡§§‡•ç‡§Æ‡§ï ‡§∏‡•ã‡§ö ‡§ï‡•å‡§∂‡§≤ ‡§ï‡•à‡§∏‡•á ‡§µ‡§ø‡§ï‡§∏‡§ø‡§§ ‡§ï‡§∞ ‡§∏‡§ï‡§§‡§æ ‡§π‡•Ç‡§Å?",
    "‡§ò‡§∞ ‡§∏‡•á ‡§ï‡§æ‡§Æ ‡§ï‡§∞‡§§‡•á ‡§∏‡§Æ‡§Ø ‡§Ö‡§™‡§®‡•Ä ‡§â‡§§‡•ç‡§™‡§æ‡§¶‡§ï‡§§‡§æ ‡§ï‡•à‡§∏‡•á ‡§¨‡§¢‡§º‡§æ ‡§∏‡§ï‡§§‡§æ ‡§π‡•Ç‡§Å?",
    "‡§∏‡§ï‡§æ‡§∞‡§æ‡§§‡•ç‡§Æ‡§ï ‡§Ü‡§¶‡§§‡•á‡§Ç ‡§µ‡§ø‡§ï‡§∏‡§ø‡§§ ‡§ï‡§∞‡§®‡•á ‡§ï‡•á ‡§≤‡§ø‡§è ‡§ï‡•ç‡§Ø‡§æ ‡§ï‡§¶‡§Æ ‡§â‡§†‡§æ‡§è ‡§ú‡§æ ‡§∏‡§ï‡§§‡•á ‡§π‡•à‡§Ç?",

    # Science & Technology
    "‡§ï‡•ç‡§µ‡§æ‡§Ç‡§ü‡§Æ ‡§ï‡§Ç‡§™‡•ç‡§Ø‡•Ç‡§ü‡§ø‡§Ç‡§ó ‡§ï‡•Ä ‡§Æ‡•Ç‡§≤ ‡§¨‡§æ‡§§‡•á‡§Ç ‡§∏‡§Æ‡§ù‡§æ‡§á‡§è‡•§",
    "CRISPR-Cas9 ‡§§‡§ï‡§®‡•Ä‡§ï ‡§ï‡•á ‡§Æ‡§æ‡§ß‡•ç‡§Ø‡§Æ ‡§∏‡•á ‡§ú‡•Ä‡§® ‡§∏‡§Ç‡§™‡§æ‡§¶‡§® ‡§ï‡•Ä ‡§™‡•ç‡§∞‡§ï‡•ç‡§∞‡§ø‡§Ø‡§æ ‡§¨‡§§‡§æ‡§á‡§è‡•§",
    "‡§µ‡•à‡§ï‡•ç‡§∏‡•Ä‡§® ‡§ï‡•à‡§∏‡•á ‡§ï‡§æ‡§Æ ‡§ï‡§∞‡§§‡•Ä ‡§π‡•à‡§Ç ‡§î‡§∞ ‡§π‡§∞‡•ç‡§° ‡§á‡§Æ‡•ç‡§Ø‡•Å‡§®‡§ø‡§ü‡•Ä ‡§ï‡•ç‡§Ø‡§æ ‡§π‡•à?",
    "‡§™‡•ç‡§≤‡§æ‡§∏‡•ç‡§ü‡§ø‡§ï ‡§ï‡•Ä ‡§è‡§ï‡§≤-‡§â‡§™‡§Ø‡•ã‡§ó ‡§µ‡§æ‡§≤‡•Ä ‡§¨‡•ã‡§§‡§≤ ‡§î‡§∞ ‡§™‡•Å‡§®: ‡§™‡•ç‡§∞‡§Ø‡•ã‡§ú‡•ç‡§Ø ‡§¨‡•ã‡§§‡§≤ ‡§ï‡§æ ‡§™‡§∞‡•ç‡§Ø‡§æ‡§µ‡§∞‡§£ ‡§î‡§∞ ‡§∏‡•ç‡§µ‡§æ‡§∏‡•ç‡§•‡•ç‡§Ø ‡§™‡§∞ ‡§ï‡•ç‡§Ø‡§æ ‡§™‡•ç‡§∞‡§≠‡§æ‡§µ ‡§™‡§°‡§º‡§§‡§æ ‡§π‡•à?",
    "Python ‡§î‡§∞ JavaScript ‡§™‡•ç‡§∞‡•ã‡§ó‡•ç‡§∞‡§æ‡§Æ‡§ø‡§Ç‡§ó ‡§≠‡§æ‡§∑‡§æ‡§ì‡§Ç ‡§ï‡•á ‡§Æ‡•Å‡§ñ‡•ç‡§Ø ‡§Ö‡§Ç‡§§‡§∞ ‡§ï‡•ç‡§Ø‡§æ ‡§π‡•à‡§Ç?",

    # Daily life & practical scenarios
    "‡§ò‡§∞ ‡§∏‡•á ‡§ï‡§æ‡§Æ ‡§ï‡§∞‡§§‡•á ‡§∏‡§Æ‡§Ø ‡§∏‡§Æ‡§Ø ‡§î‡§∞ ‡§ä‡§∞‡•ç‡§ú‡§æ ‡§ï‡§æ ‡§™‡•ç‡§∞‡§¨‡§Ç‡§ß‡§® ‡§ï‡•à‡§∏‡•á ‡§ï‡§∞‡•á‡§Ç?",
    "‡§∏‡§æ‡§Æ‡§æ‡§ú‡§ø‡§ï ‡§∏‡•ç‡§•‡§ø‡§§‡§ø‡§Ø‡•ã‡§Ç ‡§Æ‡•á‡§Ç ‡§¶‡•Ç‡§∏‡§∞‡•ã‡§Ç ‡§ï‡•á ‡§µ‡•ç‡§Ø‡§µ‡§π‡§æ‡§∞ ‡§ï‡•ã ‡§¶‡•á‡§ñ‡§ï‡§∞ ‡§∏‡§æ‡§Ç‡§∏‡•ç‡§ï‡•É‡§§‡§ø‡§ï ‡§®‡§ø‡§Ø‡§Æ ‡§ï‡•à‡§∏‡•á ‡§∏‡§Æ‡§ù‡•á ‡§ú‡§æ ‡§∏‡§ï‡§§‡•á ‡§π‡•à‡§Ç?",
    "‡§è‡§ï ‡§∞‡•á‡§∏‡•ç‡§ü‡•ã‡§∞‡•á‡§Ç‡§ü ‡§ï‡•Ä ‡§≤‡•ã‡§ï‡§™‡•ç‡§∞‡§ø‡§Ø‡§§‡§æ ‡§ï‡•ã ‡§ï‡•à‡§∏‡•á ‡§™‡§∞‡§ñ‡§æ ‡§ú‡§æ ‡§∏‡§ï‡§§‡§æ ‡§π‡•à?",
    "‡§ï‡§ø‡§∏‡•Ä ‡§∏‡•ç‡§∞‡•ã‡§§ ‡§ï‡•Ä ‡§µ‡§ø‡§∂‡•ç‡§µ‡§∏‡§®‡•Ä‡§Ø‡§§‡§æ ‡§ï‡§æ ‡§Æ‡•Ç‡§≤‡•ç‡§Ø‡§æ‡§Ç‡§ï‡§® ‡§ï‡•à‡§∏‡•á ‡§ï‡§ø‡§Ø‡§æ ‡§ú‡§æ‡§è?",
    "‡§µ‡•ç‡§Ø‡§ï‡•ç‡§§‡§ø‡§ó‡§§ ‡§î‡§∞ ‡§µ‡•ç‡§Ø‡§æ‡§µ‡§∏‡§æ‡§Ø‡§ø‡§ï ‡§ú‡•Ä‡§µ‡§® ‡§Æ‡•á‡§Ç ‡§ß‡•ç‡§Ø‡§æ‡§® ‡§î‡§∞ ‡§´‡•ã‡§ï‡§∏ ‡§ï‡•à‡§∏‡•á ‡§¨‡§®‡§æ‡§è ‡§∞‡§ñ‡•á‡§Ç?",

    # Imaginative & creative scenarios
    "‡§è‡§ï ‡§∏‡§Æ‡•Å‡§¶‡•ç‡§∞‡•Ä ‡§°‡§æ‡§ï‡•Ç ‡§ï‡§™‡•ç‡§§‡§æ‡§® ‡§ï‡•á ‡§∞‡•Ç‡§™ ‡§Æ‡•á‡§Ç ‡§Ö‡§™‡§®‡•á ‡§ï‡•ç‡§∞‡•Ç ‡§ï‡•ã ‡§ñ‡§ú‡§æ‡§®‡§æ ‡§ñ‡•ã‡§ú‡§®‡•á ‡§ï‡•á ‡§≤‡§ø‡§è ‡§ï‡•à‡§∏‡•á ‡§™‡•ç‡§∞‡•ã‡§§‡•ç‡§∏‡§æ‡§π‡§ø‡§§ ‡§ï‡§∞‡•á‡§Ç‡§ó‡•á?",
    "‡§Ø‡§¶‡§ø ‡§Ü‡§™ ‡§è‡§ï ‡§∏‡•Å‡§™‡§∞‡§π‡•Ä‡§∞‡•ã ‡§π‡•ã‡§Ç, ‡§§‡•ã ‡§Ö‡§™‡§®‡•á ‡§â‡§§‡•ç‡§™‡§§‡•ç‡§§‡§ø ‡§ï‡•Ä ‡§ï‡§π‡§æ‡§®‡•Ä ‡§ï‡•à‡§∏‡•á ‡§¨‡§§‡§æ‡§è‡§Ç‡§ó‡•á?",
    "‡§ï‡§≤‡•ç‡§™‡§®‡§æ ‡§ï‡•Ä‡§ú‡§ø‡§è ‡§ï‡§ø ‡§Ü‡§™ ‡§µ‡§∞‡•ç‡§∑ 3000 ‡§∏‡•á ‡§∏‡§Æ‡§Ø ‡§Ø‡§æ‡§§‡•ç‡§∞‡•Ä ‡§π‡•à‡§Ç‡•§ ‡§≤‡•ã‡§ó‡•ã‡§Ç ‡§ï‡•ã ‡§ï‡§ø‡§® ‡§§‡§ï‡§®‡•Ä‡§ï‡•Ä ‡§µ‡§ø‡§ï‡§æ‡§∏‡•ã‡§Ç ‡§ï‡•á ‡§¨‡§æ‡§∞‡•á ‡§Æ‡•á‡§Ç ‡§¨‡§§‡§æ‡§è‡§Ç‡§ó‡•á?",
    "‡§è‡§ï ‡§Ö‡§Ç‡§§‡§∞‡§ø‡§ï‡•ç‡§∑ ‡§â‡§™‡§®‡§ø‡§µ‡•á‡§∂‡§ï‡§∞‡•ç‡§§‡§æ ‡§ï‡•á ‡§∞‡•Ç‡§™ ‡§Æ‡•á‡§Ç ‡§Æ‡§Ç‡§ó‡§≤ ‡§™‡§∞ ‡§Ö‡§™‡§®‡•á ‡§¶‡•à‡§®‡§ø‡§ï ‡§ú‡•Ä‡§µ‡§® ‡§î‡§∞ ‡§ö‡•Å‡§®‡•å‡§§‡§ø‡§Ø‡•ã‡§Ç ‡§ï‡§æ ‡§µ‡§∞‡•ç‡§£‡§® ‡§ï‡•Ä‡§ú‡§ø‡§è‡•§",
    "‡§™‡•ã‡§∏‡•ç‡§ü-‡§è‡§™‡•ã‡§ï‡•à‡§≤‡§ø‡§™‡•ç‡§ü‡§ø‡§ï ‡§¶‡•Å‡§®‡§ø‡§Ø‡§æ ‡§Æ‡•á‡§Ç ‡§è‡§ï ‡§™‡§æ‡§§‡•ç‡§∞ ‡§ï‡•á ‡§∞‡•Ç‡§™ ‡§Æ‡•á‡§Ç ‡§Ö‡§™‡§®‡•á ‡§â‡§§‡•ç‡§§‡§∞‡§ú‡•Ä‡§µ‡§ø‡§§‡§æ ‡§ï‡•å‡§∂‡§≤ ‡§ï‡§æ ‡§µ‡§∞‡•ç‡§£‡§® ‡§ï‡§∞‡•á‡§Ç‡•§",

    # History & what-if scenarios
    "‡§Ø‡§¶‡§ø ‡§¨‡•ç‡§≤‡•à‡§ï ‡§°‡•á‡§• 14‡§µ‡•Ä‡§Ç ‡§∏‡§¶‡•Ä ‡§Æ‡•á‡§Ç ‡§®‡§π‡•Ä‡§Ç ‡§π‡•Å‡§Ü ‡§π‡•ã‡§§‡§æ ‡§§‡•ã ‡§ï‡•ç‡§Ø‡§æ ‡§π‡•ã‡§§‡§æ?",
    "‡§Ö‡§ó‡§∞ ‡§¨‡•Ä‡§ü‡§≤‡•ç‡§∏ ‡§ï‡§≠‡•Ä ‡§è‡§ï ‡§¨‡•à‡§Ç‡§° ‡§ï‡•á ‡§∞‡•Ç‡§™ ‡§Æ‡•á‡§Ç ‡§®‡§π‡•Ä‡§Ç ‡§¨‡§®‡•á ‡§π‡•ã‡§§‡•á ‡§§‡•ã ‡§∏‡§Ç‡§ó‡•Ä‡§§ ‡§â‡§¶‡•ç‡§Ø‡•ã‡§ó ‡§™‡§∞ ‡§ï‡•ç‡§Ø‡§æ ‡§™‡•ç‡§∞‡§≠‡§æ‡§µ ‡§™‡§°‡§º‡§§‡§æ?",
    "‡§Ø‡§¶‡§ø ‡§ï‡•ç‡§∞‡§ø‡§∏‡•ç‡§ü‡•ã‡§´‡§∞ ‡§ï‡•ã‡§≤‡§Ç‡§¨‡§∏ ‡§®‡•á ‡§Ö‡§Æ‡•á‡§∞‡§ø‡§ï‡§æ ‡§ï‡•Ä ‡§ñ‡•ã‡§ú ‡§®‡§π‡•Ä‡§Ç ‡§ï‡•Ä ‡§π‡•ã‡§§‡•Ä ‡§§‡•ã ‡§á‡§§‡§ø‡§π‡§æ‡§∏ ‡§ï‡•à‡§∏‡•á ‡§¨‡§¶‡§≤‡§§‡§æ?",
    "‡§Ø‡§¶‡§ø ‡§Æ‡§æ‡§Ø‡§æ ‡§∏‡§≠‡•ç‡§Ø‡§§‡§æ ‡§Ö‡§ö‡§æ‡§®‡§ï ‡§¢‡§π ‡§® ‡§ú‡§æ‡§§‡•Ä ‡§§‡•ã ‡§ï‡•ç‡§Ø‡§æ ‡§π‡•ã‡§§‡§æ?",
    "‡§Ø‡§¶‡§ø ‡§®‡•ç‡§Ø‡•Ç‡§ü‡§® ‡§®‡•á ‡§≠‡•å‡§§‡§ø‡§ï‡•Ä ‡§ï‡•á ‡§¨‡§ú‡§æ‡§Ø ‡§ú‡•Ä‡§µ‡§µ‡§ø‡§ú‡•ç‡§û‡§æ‡§® ‡§™‡§∞ ‡§ß‡•ç‡§Ø‡§æ‡§® ‡§ï‡•á‡§Ç‡§¶‡•ç‡§∞‡§ø‡§§ ‡§ï‡§ø‡§Ø‡§æ ‡§π‡•ã‡§§‡§æ ‡§§‡•ã ‡§ï‡•ç‡§Ø‡§æ ‡§π‡•ã‡§§‡§æ?",

    # Science & estimation questions
    "‡§è‡§ï ‡§Æ‡§æ‡§®‡§ï ‡§µ‡§∞‡•ç‡§∑ ‡§Æ‡•á‡§Ç ‡§™‡•É‡§•‡•ç‡§µ‡•Ä ‡§™‡§∞ ‡§ï‡§ø‡§§‡§®‡•á ‡§¨‡§ø‡§ú‡§≤‡•Ä ‡§ó‡§ø‡§∞‡§§‡•á ‡§π‡•à‡§Ç?",
    "‡§è‡§ï ‡§∏‡§æ‡§≤ ‡§Æ‡•á‡§Ç ‡§Æ‡§æ‡§®‡§µ ‡§∂‡§∞‡•Ä‡§∞ ‡§Æ‡•á‡§Ç ‡§î‡§∏‡§§‡§® ‡§ï‡§ø‡§§‡§®‡•Ä ‡§¨‡§æ‡§∞ ‡§™‡§≤‡§ï ‡§ù‡§™‡§ï‡§§‡•á ‡§π‡•à‡§Ç?",
    "‡§è‡§ï ‡§ó‡•ç‡§∞‡•á‡§® ‡§®‡§Æ‡§ï ‡§Æ‡•á‡§Ç ‡§ï‡§ø‡§§‡§®‡•á ‡§™‡§∞‡§Æ‡§æ‡§£‡•Å ‡§π‡•ã‡§§‡•á ‡§π‡•à‡§Ç? ‡§á‡§∏‡§ï‡§æ ‡§Ö‡§®‡•Å‡§Æ‡§æ‡§® ‡§¨‡§§‡§æ‡§á‡§è‡•§",
    "‡§è‡§ï ‡§Æ‡§ø‡§®‡§ü ‡§Æ‡•á‡§Ç ‡§¶‡•Å‡§®‡§ø‡§Ø‡§æ ‡§≠‡§∞ ‡§Æ‡•á‡§Ç ‡§ï‡§ø‡§§‡§®‡•á ‡§ü‡•á‡§ï‡•ç‡§∏‡•ç‡§ü ‡§∏‡§Ç‡§¶‡•á‡§∂ ‡§≠‡•á‡§ú‡•á ‡§ú‡§æ‡§§‡•á ‡§π‡•à‡§Ç?",
    "‡§™‡•É‡§•‡•ç‡§µ‡•Ä ‡§™‡§∞ ‡§™‡•ç‡§∞‡§§‡§ø‡§¶‡§ø‡§® ‡§ï‡§ø‡§§‡§®‡•á ‡§∂‡§¨‡•ç‡§¶ ‡§¨‡•ã‡§≤‡•á ‡§ú‡§æ‡§§‡•á ‡§π‡•à‡§Ç? ‡§á‡§∏‡§ï‡§æ ‡§Ö‡§®‡•Å‡§Æ‡§æ‡§® ‡§≤‡§ó‡§æ‡§á‡§è‡•§",

    # Practical writing tasks
    "‡§è‡§ï ‡§µ‡•ç‡§Ø‡§µ‡§∏‡§æ‡§Ø‡§ø‡§ï ‡§∏‡§π‡§Ø‡•ã‡§ó ‡§ï‡•á ‡§≤‡§ø‡§è ‡§î‡§™‡§ö‡§æ‡§∞‡§ø‡§ï ‡§à‡§Æ‡•á‡§≤ ‡§ï‡•à‡§∏‡•á ‡§≤‡§ø‡§ñ‡§æ ‡§ú‡§æ‡§è?",
    "‡§ó‡•ç‡§∞‡§æ‡§π‡§ï ‡§ï‡•ã ‡§¶‡•á‡§∞ ‡§π‡•Å‡§è ‡§Ü‡§¶‡•á‡§∂ ‡§ï‡•á ‡§≤‡§ø‡§è ‡§Æ‡§æ‡§´‡•Ä ‡§ï‡§æ ‡§™‡§§‡•ç‡§∞ ‡§ï‡•à‡§∏‡•á ‡§≤‡§ø‡§ñ‡§æ ‡§ú‡§æ‡§è?",
    "‡§ï‡§Ç‡§™‡•ç‡§Ø‡•Ç‡§ü‡§∞ ‡§µ‡§ø‡§ú‡•ç‡§û‡§æ‡§® ‡§Æ‡•á‡§Ç ‡§∏‡•ç‡§®‡§æ‡§§‡§ï ‡§õ‡§æ‡§§‡•ç‡§∞ ‡§ï‡•á ‡§≤‡§ø‡§è ‡§∏‡§ø‡§´‡§æ‡§∞‡§ø‡§∂ ‡§™‡§§‡•ç‡§∞ ‡§ï‡•à‡§∏‡•á ‡§≤‡§ø‡§ñ‡§æ ‡§ú‡§æ‡§è?",
    "‡§®‡§à ‡§∏‡•â‡§´‡§º‡•ç‡§ü‡§µ‡•á‡§Ø‡§∞ ‡§∏‡•Å‡§µ‡§ø‡§ß‡§æ ‡§ï‡•Ä ‡§ò‡•ã‡§∑‡§£‡§æ ‡§ï‡•á ‡§≤‡§ø‡§è ‡§Ü‡§ï‡§∞‡•ç‡§∑‡§ï ‡§à‡§Æ‡•á‡§≤ ‡§ï‡•à‡§∏‡•á ‡§≤‡§ø‡§ñ‡§æ ‡§ú‡§æ‡§è?",
    "‡§Ø‡•Ç‡§ü‡•ç‡§Ø‡•Ç‡§¨ ‡§µ‡•Ä‡§°‡§ø‡§Ø‡•ã ‡§ï‡•á ‡§≤‡§ø‡§è ‡§∏‡•ç‡§ï‡•ç‡§∞‡§ø‡§™‡•ç‡§ü ‡§ï‡•à‡§∏‡•á ‡§§‡•à‡§Ø‡§æ‡§∞ ‡§ï‡•Ä ‡§ú‡§æ‡§è ‡§ú‡•ã ‡§á‡§§‡§ø‡§π‡§æ‡§∏ ‡§î‡§∞ ‡§∏‡§æ‡§Ç‡§∏‡•ç‡§ï‡•É‡§§‡§ø‡§ï ‡§Æ‡§π‡§§‡•ç‡§µ ‡§™‡§∞ ‡§π‡•ã?",

    # Food & lifestyle
    "‡§∏‡§æ‡§Ç‡§∏‡•ç‡§ï‡•É‡§§‡§ø‡§ï, ‡§∏‡§æ‡§Æ‡§æ‡§ú‡§ø‡§ï ‡§î‡§∞ ‡§Ü‡§∞‡•ç‡§•‡§ø‡§ï ‡§ï‡§æ‡§∞‡§ï ‡§≤‡•ã‡§ó‡•ã‡§Ç ‡§ï‡•Ä ‡§≠‡•ã‡§ú‡§® ‡§™‡§∏‡§Ç‡§¶ ‡§ï‡•ã ‡§ï‡•à‡§∏‡•á ‡§™‡•ç‡§∞‡§≠‡§æ‡§µ‡§ø‡§§ ‡§ï‡§∞‡§§‡•á ‡§π‡•à‡§Ç?",
    "‡§∂‡§æ‡§ï‡§æ‡§π‡§æ‡§∞‡•Ä ‡§î‡§∞ ‡§Æ‡§æ‡§Ç‡§∏‡§æ‡§π‡§æ‡§∞‡•Ä ‡§™‡•ç‡§∞‡•ã‡§ü‡•Ä‡§® ‡§∏‡•ç‡§∞‡•ã‡§§‡•ã‡§Ç ‡§ï‡•á ‡§¨‡•Ä‡§ö ‡§Æ‡•Å‡§ñ‡•ç‡§Ø ‡§Ö‡§Ç‡§§‡§∞ ‡§ï‡•ç‡§Ø‡§æ ‡§π‡•à‡§Ç?",
    "‡§∏‡•ç‡§µ‡§∏‡•ç‡§• ‡§Ü‡§π‡§æ‡§∞ ‡§ï‡•ã ‡§¨‡§¢‡§º‡§æ‡§µ‡§æ ‡§¶‡•á‡§®‡•á ‡§ï‡•á ‡§≤‡§ø‡§è ‡§≤‡•ã‡§ó‡•ã‡§Ç ‡§ï‡•á ‡§≠‡•ã‡§ú‡§® ‡§ö‡•Å‡§®‡§æ‡§µ ‡§ï‡§æ ‡§µ‡§ø‡§∂‡•ç‡§≤‡•á‡§∑‡§£ ‡§ï‡•à‡§∏‡•á ‡§ï‡§ø‡§Ø‡§æ ‡§ú‡§æ‡§è?",
    "‡§Ü‡§™ ‡§ï‡§ø‡§∏‡•Ä ‡§∞‡•á‡§∏‡•ç‡§§‡§∞‡§æ‡§Ç ‡§Æ‡•á‡§Ç ‡§Ö‡§™‡§®‡•á ‡§Ö‡§®‡•Å‡§≠‡§µ ‡§î‡§∞ ‡§µ‡•ç‡§Ø‡§Ç‡§ú‡§® ‡§ï‡§æ ‡§µ‡§∞‡•ç‡§£‡§® ‡§ï‡•à‡§∏‡•á ‡§ï‡§∞‡•á‡§Ç‡§ó‡•á?",
    "‡§∏‡•ç‡§•‡§æ‡§®‡•Ä‡§Ø ‡§¨‡§æ‡§ú‡§æ‡§∞ ‡§Æ‡•á‡§Ç ‡§ñ‡§∞‡•Ä‡§¶‡§æ‡§∞‡•Ä ‡§ï‡§∞‡§®‡•á ‡§ï‡•á ‡§ï‡•ç‡§Ø‡§æ ‡§´‡§æ‡§Ø‡§¶‡•á ‡§π‡•à‡§Ç, ‡§î‡§∞ ‡§≤‡•ã‡§ó ‡§á‡§∏‡•á ‡§ï‡•ç‡§Ø‡•ã‡§Ç ‡§ö‡•Å‡§®‡§§‡•á ‡§π‡•à‡§Ç?",

    # Miscellaneous fun & curiosity
    "‡§Ø‡§¶‡§ø ‡§Ü‡§™ ‡§Æ‡§ß‡•ç‡§Ø‡§ï‡§æ‡§≤‡•Ä‡§® ‡§∂‡•Ç‡§∞‡§µ‡•Ä‡§∞ ‡§π‡•ã‡§§‡•á, ‡§§‡•ã ‡§∂‡§æ‡§π‡•Ä ‡§≠‡•ã‡§ú ‡§Æ‡•á‡§Ç ‡§ñ‡•Å‡§¶ ‡§ï‡•ã ‡§ï‡•à‡§∏‡•á ‡§™‡•á‡§∂ ‡§ï‡§∞‡§§‡•á?",
    "‡§Ø‡§¶‡§ø ‡§Ü‡§™ ‡§∂‡•á‡§ï‡•ç‡§∏‡§™‡§ø‡§Ø‡§∞‡§ø‡§Ø‡§® ‡§™‡§æ‡§§‡•ç‡§∞ ‡§π‡•ã‡§§‡•á, ‡§§‡•ã ‡§ï‡§ø‡§∏‡•Ä ‡§∏‡•á ‡§™‡•ç‡§∞‡•á‡§Æ ‡§ï‡•à‡§∏‡•á ‡§ò‡•ã‡§∑‡§ø‡§§ ‡§ï‡§∞‡§§‡•á?",
    "‡§Ø‡§¶‡§ø ‡§Ü‡§™ ‡§™‡•ç‡§∞‡§∏‡§ø‡§¶‡•ç‡§ß ‡§∂‡•á‡§´ ‡§π‡•ã‡§§‡•á, ‡§§‡•ã ‡§Ö‡§™‡§®‡•Ä ‡§ñ‡§æ‡§∏ ‡§°‡§ø‡§∂ ‡§ï‡§æ ‡§µ‡§∞‡•ç‡§£‡§® ‡§ï‡•à‡§∏‡•á ‡§ï‡§∞‡§§‡•á?",
    "‡§Ø‡§¶‡§ø ‡§Ü‡§™ ‡§∏‡§Æ‡§Ø ‡§Ø‡§æ‡§§‡•ç‡§∞‡•Ä ‡§π‡•ã‡§§‡•á, ‡§§‡•ã ‡§™‡•ç‡§∞‡§æ‡§ö‡•Ä‡§® ‡§∏‡§≠‡•ç‡§Ø‡§§‡§æ‡§ì‡§Ç ‡§ï‡•ã ‡§ï‡•ç‡§Ø‡§æ ‡§ú‡•ç‡§û‡§æ‡§® ‡§¶‡•á‡§§‡•á?",
    "‡§Ø‡§¶‡§ø ‡§Ü‡§™ ‡§∏‡•ç‡§™‡•á‡§∏ ‡§ï‡§Æ‡§æ‡§Ç‡§°‡§∞ ‡§π‡•ã‡§§‡•á, ‡§§‡•ã ‡§Æ‡§Ç‡§ó‡§≤ ‡§™‡§∞ ‡§ú‡•Ä‡§µ‡§® ‡§ï‡§æ ‡§¶‡•à‡§®‡§ø‡§ï ‡§µ‡§ø‡§µ‡§∞‡§£ ‡§ï‡•à‡§∏‡•á ‡§¶‡•á‡§Ç‡§ó‡•á?"
]

import os
os.makedirs("dataset/mvicuna", exist_ok=True)
with open("dataset/mvicuna/hi.txt", "w", encoding="utf-8") as f:
    for prompt in hi_prompts:
        f.write(prompt + "\n")

print("‚úÖ Hindi prompts saved to dataset/mvicuna/hi.txt")


‚úÖ Hindi prompts saved to dataset/mvicuna/hi.txt


In [1]:
import json
import os
from types import MethodType

import torch
import torch.nn.functional as F
from transformers import AutoModelForCausalLM, AutoTokenizer
from tqdm import tqdm

# -------------------- Configuration --------------------
MODEL_NAME = "meta-llama/Llama-3.2-3B"
DATA_DIR = "dataset/mvicuna"  # folder with your question datasets per language
ACTIVATION_MASK_DIR = "activation_mask"  # folder with per-language masks
OUTPUT_DIR = f"results/{MODEL_NAME.split('/')[-1]}/mvicuna"
os.makedirs(OUTPUT_DIR, exist_ok=True)

LANGUAGES = ["en", "hi"]
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

answer_lang = {
    "en": " Answer in English.",
    "hi": " Answer in Hindi.",  # you can customize or use the same as English
}

# -------------------- Load model and tokenizer --------------------
print("üîπ Loading model and tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    torch_dtype=torch.float16
)
model.eval()
print("‚úÖ Model loaded.")
num_layers = model.config.num_hidden_layers
intermediate_size = model.config.intermediate_size
print(f"üìä Layers: {num_layers}, Hidden neurons per MLP: {intermediate_size}")

# -------------------- Load activation masks --------------------
activation_masks = {}
for lang in LANGUAGES:
    mask_path = os.path.join(ACTIVATION_MASK_DIR, f"llama3b_{lang}_mask.pt")
    if os.path.exists(mask_path):
        activation_masks[lang] = torch.load(mask_path)
        print(f"üîπ Found activation mask for {lang}: {mask_path}")
    else:
        activation_masks[lang] = None
        print(f"‚ö†Ô∏è No activation mask found for {lang}")

# -------------------- Helper: Patch forward function --------------------
def mlp_forward_factory(mask):
    """Return a patched forward method that zeros out neurons in the mask."""
    def forward(self, x):
        gate_up = self.gate_proj(x)
        up = self.up_proj(x)
        activation = F.silu(gate_up) * up
        if mask is not None and mask.numel() > 0:
            activation.index_fill_(2, mask.to(x.device), 0)
        return self.down_proj(activation)
    return forward

# -------------------- Run inference per mask --------------------
for mask_lang, mask in activation_masks.items():
    # Patch model MLPs for this mask
    if mask is not None:
        for i, layer_mask in enumerate(mask):
            if layer_mask.numel() == 0:
                continue
            mlp_layer = model.model.layers[i].mlp
            mlp_layer.forward = MethodType(mlp_forward_factory(layer_mask), mlp_layer)

    # Run inference on EN and HI datasets
    for lang in LANGUAGES:
        input_file = os.path.join(DATA_DIR, f"{lang}.txt")
        if not os.path.exists(input_file):
            print(f"‚ùå Missing dataset for {lang}: {input_file}")
            continue

        with open(input_file, "r", encoding="utf-8") as f:
            texts = [line.strip() + answer_lang[lang] for line in f if line.strip()]
            texts = [f"Q: {t}\nA:" for t in texts]

        results = []
        for t in tqdm(texts, desc=f"{lang} <- mask {mask_lang}"):
            input_ids = tokenizer(t, return_tensors="pt").input_ids.to(DEVICE)
            with torch.no_grad():
                output_ids = model.generate(input_ids, max_new_tokens=256)
            output_text = tokenizer.decode(output_ids[0][input_ids.size(1):], skip_special_tokens=True)
            results.append({"input": t, "output": output_text})

        # Save results
        if mask is not None:
            out_file = os.path.join(OUTPUT_DIR, f"{lang}.perturb.{mask_lang}.jsonl")
        else:
            out_file = os.path.join(OUTPUT_DIR, f"{lang}.jsonl")

        with open(out_file, "w", encoding="utf-8") as f:
            for r in results:
                f.write(json.dumps(r, ensure_ascii=False) + "\n")

        print(f"‚úÖ Saved results: {out_file}")


üîπ Loading model and tokenizer...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
`torch_dtype` is deprecated! Use `dtype` instead!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

‚úÖ Model loaded.
üìä Layers: 28, Hidden neurons per MLP: 8192
üîπ Found activation mask for en: activation_mask/llama3b_en_mask.pt
üîπ Found activation mask for hi: activation_mask/llama3b_hi_mask.pt


en <- mask en:   0%|          | 0/70 [00:00<?, ?it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
en <- mask en:   1%|‚ñè         | 1/70 [00:12<13:54, 12.09s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
en <- mask en:   3%|‚ñé         | 2/70 [00:22<12:36, 11.12s/it]The attention mask and the pad token id were not set. As a consequence, you may observe

‚úÖ Saved results: results/Llama-3.2-3B/mvicuna/en.perturb.en.jsonl


hi <- mask en:   0%|          | 0/45 [00:00<?, ?it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
hi <- mask en:   2%|‚ñè         | 1/45 [00:10<07:35, 10.35s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
hi <- mask en:   4%|‚ñç         | 2/45 [00:20<07:26, 10.37s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
hi <- mask en:   7%|‚ñã         | 3/45 [00:31<07:17, 10.42s/it]T

KeyboardInterrupt: 

In [7]:
import shutil
from google.colab import files

# Replace 'folder_name' with your folder
folders_to_download = ['data', 'dataset']

for folder in folders_to_download:
    zip_name = f"{folder}.zip"
    shutil.make_archive(folder, 'zip', folder)
    files.download(zip_name)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [6]:
from google.colab import drive
import shutil

# Mount your Google Drive
drive.mount('/content/drive')

folders_to_download = ['data', 'dataset']

for folder in folders_to_download:
    zip_name = f"/content/drive/MyDrive/{folder}.zip"
    shutil.make_archive(f"/content/{folder}", 'zip', folder)
    shutil.move(f"/content/{folder}.zip", zip_name)
    print(f"‚úÖ {folder} saved to Google Drive: {zip_name}")


Mounted at /content/drive
‚úÖ data saved to Google Drive: /content/drive/MyDrive/data.zip
‚úÖ dataset saved to Google Drive: /content/drive/MyDrive/dataset.zip
