In [15]:
from datasets import load_dataset

ds = load_dataset('openpecha/cleaned_MT_v1.0.3', split='test').select(range(1_000))

In [2]:
import torch
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "meta-llama/Llama-3.2-1B-Instruct"

# Configure 4-bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,  # Use 4-bit quantization
    bnb_4bit_quant_type="nf4",  # Normalized float 4 (better accuracy)
    bnb_4bit_compute_dtype=torch.bfloat16,  # Keep computation in bf16 for efficiency
    bnb_4bit_use_double_quant=True  # Double quantization to save memory
)

# Load model and tokenizer with quantization
model = AutoModelForCausalLM.from_pretrained(
    model_id, 
    quantization_config=bnb_config,
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Create the pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer
)

Device set to use cuda:0


In [16]:
def get_translations(examples):
    # Create batch messages for all examples
    batch_messages = [
        [
            {"role": "system", "content": "You are a skilled translator of Tibetan texts. You answer only with the requested translation and nothing else."},
            {"role": "user", "content": f"Translate from Tibetan to English: {example}"}
        ]
        for example in examples['Source']  # 'Source' should be a list in batched examples
    ]

    # Process the batch with the pipeline
    batch_outputs = pipe(
        batch_messages,
        max_new_tokens=256
    )


    examples['pred'] = [output[0]["generated_text"][-1]['content'] if output else "" for output in batch_outputs]

    return examples

# Apply with batched=True
ds = ds.map(get_translations, batched=True, batch_size=16)


Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for

In [17]:
import evaluate

# Load BLEU and CHRF metrics
bleu_metric = evaluate.load("sacrebleu")
chrf_metric = evaluate.load("chrf")
ter_metric = evaluate.load("ter")

preds = ds['pred']
labels = ds['Target']

# Compute BLEU score
bleu_result = bleu_metric.compute(predictions=preds, references=labels)
bleu_score = bleu_result["score"]

# Compute CHRF score
chrf_result = chrf_metric.compute(predictions=preds, references=labels)
chrf_score = chrf_result["score"]

# Compute TER score
ter_result = ter_metric.compute(predictions=preds, references=labels)
ter_score = ter_result["score"]

metrics = {
        "bleu": round(bleu_score, 4),
        "chrf": round(chrf_score, 4),
        "ter": round(ter_score, 4)
    }

print("Computed Metrics:", metrics)

Computed Metrics: {'bleu': 0.0455, 'chrf': 14.6429, 'ter': 515.8196}
