In [1]:
from datasets import load_dataset

# Load only the 'test' split; the later cells read its 'Source' and 'Target' columns.
ds = load_dataset(
    path='openpecha/cleaned_MT_v1.0.3',
    split='test',
)

In [None]:
import torch
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "meta-llama/Llama-3.2-1B-Instruct"

# Tokenizer belonging to the same checkpoint as the model.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# bitsandbytes settings: store weights in 4-bit "nf4" with double quantization
# enabled, while the compute dtype is bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Quantized causal LM; device placement is delegated to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config,
)

# Text-generation pipeline wrapping the already-loaded model and tokenizer.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

In [None]:
def get_translations(example):
    """Translate the 'Source' text(s) in ``example`` and store the result under 'pred'.

    Supports both calling conventions of ``Dataset.map``:

    * ``batched=True``: ``example`` maps each column name to a list of values;
      'pred' is set to a list holding one translation per source, in order.
    * ``batched=False``: ``example`` is a single row; 'pred' is set to one string.

    Args:
        example: Mapping with a 'Source' entry (a string, or a list of strings
            for a batch). A missing 'Source' is treated as an empty string.

    Returns:
        The same ``example`` object with the 'pred' entry added.
    """
    sources = example.get('Source', '')
    # In batched mode the column arrives as a list; formatting that list straight
    # into the prompt would send the whole batch as one garbled request.
    is_batch = isinstance(sources, (list, tuple))
    texts = sources if is_batch else [sources]

    preds = []
    for text in texts:
        # One conversation per call, so the shape of `outputs` is the same as
        # for a single chat input regardless of the batch size.
        # NOTE(review): decoding uses the model's default generation settings;
        # if those enable sampling, scores will differ between runs — confirm.
        outputs = pipe(
            [
                {"role": "system", "content": "You are a skilled translator of Tibetan texts. You answer only with the requested translation and nothing else."},
                {"role": "user", "content": f"Translate from Tibetan to English: {text}"}
            ],
            max_new_tokens=256,
        )
        # The prediction is the content of the last message in the returned chat.
        preds.append(outputs[0]["generated_text"][-1]['content'] if outputs else "")

    # A batched map requires one value per input row; a single row gets a plain string.
    example['pred'] = preds if is_batch else preds[0]

    return example

# batched=True passes a dict of column lists; get_translations returns a
# 'pred' list of matching length.
ds = ds.map(get_translations, batched=True)


In [None]:
import evaluate

# Load the three metrics used below: BLEU (via sacrebleu), chrF and TER.
bleu_metric = evaluate.load("sacrebleu")
chrf_metric = evaluate.load("chrf")
ter_metric = evaluate.load("ter")

# Model outputs and the reference translations they are scored against.
preds = ds['pred']
labels = ds['Target']

# Run every metric on the same prediction/reference pairs.
bleu_result = bleu_metric.compute(predictions=preds, references=labels)
chrf_result = chrf_metric.compute(predictions=preds, references=labels)
ter_result = ter_metric.compute(predictions=preds, references=labels)

# Each result dict exposes its headline number under "score".
bleu_score = bleu_result["score"]
chrf_score = chrf_result["score"]
ter_score = ter_result["score"]

# Summary rounded to four decimal places.
metrics = {
    name: round(value, 4)
    for name, value in (
        ("bleu", bleu_score),
        ("chrf", chrf_score),
        ("ter", ter_score),
    )
}

print("Computed Metrics:", metrics)