In [None]:
!pip install -q evaluate sacrebleu bert_score nltk

In [None]:
import pandas as pd
import evaluate
import numpy as np
import nltk

# Fetch NLTK data up front (presumably for the METEOR metric loaded below —
# TODO confirm which of these resources it actually needs).
nltk.download('wordnet', quiet=True)
nltk.download('punkt', quiet=True)
nltk.download('omw-1.4', quiet=True)

# Colab-style absolute path to the validation CSV; edit when running elsewhere.
DATASET_PATH = "/content/BNLP validation.csv"

df = pd.read_csv(DATASET_PATH)

required_cols = ["Chittagong Language", "Romanized Chittagong Language", "Romanized Chittagong Language by Native",
                 "Sylhet Language", "Romanized Sylhet Language", "Romanized Sylhet Language by Native"]
# Report exactly which columns are absent instead of echoing the full list.
missing_cols = [col for col in required_cols if col not in df.columns]
assert not missing_cols, f"CSV is missing required columns: {missing_cols}"


def _paired_texts(frame, pred_col, ref_col):
    """Return aligned (predictions, references) string lists for one language.

    Rows where either cell is missing are skipped: `astype(str)` would
    otherwise turn NaN into the literal text "nan" and that placeholder would
    be scored as if it were a real romanization. `frame` itself is not
    modified, so later cells can keep indexing it as before.

    Args:
        frame: DataFrame holding both columns.
        pred_col: Column with the LLM romanization.
        ref_col: Column with the native-speaker romanization.

    Returns:
        Tuple `(preds, refs)` of equal-length lists of str.
    """
    pairs = frame[[pred_col, ref_col]].dropna()
    skipped = len(frame) - len(pairs)
    if skipped:
        print(f"Skipping {skipped} row(s) with a missing value in {pred_col!r} or {ref_col!r}")
    return pairs[pred_col].astype(str).tolist(), pairs[ref_col].astype(str).tolist()


preds_chittagong, refs_chittagong_native = _paired_texts(
    df, "Romanized Chittagong Language", "Romanized Chittagong Language by Native")

preds_sylhet, refs_sylhet_native = _paired_texts(
    df, "Romanized Sylhet Language", "Romanized Sylhet Language by Native")

# The sacrebleu metric is fed a list of reference lists (one inner list per
# prediction); each prediction here has exactly one reference.
refs_chittagong_wrapped = [[r] for r in refs_chittagong_native]
refs_sylhet_wrapped = [[r] for r in refs_sylhet_native]

# Load the metrics once; loading fetches scripts/models (see the cell output).
bleu_metric = evaluate.load("sacrebleu")
meteor_metric = evaluate.load("meteor")
bertscore_metric = evaluate.load("bertscore")

def compute_metrics(preds, refs, refs_wrapped=None, bertscore_lang="bn"):
    """Score predictions against single references with BLEU, METEOR and BERTScore.

    Args:
        preds: Sequence of predicted strings.
        refs: Sequence of reference strings, aligned one-to-one with `preds`.
        refs_wrapped: Optional list of one-element lists (`[[ref], ...]`) for
            the sacrebleu metric. Built from `refs` when omitted.
        bertscore_lang: Value passed as `lang` to the BERTScore metric.

    Returns:
        Dict with keys "BLEU", "METEOR" and "BERTScore_F1", each rounded to
        4 decimals. "BERTScore_F1" is the mean of the per-pair F1 values.

    Raises:
        ValueError: If the inputs are empty or their lengths do not match.
    """
    # Materialize so len()/truthiness work for any sequence type (e.g. a Series).
    preds = list(preds)
    refs = list(refs)

    # Fail early with a clear message rather than deep inside a metric library.
    if not preds:
        raise ValueError("compute_metrics needs at least one prediction/reference pair")
    if len(preds) != len(refs):
        raise ValueError(
            f"preds and refs must have the same length, got {len(preds)} and {len(refs)}"
        )

    if refs_wrapped is None:
        refs_wrapped = [[ref] for ref in refs]
    elif len(refs_wrapped) != len(preds):
        raise ValueError(
            f"refs_wrapped must have one entry per prediction, "
            f"got {len(refs_wrapped)} for {len(preds)} predictions"
        )

    bleu = bleu_metric.compute(predictions=preds, references=refs_wrapped)
    meteor = meteor_metric.compute(predictions=preds, references=refs)
    bertscore = bertscore_metric.compute(predictions=preds, references=refs, lang=bertscore_lang)
    return {
        "BLEU": round(bleu["score"], 4),
        "METEOR": round(meteor["meteor"], 4),
        "BERTScore_F1": round(float(np.mean(bertscore["f1"])), 4)
    }

def _print_results(title, results):
    """Print one metrics dict under a titled header, one `name: value` per line."""
    print(f"\n--- Validation Results ({title}) ---")
    for metric_name, value in results.items():
        print(f"{metric_name:<15}: {value}")


def _print_samples(frame, heading, script_label, script_col, pred_col, ref_col, n=5):
    """Print the first `n` rows of `frame` as original / LLM / native triples.

    Rows are taken by position (`head(n)`), so this works regardless of the
    frame's index labels. `script_label` is printed verbatim before the
    original-script text, so it carries its own trailing padding.
    """
    print(f"\n=== {heading} ===")
    preview = frame.head(n)
    rows = zip(preview[script_col], preview[pred_col], preview[ref_col])
    for number, (script_text, llm_text, native_text) in enumerate(rows, start=1):
        print(f"\nSample #{number}")
        print(f"{script_label}{script_text}")
        print(f"LLM Romanized:              {llm_text}")
        print(f"Native Romanized:           {native_text}")


# Score each language's LLM romanization against the native-speaker one.
results_chittagong = compute_metrics(preds_chittagong, refs_chittagong_native, refs_chittagong_wrapped)
results_sylhet = compute_metrics(preds_sylhet, refs_sylhet_native, refs_sylhet_wrapped)

_print_results("Chittagong: LLM vs Native", results_chittagong)
_print_results("Sylhet: LLM vs Native", results_sylhet)
print("-------------------------------------------")

# Show a few rows so the numbers above can be sanity-checked by eye.
_print_samples(
    df,
    heading="Chittagong Language Samples",
    script_label="Chittagong (Bangla Script): ",
    script_col="Chittagong Language",
    pred_col="Romanized Chittagong Language",
    ref_col="Romanized Chittagong Language by Native",
)
_print_samples(
    df,
    heading="Sylhet Language Samples",
    script_label="Sylhet (Bangla Script):     ",
    script_col="Sylhet Language",
    pred_col="Romanized Sylhet Language",
    ref_col="Romanized Sylhet Language by Native",
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading builder script: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


Downloading builder script: 0.00B [00:00, ?B/s]

tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/714M [00:00<?, ?B/s]


--- Validation Results (Chittagong: LLM vs Native) ---
BLEU           : 79.9174
METEOR         : 0.7745
BERTScore_F1   : 0.9626

--- Validation Results (Sylhet: LLM vs Native) ---
BLEU           : 56.7109
METEOR         : 0.7227
BERTScore_F1   : 0.9519
-------------------------------------------

=== Chittagong Language Samples ===

Sample #1
Chittagong (Bangla Script): আমি ইক্কে যাইয়ুম 
LLM Romanized:              Ami ikke zaiyum
Native Romanized:           Ami ikke jaiyyum

Sample #2
Chittagong (Bangla Script): এক্কান সবক হইছ
LLM Romanized:              Ekkan sobok hoiso
Native Romanized:           Ekkan sobok hoise

Sample #3
Chittagong (Bangla Script): আর হইতর গম লায়
LLM Romanized:              Ar hoitor gom lay
Native Romanized:           Ar hoitor gom lay

Sample #4
Chittagong (Bangla Script): আর হোয়াল ভালা
LLM Romanized:              Ar hoal vala
Native Romanized:           Ar hoyal bwala

Sample #5
Chittagong (Bangla Script): তেঁই ক্যান গরি ঝুরার সা
LLM Romanized:          