In [1]:
!pip install evaluate seqeval -qqq

In [2]:
import os

import evaluate
import numpy as np
import wandb
from datasets import load_dataset
from huggingface_hub import login
from transformers import AutoModelForTokenClassification, TrainingArguments, Trainer
from transformers import AutoTokenizer
from transformers import DataCollatorForTokenClassification

# Authenticate with Weights & Biases and the Hugging Face Hub.
# Credentials come from the environment instead of being pasted into the
# notebook, so they cannot leak through a shared or committed copy.
# NOTE(review): when a variable is unset, None is passed; presumably each
# library then falls back to its own stored login or prompt — confirm.
wandb.login(key=os.environ.get("WANDB_API_KEY"))
login(token=os.environ.get("HF_TOKEN"))

# Dataset with the "tokens" / "ner_tags" columns used throughout the notebook.
ds = load_dataset("chuuhtetnaing/myanmar-ner-dataset")

# Metric, tokenizer and padding collator shared by the cells below.
seqeval = evaluate.load("seqeval")
tokenizer = AutoTokenizer.from_pretrained("chuuhtetnaing/myanmar-pos-model")
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mchuu[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Downloading builder script: 0.00B [00:00, ?B/s]

In [3]:
# Two-way lookup between class indices and NER tag names, taken from the
# dataset's own label definition so the order matches the stored tag ids.
id2label = dict(enumerate(ds["train"].features["ner_tags"].feature.names))
label2id = {tag: index for index, tag in id2label.items()}

In [4]:
# Display the index -> tag mapping to check the label order.
id2label

{0: 'B-DATE',
 1: 'I-DATE',
 2: 'B-LOC',
 3: 'I-LOC',
 4: 'B-NUM',
 5: 'I-NUM',
 6: 'B-ORG',
 7: 'I-ORG',
 8: 'B-PER',
 9: 'I-PER',
 10: 'B-TIME',
 11: 'I-TIME',
 12: 'O'}

In [5]:
# Display the tag -> index mapping (the inverse of id2label).
label2id

{'B-DATE': 0,
 'I-DATE': 1,
 'B-LOC': 2,
 'I-LOC': 3,
 'B-NUM': 4,
 'I-NUM': 5,
 'B-ORG': 6,
 'I-ORG': 7,
 'B-PER': 8,
 'I-PER': 9,
 'B-TIME': 10,
 'I-TIME': 11,
 'O': 12}

In [6]:
# Number of NER tags; passed to the model as the size of its classifier head.
num_labels = len(ds["train"].features["ner_tags"].feature.names)
num_labels

13

In [7]:
# Start from the POS-tagging checkpoint and give it a token-classification
# head sized for the NER tag set. ignore_mismatched_sizes=True lets loading
# proceed even though the checkpoint's classifier has a different shape.
model = AutoModelForTokenClassification.from_pretrained(
    "chuuhtetnaing/myanmar-pos-model",
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True,
)

Some weights of XLMRobertaForTokenClassification were not initialized from the model checkpoint at chuuhtetnaing/myanmar-pos-model and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([29]) in the checkpoint and torch.Size([13]) in the model instantiated
- classifier.weight: found shape torch.Size([29, 768]) in the checkpoint and torch.Size([13, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
# Tag names indexed by class id; compute_metrics uses this to turn ids into
# the string tags that seqeval expects.
label_list = ds["train"].features["ner_tags"].feature.names

def compute_metrics(p):
    """Turn raw token-classification outputs into seqeval scores.

    Args:
        p: A ``(predictions, labels)`` pair. ``predictions`` holds per-class
            scores and is reduced with ``argmax`` over axis 2 to one class id
            per token; ``labels`` holds the gold class ids, with ``-100``
            marking positions that are excluded from scoring.

    Returns:
        dict with ``precision``, ``recall``, ``f1`` and ``accuracy``, copied
        from the ``overall_*`` entries of the seqeval result.
    """
    scores, gold_ids = p
    predicted_ids = np.argmax(scores, axis=2)

    true_predictions = []
    true_labels = []
    for predicted_row, gold_row in zip(predicted_ids, gold_ids):
        # Filter once per sequence: keep only positions that carry a real label.
        kept = [
            (predicted_id, gold_id)
            for predicted_id, gold_id in zip(predicted_row, gold_row)
            if gold_id != -100
        ]
        true_predictions.append([label_list[predicted_id] for predicted_id, _ in kept])
        true_labels.append([label_list[gold_id] for _, gold_id in kept])

    # zero_division=0 keeps the score at 0 for a class with no predictions or
    # no references, but without emitting an undefined-metric warning on
    # every evaluation pass.
    results = seqeval.compute(
        predictions=true_predictions,
        references=true_labels,
        zero_division=0,
    )
    return {
        "precision": results["overall_precision"],
        "recall": results["overall_recall"],
        "f1": results["overall_f1"],
        "accuracy": results["overall_accuracy"],
    }

In [9]:
def tokenize_and_align_labels(examples):
    """Tokenize pre-split words and spread word-level tags over sub-tokens.

    Args:
        examples: A batch with ``"tokens"`` (one word list per example) and
            ``"ner_tags"`` (one tag id per word).

    Returns:
        The tokenizer output with an added ``"labels"`` entry. For each
        example, the first sub-token of every word carries that word's tag;
        special tokens and the remaining sub-tokens of a word carry ``-100``.
    """
    encoded = tokenizer(examples["tokens"], truncation=True, is_split_into_words=True)

    aligned = []
    for row, word_tags in enumerate(examples["ner_tags"]):
        token_to_word = list(encoded.word_ids(batch_index=row))
        # Pair each token's word index with the one of the token before it.
        preceding = [None] + token_to_word[:-1]
        aligned.append(
            [
                word_tags[current] if current is not None and current != before else -100
                for current, before in zip(token_to_word, preceding)
            ]
        )

    encoded["labels"] = aligned
    return encoded

In [10]:
# Tokenize every split in batches, adding the aligned "labels" column.
tokenized_ds = ds.map(function=tokenize_and_align_labels, batched=True)

In [11]:
# Display the tokenized splits to check their columns and row counts.
tokenized_ds

DatasetDict({
    train: Dataset({
        features: ['tokens', 'ner_tags', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 12825
    })
    test: Dataset({
        features: ['tokens', 'ner_tags', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 3207
    })
})

In [12]:
training_args = TrainingArguments(
    # Checkpoint location and Hub publishing.
    output_dir="myanmar_ner_model",
    push_to_hub=True,
    hub_private_repo=True,
    # Optimisation.
    num_train_epochs=30,
    learning_rate=2e-5,
    weight_decay=0.01,
    per_device_train_batch_size=100,
    per_device_eval_batch_size=100,
    gradient_accumulation_steps=8,  # 100 * 8 = 800 examples per optimizer step per device
    # Evaluate and checkpoint once per epoch; the best checkpoint by F1 is
    # reloaded when training finishes.
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=5,
    save_safetensors=True,
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    greater_is_better=True,
    # Logging.
    logging_strategy="steps",
    logging_steps=10,
    report_to=["wandb", "tensorboard"],
)

# Wire the model, the tokenized splits and the metric function into a Trainer.
# The "test" split serves as the evaluation set during training.
trainer = Trainer(
    args=training_args,
    model=model,
    processing_class=tokenizer,
    data_collator=data_collator,
    train_dataset=tokenized_ds["train"],
    eval_dataset=tokenized_ds["test"],
    compute_metrics=compute_metrics,
)

In [13]:
# Open the Weights & Biases run under the "myanmar-ner-fine-tuning" project
# before training starts.
# NOTE(review): presumably the Trainer's wandb reporting attaches to this
# already-open run rather than creating its own — confirm.
wandb.init(project="myanmar-ner-fine-tuning")

In [14]:
# Run fine-tuning; the returned TrainOutput is shown as the cell result.
trainer.train()

Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,1.5385,0.373009,0.539736,0.506763,0.52273,0.917514
2,0.2673,0.180883,0.727085,0.795762,0.759875,0.948127
3,0.1623,0.129468,0.781479,0.840848,0.810077,0.963668
4,0.1291,0.101471,0.783573,0.860234,0.820116,0.971045
5,0.0992,0.096467,0.819967,0.894274,0.85551,0.971888
6,0.0801,0.087866,0.829911,0.901939,0.864427,0.973794
7,0.0706,0.081871,0.857959,0.913661,0.884934,0.976474
8,0.0636,0.076828,0.865984,0.914788,0.889717,0.977952
9,0.0577,0.075721,0.878416,0.920198,0.898822,0.978436
10,0.0527,0.07598,0.873732,0.912534,0.892711,0.979113


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


TrainOutput(global_step=510, training_loss=0.09467308725796494, metrics={'train_runtime': 2707.8493, 'train_samples_per_second': 142.087, 'train_steps_per_second': 0.188, 'total_flos': 3.192321433027245e+16, 'train_loss': 0.09467308725796494, 'epoch': 30.0})

In [18]:
# Report the best tracked metric value and the checkpoint it came from.
print(
    f"Best metric: {trainer.state.best_metric}",
    f"Best model checkpoint: {trainer.state.best_model_checkpoint}",
    sep="\n",
)

Best metric: 0.909736083388778
Best model checkpoint: myanmar_ner_model/checkpoint-255


In [20]:
import json
from huggingface_hub import HfApi

# Take the list of logged records from the trainer's state. The code below
# reads the "loss", "eval_loss", "step" and "epoch" keys of these records to
# build the results table for the model card.
log_history = trainer.state.log_history

def fmt(val):
    """Render a number with four decimal places, or "N/A" when it is None."""
    if val is None:
        return "N/A"
    return format(val, ".4f")

# Separate the log records: evaluation records carry "eval_loss"; training
# records carry "loss" but no "eval_loss" and are indexed by their step.
eval_logs = [record for record in log_history if "eval_loss" in record]
train_logs = {
    record["step"]: record
    for record in log_history
    if "eval_loss" not in record and "loss" in record
}

# Markdown table for the model card, starting with header and divider rows.
table_rows = [
    "| Epoch | Training Loss | Validation Loss | Precision | Recall | F1 | Accuracy |",
    "|-------|---------------|-----------------|-----------|--------|------|----------|",
]

def get_nearest_train_loss(step, train_logs):
    """Return the training loss logged at the latest step not after ``step``.

    Args:
        step: Step of the evaluation record being matched.
        train_logs: Mapping of step -> training log record with a "loss" key.

    Returns:
        The matching "loss" value, or None when ``train_logs`` is empty/None
        or holds no record at or before ``step``.
    """
    if not train_logs:
        return None
    latest = max((logged for logged in train_logs if logged <= step), default=None)
    return None if latest is None else train_logs[latest]["loss"]

# Add one table row per evaluation record, pairing it with the most recent
# training loss logged at or before the same step.
for eval_record in eval_logs:
    train_loss = get_nearest_train_loss(eval_record.get("step"), train_logs)
    cells = [
        f"{eval_record.get('epoch'):.0f}",
        fmt(train_loss),
        fmt(eval_record.get("eval_loss")),
        fmt(eval_record.get("eval_precision")),
        fmt(eval_record.get("eval_recall")),
        fmt(eval_record.get("eval_f1")),
        fmt(eval_record.get("eval_accuracy")),
    ]
    table_rows.append("| " + " | ".join(cells) + " |")

# Name of the best checkpoint directory for the model card. The last path
# component is taken with os.path so that a nested output_dir or a different
# path separator still yields "checkpoint-N"; "N/A" is used when the trainer
# recorded no best checkpoint.
best_checkpoint_path = trainer.state.best_model_checkpoint
best_checkpoint_name = (
    os.path.basename(os.path.normpath(best_checkpoint_path))
    if best_checkpoint_path
    else "N/A"
)

# Model card: YAML front matter, the per-epoch results table built above, a
# training summary, a usage snippet and the tag glossary.
readme = f"""---
license: apache-2.0
base_model: chuuhtetnaing/myanmar-pos-model
tags:
  - token-classification
  - myanmar
  - ner-tagging
language:
  - my
datasets:
  - chuuhtetnaing/myanmar-ner-dataset
metrics:
  - f1
---

# Myanmar ner Tagging Model

Fine-tuned [myanmar-pos-model](https://huggingface.co/chuuhtetnaing/myanmar-pos-model) for Myanmar NER tagging.

## Training Results

{chr(10).join(table_rows)}

## Training Details

| Parameter | Value |
|-----------|-------|
| Base Model | chuuhtetnaing/myanmar-pos-model |
| Total Epochs | {trainer.state.epoch:.0f} |
| Total Steps | {trainer.state.global_step} |
| Best Checkpoint | {best_checkpoint_name} |
| Best F1 | {fmt(trainer.state.best_metric)} |

## Usage
```python
from transformers import pipeline

nlp = pipeline("token-classification", model="chuuhtetnaing/myanmar_ner_model", aggregation_strategy="simple")
result = nlp("သူသည်ကျောင်းသို့သွားသည်။")
```

## NER Labels

| Tag | Description |
|-----|-------------|
| B-DATE | Beginning of Date |
| I-DATE | Inside Date |
| B-LOC | Beginning of Location |
| I-LOC | Inside Location |
| B-NUM | Beginning of Number |
| I-NUM | Inside Number |
| B-ORG | Beginning of Organization |
| I-ORG | Inside Organization |
| B-PER | Beginning of Person |
| I-PER | Inside Person |
| B-TIME | Beginning of Time |
| I-TIME | Inside Time |
| O | Outside (Not an entity) |
"""

# Publish the generated model card as README.md in the model's Hub repository.
api = HfApi()
api.upload_file(
    repo_id="chuuhtetnaing/myanmar_ner_model",
    path_in_repo="README.md",
    path_or_fileobj=readme.encode("utf-8"),
    commit_message="Add training results",
)
print("✅ Done!")

✅ Done!
