In [3]:
pip install torch transformers peft

Collecting torch
  Using cached torch-2.9.1-cp313-cp313-win_amd64.whl.metadata (30 kB)
Collecting transformers
  Using cached transformers-4.57.3-py3-none-any.whl.metadata (43 kB)
Collecting peft
  Using cached peft-0.18.0-py3-none-any.whl.metadata (14 kB)
Collecting filelock (from torch)
  Using cached filelock-3.20.0-py3-none-any.whl.metadata (2.1 kB)
Collecting jinja2 (from torch)
  Using cached jinja2-3.1.6-py3-none-any.whl.metadata (2.9 kB)
Collecting fsspec>=0.8.5 (from torch)
  Using cached fsspec-2025.10.0-py3-none-any.whl.metadata (10 kB)
Collecting huggingface-hub<1.0,>=0.34.0 (from transformers)
  Using cached huggingface_hub-0.36.0-py3-none-any.whl.metadata (14 kB)
Collecting requests (from transformers)
  Using cached requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting tokenizers<=0.23.0,>=0.22.0 (from transformers)
  Using cached tokenizers-0.22.1-cp39-abi3-win_amd64.whl.metadata (6.9 kB)
Collecting accelerate>=0.21.0 (from peft)
  Using cached accelerate-1.12.0


[notice] A new release of pip is available: 24.3.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [6]:


import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification
from peft import PeftModel
import os

# --- CONFIGURATION ---
NER_TAGS = ['O', 'B-Location', 'I-Location', 'B-Person', 'I-Person', 'B-Organization', 'I-Organization']
id2label = {i: tag for i, tag in enumerate(NER_TAGS)}
label2id = {tag: i for i, tag in enumerate(NER_TAGS)}

MODEL_CHECKPOINT = "xlm-roberta-base"
LOCAL_MODEL_PATH = "D:/nlp_proj/xlmroberta_ner_results/checkpoint-2225" 

# --- LOAD MODEL ---
print("Loading Model...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)
base_model = AutoModelForTokenClassification.from_pretrained(
    MODEL_CHECKPOINT, 
    num_labels=len(NER_TAGS), 
    id2label=id2label, 
    label2id=label2id
)

# Load your LoRA adapters
model = PeftModel.from_pretrained(base_model, LOCAL_MODEL_PATH)
model.eval()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print(f"Model loaded on {device}")

  from .autonotebook import tqdm as notebook_tqdm


Loading Model...


Some weights of XLMRobertaForTokenClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded on cpu


In [73]:

def get_entities(roman_sentence):
    # 1. Normalize
    clean_roman = normalize_roman_urdu(roman_sentence, FINAL_NORMALIZATION_MAP)
    # 2. Transliterate
    urdu_script = translateToUrdu(clean_roman)
    # 3. Predict
    inputs = tokenizer(urdu_script, return_tensors="pt").to(device)
    with torch.no_grad():
        logits = model(**inputs).logits
    predictions = torch.argmax(logits, dim=2)
    # 4. Format Output
    tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])
    pred_labels = [id2label[p.item()] for p in predictions[0]]
    
    results = []
    for token, label in zip(tokens, pred_labels):
        if token not in ['<s>', '</s>']:
            clean_token = token.replace(' ', ' ')
            # Only show entities, ignore 'O'
            if label != 'O':
                results.append((clean_token, label))
                
    return clean_roman, urdu_script, results

raw_text = "lahor ( ain ain aaii ) adakar wasim abas bhi arzi tor pr krachi shft ho giay"
clean, urdu, entities = get_entities(raw_text)

print(f"Input:    {raw_text}")
print(f"Urdu:     {urdu}")
print("Entities:", entities)

Input:    lahor ( ain ain aaii ) adakar wasim abas bhi arzi tor pr krachi shft ho giay
Urdu:     لاھور ( این این آئی ) اداکار وسیم abas بھی ارضی طور پر کراچی شفٹ حو گئے
Entities: [('▁لا', 'B-Location'), ('ھ', 'I-Location'), ('ور', 'I-Location'), ('▁و', 'B-Person'), ('سیم', 'I-Person'), ('▁a', 'I-Person'), ('bas', 'I-Person'), ('▁کراچی', 'B-Location')]


In [4]:
from normalizer import normalize_roman_urdu, FINAL_NORMALIZATION_MAP
from main import translateToUrdu

In [3]:
pip install translators 

Collecting translators
  Using cached translators-6.0.1-py3-none-any.whl.metadata (70 kB)
Collecting httpx>=0.28.1 (from translators)
  Using cached httpx-0.28.1-py3-none-any.whl.metadata (7.1 kB)
Collecting niquests>=3.14.0 (from translators)
  Using cached niquests-3.15.2-py3-none-any.whl.metadata (16 kB)
Collecting exejs>=0.0.4 (from translators)
  Using cached exejs-0.0.6-py3-none-any.whl.metadata (5.1 kB)
Collecting pathos>=0.3.4 (from translators)
  Using cached pathos-0.3.4-py3-none-any.whl.metadata (11 kB)
Collecting cloudscraper>=1.2.71 (from translators)
  Using cached cloudscraper-1.2.71-py2.py3-none-any.whl.metadata (19 kB)
Collecting cryptography>=42.0.4 (from translators)
  Using cached cryptography-46.0.3-cp311-abi3-win_amd64.whl.metadata (5.7 kB)
Collecting pyparsing>=2.4.7 (from cloudscraper>=1.2.71->translators)
  Using cached pyparsing-3.2.5-py3-none-any.whl.metadata (5.0 kB)
Collecting requests-toolbelt>=0.9.1 (from cloudscraper>=1.2.71->translators)
  Using cached 


[notice] A new release of pip is available: 24.3.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip
