In [None]:
import json
import requests
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
import pandas as pd
import numpy as np
import re

In [None]:
cache = {}

def fetch_wikidata_info(qid, languages=["en", "fr"]):
    """Fetch entity details (name + type) from Wikidata while avoiding recursion."""

    if qid in cache:  # Check cache first
        return cache[qid]

    url = f"https://www.wikidata.org/wiki/Special:EntityData/{qid}.json"

    try:
        response = requests.get(url)
        response.raise_for_status()
        data = response.json()
    except Exception as e:
        print(f"Failed to fetch data for {qid}: {e}")
        return None

    entity_data = data.get("entities", {}).get(qid, {})

    # Extract entity name in multiple languages
    entity_labels = {lang: entity_data.get("labels", {}).get(lang, {}).get("value", "") for lang in languages}

    # Extract entity type (P31: "instance of") - Only English
    entity_type_qid = None
    claims = entity_data.get("claims", {}).get("P31", [])

    if claims:
        entity_type_qid = claims[0].get("mainsnak", {}).get("datavalue", {}).get("value", {}).get("id")

    entity_type_label = ""
    if entity_type_qid and entity_type_qid != qid:  # Prevent self-recursion
        url_type = f"https://www.wikidata.org/wiki/Special:EntityData/{entity_type_qid}.json"
        try:
            response_type = requests.get(url_type)
            response_type.raise_for_status()
            data_type = response_type.json()
            type_data = data_type.get("entities", {}).get(entity_type_qid, {}).get("labels", {})
            entity_type_label = type_data.get("en", {}).get("value", "")
        except Exception as e:
            print(f"Failed to fetch entity type for {entity_type_qid}: {e}")

    result = {
        "qid": qid,
        "entity_name": entity_labels,  # { "en": "Denali", "fr": "Denali" }
        "entity_type": entity_type_label  # "mountain"
    }

    cache[qid] = result  # Store in cache
    return result

In [None]:
def process_jsonl_file(input_path):
    """Process a JSONL file, enrich it with Wikidata info, and return the processed data."""

    enriched_data = []

    with open(input_path, "r", encoding="utf-8") as file:
        lines = file.readlines()

    for line in tqdm(lines, desc="Processing JSONL Entries"):
        entry = json.loads(line)
        enriched_entry = entry.copy()
        enriched_entry["enriched_entities"] = []

        for qid in entry.get("entities", []):
            entity_info = fetch_wikidata_info(qid)
            if entity_info:
                enriched_entry["enriched_entities"].append(entity_info)

        enriched_data.append(enriched_entry)

    return enriched_data

In [None]:
# input_path = r"D:\IIT Bh\LRNLP\Project\semeval.train.v2-e0d1c28b78c8dd4969d25eea5d3bc9cc\semeval\train\fr\train.jsonl"
# processed_data = process_jsonl_file(input_path)

In [None]:
# with open("train_with_e_etype.json", "w", encoding="utf-8") as f:
#     json.dump(processed_data, f, indent=4, ensure_ascii=False)

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
with open("/content/drive/MyDrive/LRNLP-project/train_with_e_etype.json", encoding="utf-8") as file:
    processed_data = json.load(file)

In [None]:
from sklearn.model_selection import train_test_split

# Assuming 'processed_data' contains your loaded JSON data
train_data, test_data = train_test_split(processed_data, test_size=0.2, random_state=42)

print(f"Training data size: {len(train_data)}")
print(f"Testing data size: {len(test_data)}")


Training data size: 4424
Testing data size: 1107


In [None]:
len(train_data)

4424

In [None]:
train_data[0]

{'id': '7961f072',
 'source_locale': 'en',
 'target_locale': 'fr',
 'source': 'Is Quito the capital of Ecuador?',
 'target': 'Quito est-elle la capitale de l’Équateur ?',
 'entities': ['Q736'],
 'from': 'mintaka',
 'enriched_entities': [{'qid': 'Q736',
   'entity_name': {'en': 'Ecuador', 'fr': 'Équateur'},
   'entity_type': 'sovereign state'}]}

In [None]:
train_data[15:25]

[{'id': 'e09490dc',
  'source_locale': 'en',
  'target_locale': 'fr',
  'source': 'What location is based in Antarctica and is considered to be the driest place on Earth?',
  'target': 'Quel endroit est situé en Antarctique et est considéré comme l’endroit le plus sec de la planète ?',
  'entities': ['Q51'],
  'from': 'mintaka',
  'enriched_entities': [{'qid': 'Q51',
    'entity_name': {'en': 'Antarctica', 'fr': 'Antarctique'},
    'entity_type': 'continent'}]},
 {'id': '32b9d2f8',
  'source_locale': 'en',
  'target_locale': 'fr',
  'source': 'How old is the winner of the 2020 Nobel Prize in Literature?',
  'target': 'Quel âge a le lauréat du Prix Nobel de littérature 2020 ?',
  'entities': ['Q37922'],
  'from': 'mintaka',
  'enriched_entities': [{'qid': 'Q37922',
    'entity_name': {'en': 'Nobel Prize in Literature',
     'fr': 'prix Nobel de littérature'},
    'entity_type': 'literary award'}]},
 {'id': '2b3c4ae3',
  'source_locale': 'en',
  'target_locale': 'fr',
  'source': 'Which 

In [None]:
entity_types = [entity["entity_type"] for entry in train_data for entity in entry["enriched_entities"] if entity["entity_type"]]

In [None]:
len(set(entity_types))

409

In [None]:
Counter(entity_types)

Counter({'sovereign state': 1171,
         'main stream': 14,
         'house of representatives': 29,
         'video game series': 35,
         'literary work': 177,
         'intergovernmental organization': 2,
         'type of mass media': 18,
         'continent': 169,
         'lattice tower': 10,
         'part of the world': 20,
         'dominion of the British Empire': 13,
         'novel series': 128,
         'constituent country of the United Kingdom': 62,
         'U.S. state': 210,
         'literary award': 27,
         'world war': 127,
         'civil war': 34,
         'Hogwarts House': 7,
         'type of conflict': 16,
         'profession': 11,
         'territorial entity type': 66,
         'tournament': 68,
         'planetary moon': 7,
         'American football team': 82,
         'position': 66,
         'book series': 30,
         'award for best direction': 11,
         'peace award': 4,
         'award': 35,
         'United States executive order': 2,

In [None]:
def extract_entity_positions(sentence, entity_name):
    """Find the start and end positions of an entity in a sentence."""
    match = re.search(r'\b' + re.escape(entity_name) + r'\b', sentence)
    if match:
        return match.start(), match.end()
    return None, None

In [None]:
def prepare_dataset(processed_data):
    """Generate dataset for training entity recognition & classification."""
    dataset = []

    for entry in processed_data:
        source_sentence = entry["source"]

        for entity in entry["enriched_entities"]:
            entity_name = entity["entity_name"]["en"]
            entity_type = entity["entity_type"]
            translated_name = entity["entity_name"]["fr"]

            # Determine if entity should be kept or translated
            label = entity_type if entity_name == translated_name else "Other"

            # Extract entity position
            start_idx, end_idx = extract_entity_positions(source_sentence, entity_name)
            if start_idx is not None:
                dataset.append({
                    "source": source_sentence,
                    "entity_name": entity_name,
                    "entity_type": label,
                    "start_idx": start_idx,
                    "end_idx": end_idx
                })

    return dataset


In [None]:
df = prepare_dataset(train_data)

In [None]:
df = pd.DataFrame(df)

In [None]:
df.head()

Unnamed: 0,source,entity_name,entity_type,start_idx,end_idx
0,Is Quito the capital of Ecuador?,Ecuador,Other,24,31
1,Is the Mississippi River located in the United...,Mississippi River,Other,7,24
2,Is the Mississippi River located in the United...,United States,Other,40,53
3,How many of The Sims games can be played on mo...,The Sims,Other,12,20
4,When did the author of Pride and Prejudice die?,Pride and Prejudice,Other,23,42


In [None]:
len(df['entity_type'].unique())

43

In [None]:
df['entity_type'].value_counts()

Unnamed: 0_level_0,count
entity_type,Unnamed: 1_level_1
Other,3399
human,110
video game series,26
video game,24
musical group,11
film,8
comics character,6
single,5
media franchise,5
album,5


In [None]:
# !pip install transformers accelerate torch bitsandbytes

## Mapping to generalized categories

In [None]:
category_mapping = {
    "human": "Person",
    "part of the world": "Location",
    "city-state": "Location",
    "border city": "Location",
    "mountain": "Location",
    "locality": "Location",
    "city in the United States": "Location",
    "Municipality of Maryland": "Location",
    "U.S. state": "Location",
    "dominion of the British Empire": "Location",
    "archaeological culture": "Location",
    "sub-province-level division": "Location",

    "video game": "Video Game",
    "video game series": "Video Game",
    "video game character": "Video Game",
    "video game distribution platform": "Video Game",
    "expansion pack": "Video Game",
    "esports discipline": "Video Game",

    "musical group": "Music",
    "single": "Music",
    "album": "Music",
    "rock band": "Music",
    "hip-hop collective": "Music",
    "musical work/composition": "Music",
    "girl group": "Music",
    "boy band": "Music",

    "film": "Film & TV",
    "film series": "Film & TV",
    "animated film": "Film & TV",
    "television series": "Film & TV",
    "television film": "Film & TV",
    "media franchise": "Film & TV",

    "comics character": "Fictional Character",
    "fictional city": "Fictional Character",
    "fictional human": "Fictional Character",
    "Mario franchise character": "Fictional Character",
    "extraterrestrials in fiction": "Fictional Character",

    "sports competition": "Sports",
    "championship": "Sports",

    "business": "Business & Organization",
    "record label": "Business & Organization",
    "labor union": "Business & Organization",
}

# Apply mapping
df["generalized_entity_type"] = df["entity_type"].map(category_mapping).fillna("Other")


In [None]:
df['generalized_entity_type'].value_counts()

Unnamed: 0_level_0,count
generalized_entity_type,Unnamed: 1_level_1
Other,3402
Person,110
Video Game,54
Music,29
Location,18
Film & TV,18
Fictional Character,9
Sports,6
Business & Organization,3


## Trying with BERT

In [None]:
!pip install transformers datasets accelerate peft

Collecting datasets
  Downloading datasets-3.4.1-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12=

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification, TrainingArguments, Trainer
from datasets import Dataset
from peft import LoraConfig, get_peft_model, TaskType

# Define entity label mapping
label_map = {
    "Other": 0,
    "Person": 1,
    "Video Game": 2,
    "Music": 3,
    "Location": 4,
    "Film & TV": 5,
    "Fictional Character": 6,
    "Sports": 7,
    "Business & Organization": 8
}
num_labels = len(label_map)

# Load BERT tokenizer
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

# Load BERT model for token classification
model = AutoModelForTokenClassification.from_pretrained("bert-base-cased", num_labels=num_labels)

# Move to GPU if available
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:  19%|#9        | 83.9M/436M [00:00<?, ?B/s]

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForTokenClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12

In [None]:
def tokenize_and_align_labels(examples):
    tokenized_inputs = tokenizer(
        examples["source"],
        truncation=True,
        padding="max_length",
        max_length=256,
        return_offsets_mapping=True
    )

    labels = [[label_map["Other"]] * len(tokenized_inputs["input_ids"][i]) for i in range(len(tokenized_inputs["input_ids"])) ] # Initialize labels for each example in batch

    for i, example in enumerate(zip(examples["start_idx"], examples["end_idx"], examples["entity_type"])):
        start, end, entity_type = example
        # Using offset_mapping for the current example in the batch
        for j, (offset_start, offset_end) in enumerate(tokenized_inputs["offset_mapping"][i]):
            if offset_start >= start and offset_end <= end:
                labels[i][j] = label_map.get(entity_type, 0) # Assign entity label for current example

    tokenized_inputs["labels"] = labels
    return tokenized_inputs

# Convert Pandas DataFrame to Hugging Face Dataset
dataset = Dataset.from_pandas(df)
dataset = dataset.map(tokenize_and_align_labels, batched=True)

# Split into train and validation
dataset = dataset.train_test_split(test_size=0.1)
train_dataset = dataset["train"]
val_dataset = dataset["test"]


Map:   0%|          | 0/3649 [00:00<?, ? examples/s]

In [None]:
# LoRA config
config = LoraConfig(
    task_type=TaskType.TOKEN_CLS,
    r=8,  # LoRA rank
    lora_alpha=32,  # Scaling factor
    lora_dropout=0.05
)
model = get_peft_model(model, config)


In [None]:
training_args = TrainingArguments(
    output_dir="./bert-ner-results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-4,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=10,
    weight_decay=0.01,
    push_to_hub=False,
    fp16=True,  # Mixed precision
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer
)

# Train the model
trainer.train()


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,0.0
2,0.000000,0.0
3,0.000000,0.0
4,0.000000,0.0
5,0.000000,0.0
6,0.000000,0.0
7,0.000000,0.0
8,0.000000,0.0
9,0.000000,0.0
10,0.000000,0.0


TrainOutput(global_step=4110, training_loss=3.026632666859749e-08, metrics={'train_runtime': 401.8034, 'train_samples_per_second': 81.732, 'train_steps_per_second': 10.229, 'total_flos': 4305989408808960.0, 'train_loss': 3.026632666859749e-08, 'epoch': 10.0})

In [None]:
model.save_pretrained("bert-multi-entity")
tokenizer.save_pretrained("bert-multi-entity")

from transformers import pipeline

ner_pipeline = pipeline("ner", model="bert-multi-entity", tokenizer="bert-multi-entity")

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use cuda:0


[{'entity': 'LABEL_0', 'score': np.float32(0.5010545), 'index': 1, 'word': 'Is', 'start': 0, 'end': 2}, {'entity': 'LABEL_0', 'score': np.float32(0.50382966), 'index': 2, 'word': 'Q', 'start': 3, 'end': 4}, {'entity': 'LABEL_1', 'score': np.float32(0.57879764), 'index': 3, 'word': '##uit', 'start': 4, 'end': 7}, {'entity': 'LABEL_0', 'score': np.float32(0.545436), 'index': 4, 'word': '##o', 'start': 7, 'end': 8}, {'entity': 'LABEL_0', 'score': np.float32(0.5343731), 'index': 5, 'word': 'the', 'start': 9, 'end': 12}, {'entity': 'LABEL_0', 'score': np.float32(0.6032237), 'index': 6, 'word': 'capital', 'start': 13, 'end': 20}, {'entity': 'LABEL_0', 'score': np.float32(0.6356115), 'index': 7, 'word': 'of', 'start': 21, 'end': 23}, {'entity': 'LABEL_1', 'score': np.float32(0.55447274), 'index': 8, 'word': 'Ecuador', 'start': 24, 'end': 31}, {'entity': 'LABEL_0', 'score': np.float32(0.6833871), 'index': 9, 'word': '?', 'start': 31, 'end': 32}]


In [None]:
text = "Is Quito the capital of Ecuador?"
print(ner_pipeline(text))

[{'entity': 'LABEL_0', 'score': np.float32(0.5010545), 'index': 1, 'word': 'Is', 'start': 0, 'end': 2}, {'entity': 'LABEL_0', 'score': np.float32(0.50382966), 'index': 2, 'word': 'Q', 'start': 3, 'end': 4}, {'entity': 'LABEL_1', 'score': np.float32(0.57879764), 'index': 3, 'word': '##uit', 'start': 4, 'end': 7}, {'entity': 'LABEL_0', 'score': np.float32(0.545436), 'index': 4, 'word': '##o', 'start': 7, 'end': 8}, {'entity': 'LABEL_0', 'score': np.float32(0.5343731), 'index': 5, 'word': 'the', 'start': 9, 'end': 12}, {'entity': 'LABEL_0', 'score': np.float32(0.6032237), 'index': 6, 'word': 'capital', 'start': 13, 'end': 20}, {'entity': 'LABEL_0', 'score': np.float32(0.6356115), 'index': 7, 'word': 'of', 'start': 21, 'end': 23}, {'entity': 'LABEL_1', 'score': np.float32(0.55447274), 'index': 8, 'word': 'Ecuador', 'start': 24, 'end': 31}, {'entity': 'LABEL_0', 'score': np.float32(0.6833871), 'index': 9, 'word': '?', 'start': 31, 'end': 32}]


In [None]:
df_filtered = df[df['entity_type'] != 'Other']

In [None]:
df_filtered.reset_index(drop=True, inplace=True)

In [None]:
df_filtered['source'][0]

"What is the estimated population of Europe's most northern capital city?"

In [None]:
input_text = "What is the estimated population of Europe's most northern capital city?"

# Tokenize input text
input_encoding = tokenizer(input_text, padding=True, truncation=True, max_length=512, return_tensors="pt")

# Move input to the same device as the model (CPU/GPU)
input_encoding = {key: val.to(model.device) for key, val in input_encoding.items()}


In [None]:
# Get model predictions
model.eval()
with torch.no_grad():
    outputs = model(**input_encoding)

# Convert logits to predicted entity labels
predicted_labels = torch.argmax(outputs.logits, dim=-1).cpu().numpy()[0]


In [None]:
tokens = tokenizer.convert_ids_to_tokens(input_encoding["input_ids"][0])

# Reverse label_map to get label names from indices
reverse_label_map = {v: k for k, v in label_map.items()}

# Map predicted labels to their names
entity_labels = [reverse_label_map[label] for label in predicted_labels]

# Print results
for token, label in zip(tokens, entity_labels):
    print(f"{token}: {label}")

[CLS]: Other
What: Other
is: Other
the: Other
estimated: Other
population: Other
of: Other
Europe: Other
': Other
s: Other
most: Other
northern: Other
capital: Other
city: Other
?: Other
[SEP]: Other


In [None]:
df_test = prepare_dataset(test_data)

In [None]:
df_test= pd.DataFrame(df_test)

In [None]:
df_test.head()

Unnamed: 0,source,entity_name,entity_type,start_idx,end_idx
0,Who was president of the United States and pre...,United States,Other,25,38
1,Who was president of the United States and pre...,Screen Actors Guild,labor union,60,79
2,How many boroughs does New York City have?,New York City,Other,23,36
3,Where was the author of Anne of Green Gables b...,Anne of Green Gables,Other,24,44
4,"Which state has more Congressional districts, ...",California,Other,46,56


In [None]:
df_test['entity_type'].value_counts()

Unnamed: 0_level_0,count
entity_type,Unnamed: 1_level_1
Other,839
human,24
video game,13
film,8
video game series,7
album,4
musical group,4
comics character,3
championship,2
labor union,1


In [None]:

df_test["generalized_entity_type"] = df_test["entity_type"].map(category_mapping).fillna("Other")

In [None]:
df_test['generalized_entity_type'].value_counts()

Unnamed: 0_level_0,count
generalized_entity_type,Unnamed: 1_level_1
Other,843
Person,24
Video Game,20
Film & TV,10
Music,9
Fictional Character,4
Sports,2
Location,1
Business & Organization,1


In [None]:
import torch
from sklearn.metrics import classification_report
import numpy as np

# Step 1: Function to Get Token-Level Predictions
def get_predictions(model, tokenizer, texts):
    model.eval()  # Set model to evaluation mode
    predictions = []

    for text in texts:
        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512, return_offsets_mapping=True)
        offset_mapping = inputs.pop("offset_mapping")[0].tolist()  # Used for aligning tokens

        # Move to GPU if available
        if torch.cuda.is_available():
            model = model.to("cuda")
            inputs = {key: val.to("cuda") for key, val in inputs.items()}

        # Forward pass
        with torch.no_grad():
            outputs = model(**inputs)

        logits = outputs.logits  # Shape: (batch_size, sequence_length, num_classes)
        predicted_ids = torch.argmax(logits, dim=-1)[0]  # Get class with highest probability (ignore batch dim)

        # Convert token predictions to entity labels
        predicted_labels = []
        for idx, (token_id, (start, end)) in enumerate(zip(predicted_ids, offset_mapping)):
            if start == 0 and end == 0:
                continue  # Ignore special tokens ([CLS], [SEP], padding)

            predicted_labels.append(token_id.item())  # Store as integer

        predictions.append(predicted_labels)

    return predictions

# Step 2: Align True and Predicted Labels
true_labels = df_test['generalized_entity_type'].tolist()  # Ground truth entity labels
test_texts = df_test['source'].tolist()  # Input text data

predicted_labels = get_predictions(model, tokenizer, test_texts)  # Get model predictions

# Convert string labels to numeric using label_map
true_labels_flat = []
predicted_labels_flat = []

for i, entity_label in enumerate(true_labels):
    tokens = tokenizer.tokenize(test_texts[i])  # Tokenize the text
    label_id = label_map[entity_label]  # Convert label to integer
    true_labels_flat.extend([label_id] * len(tokens))  # Repeat entity label for all tokens
    predicted_labels_flat.extend(predicted_labels[i])

# Ensure the lists have the same length
min_length = min(len(true_labels_flat), len(predicted_labels_flat))
true_labels_flat = true_labels_flat[:min_length]
predicted_labels_flat = predicted_labels_flat[:min_length]

# Step 3: Classification Report INCLUDING "Other"
print("\n🔹 Classification Report (Including 'Other'):")
print(classification_report(true_labels_flat, predicted_labels_flat, target_names=list(label_map.keys())))



🔹 Classification Report (Including 'Other'):
                         precision    recall  f1-score   support

                  Other       0.92      1.00      0.96     10820
                 Person       0.00      0.00      0.00       311
             Video Game       0.00      0.00      0.00       302
                  Music       0.00      0.00      0.00       139
               Location       0.00      0.00      0.00        15
              Film & TV       0.00      0.00      0.00       132
    Fictional Character       0.00      0.00      0.00        54
                 Sports       0.00      0.00      0.00        22
Business & Organization       0.00      0.00      0.00        15

               accuracy                           0.92     11810
              macro avg       0.10      0.11      0.11     11810
           weighted avg       0.84      0.92      0.88     11810



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
