## Evaluating PIQN NER Model on MultiCoNER

Ali and Dhanush

In [1]:
import argparse
from datasets import load_dataset
import json
import os

#### Pre Processing

In [2]:
# Lookup table from the source NER label (the part of a tag after "B-"/"I-")
# to the "<coarse>-<fine>" type name written into the converted samples.
# Pairs are grouped by the coarse prefix of the target name.
CONVERSION_DICT = dict(
    [
        # location-*
        ("Facility", "location-facility"),
        ("OtherLOC", "location-other"),
        ("HumanSettlement", "location-human_settlement"),
        ("Station", "location-station"),
        # creative-*
        ("VisualWork", "creative-visual"),
        ("MusicalWork", "creative-music"),
        ("WrittenWork", "creative-written"),
        ("ArtWork", "creative-art"),
        ("Software", "creative-software"),
        # group-*
        ("MusicalGRP", "group-musical"),
        ("PublicCorp", "group-public_corp"),
        ("PrivateCorp", "group-private_corp"),
        ("AerospaceManufacturer", "group-aerospace_manufacturer"),
        ("SportsGRP", "group-sports"),
        ("CarManufacturer", "group-car_manufacturer"),
        ("ORG", "group-organization"),
        # person-*
        ("Scientist", "person-scientist"),
        ("Artist", "person-artist"),
        ("Athlete", "person-athlete"),
        ("Politician", "person-politician"),
        ("Cleric", "person-cleric"),
        ("SportsManager", "person-sports_manager"),
        ("OtherPER", "person-other"),
        # product-*
        ("Clothing", "product-clothing"),
        ("Vehicle", "product-vehicle"),
        ("Food", "product-food"),
        ("Drink", "product-drink"),
        ("OtherPROD", "product-other"),
        # medical-*
        ("Medication/Vaccine", "medical-medication"),
        ("MedicalProcedure", "medical-procedure"),
        ("AnatomicalStructure", "medical-anatomy"),
        ("Symptom", "medical-symptom"),
        ("Disease", "medical-disease"),
    ]
)

In [3]:
# POS Tagger
def pos_tag_tokens(tokens, pos_tagger):
    """
    Produce one POS tag per token.

    Real tagging is currently disabled (see the commented-out calls below), so
    every token receives the placeholder tag "NOUN" and `pos_tagger` is unused.

    Args:
        tokens : Iterable of tokens; only the number of items matters.
        pos_tagger : Accepted for interface compatibility; not used.

    Returns:
        A list containing "NOUN" once for every item in `tokens`.
    """
    # Disabled tagger calls, kept for reference:
    # sentence = Sentence(tokens)
    # pos_tagger.predict(sentence)
    placeholder_tags = []
    for _ in tokens:
        placeholder_tags.append("NOUN")
    return placeholder_tags


def convert_tokens(sample, pos_tagger):
    """
    Convert one MultiCoNER sample into a PIQN-style sample dictionary.

    Args:
        sample : Mapping with the keys 'tokens', 'ner_tags' and 'id'.
        pos_tagger : Forwarded to pos_tag_tokens (which currently ignores it).

    Returns:
        converted_dict: A dict with, in this order, the keys "tokens",
        "entities", "org_id", "relations", "pos", "ltokens" and "rtokens".
        "relations" is always an empty dict and "ltokens"/"rtokens" are
        always empty lists.
    """
    tokens = sample['tokens']
    entities = extract_entities(sample['ner_tags'])
    original_id = sample["id"]
    pos_tags = pos_tag_tokens(tokens, pos_tagger)

    # Key order is kept stable because it determines the order in the JSON dump.
    converted_dict = {}
    converted_dict["tokens"] = tokens
    converted_dict["entities"] = entities
    converted_dict["org_id"] = original_id
    converted_dict["relations"] = {}
    converted_dict["pos"] = pos_tags
    converted_dict["ltokens"] = []
    converted_dict["rtokens"] = []
    return converted_dict


def extract_entities(ner_tags, type_map=None):
    """
    Convert a BIO tag sequence into a list of typed entity spans.

    A span is opened by a "B-<label>" tag and is closed by the next "O" tag,
    by the next "B-" tag, or by the end of the sequence. Closing on "B-" and
    at end-of-sequence matters: entities that touch the end of a sentence or
    directly follow another entity would otherwise be silently dropped.

    Args:
        ner_tags : Sequence of string tags, each "O", "B-<label>" or
            "I-<label>".
        type_map : Optional mapping from the source label (the text after
            "B-") to the output type name. Defaults to CONVERSION_DICT.

    Returns:
        entities: A list of dicts with the keys "type", "start" and "end", in
        order of appearance. "start" is the index of the "B-" tag and "end" is
        exclusive (one past the last token of the span).

    Raises:
        KeyError: If a "B-" tag carries a label that is missing from type_map.
    """
    if type_map is None:
        type_map = CONVERSION_DICT

    entities = []
    open_type = None  # mapped type of the span being built, or None
    open_start = -1

    for idx, tag in enumerate(ner_tags):
        prefix, _, label = tag.partition("-")

        # "O" and "B-" both terminate whatever span is currently open.
        if open_type is not None and (tag == "O" or prefix == "B"):
            entities.append({"type": open_type, "start": open_start, "end": idx})
            open_type = None
            open_start = -1

        if prefix == "B":
            open_type = type_map[label]
            open_start = idx
        # "I-" tags need no action: they extend the open span implicitly, and
        # an "I-" tag with no open span is ignored. The label of an "I-" tag
        # is not compared with the label of the span it continues.

    # Flush a span that runs up to the last token.
    if open_type is not None:
        entities.append({"type": open_type, "start": open_start, "end": len(ner_tags)})

    return entities

In [None]:
# if __name__ == "__main__":

#     parser = argparse.ArgumentParser()
#     parser.add_argument("--language", type=str, default="German (DE)")
#     parser.add_argument("--split", type=str, default="train")
#     parser.add_argument("--save-dir", type=str, default="./data/multiconer")

#     args = parser.parse_args()
#     language = args.language
#     split = args.split
#     save_dir = args.save_dir

#     # pos_tagger = SequenceTagger.load("flair/upos-multi")
#     dataset = load_dataset("MultiCoNER/multiconer_v2", language)
#     converted_json = [convert_tokens(example, None) for example in dataset[split]]

#     with open(save_dir + os.path.sep + f"MultiCoNER_{language.split()[0]}_{split}.json", "w") as f:
#         json.dump(converted_json, f)

In [None]:
def process_language_splits(language, save_dir="./data/multiconer"):
    """
    Convert the train, validation and test splits of one language and write
    each of them to its own JSON file.

    Every example is passed through convert_tokens with no POS tagger (None).
    Files are named "MultiCoNER_<first word of language>_<split>.json" and are
    written as UTF-8 with non-ASCII characters left unescaped.

    Args:
        language (str): Dataset configuration name, e.g. "German (DE)".
        save_dir (str): Output directory; created if it does not exist.
    """
    dataset = load_dataset("MultiCoNER/multiconer_v2", language)
    os.makedirs(save_dir, exist_ok=True)

    for split in ("train", "validation", "test"):
        print(f"Processing {split} split for language: {language}")

        converted_records = []
        for example in dataset[split]:
            converted_records.append(convert_tokens(example, None))

        file_name = f"MultiCoNER_{language.split()[0]}_{split}.json"
        target_path = os.path.join(save_dir, file_name)
        with open(target_path, "w", encoding="utf-8") as handle:
            json.dump(converted_records, handle, ensure_ascii=False, indent=4)

        print(f"Saved: {target_path}")

In [8]:
# Dataset configuration names. Each entry is handed unchanged to
# process_language_splits, which passes it to load_dataset and uses the first
# word (e.g. "German") in the output file names.
languages = [
    "Bangla (BN)",
    "Chinese (ZH)",
    "English (EN)",
    "Farsi (FA)",
    "French (FR)",
    "German (DE)",
    "Hindi (HI)",
    "Italian (IT)",
    "Multilingual (MULTI)",
    "Portuguese (PT)",
    "Spanish (ES)",
    "Swedish (SV)",
    "Ukrainian (UK)"
]

# Convert and save the train/validation/test splits of every language.
# save_dir is given explicitly although it equals the function's default.
for lang in languages:
    process_language_splits(lang, save_dir="./data/multiconer")


0000.parquet:   0%|          | 0.00/1.24M [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/81.1k [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/2.60M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/9708 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/507 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/19859 [00:00<?, ? examples/s]

Processing train split for language: Bangla (BN)
Saved: ./data/multiconer\MultiCoNER_Bangla_train.json
Processing validation split for language: Bangla (BN)
Saved: ./data/multiconer\MultiCoNER_Bangla_validation.json
Processing test split for language: Bangla (BN)
Saved: ./data/multiconer\MultiCoNER_Bangla_test.json


0000.parquet:   0%|          | 0.00/1.14M [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/71.8k [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/2.35M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/9759 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/506 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/20265 [00:00<?, ? examples/s]

Processing train split for language: Chinese (ZH)
Saved: ./data/multiconer\MultiCoNER_Chinese_train.json
Processing validation split for language: Chinese (ZH)
Saved: ./data/multiconer\MultiCoNER_Chinese_validation.json
Processing test split for language: Chinese (ZH)
Saved: ./data/multiconer\MultiCoNER_Chinese_test.json
Processing train split for language: English (EN)
Saved: ./data/multiconer\MultiCoNER_English_train.json
Processing validation split for language: English (EN)
Saved: ./data/multiconer\MultiCoNER_English_validation.json
Processing test split for language: English (EN)
Saved: ./data/multiconer\MultiCoNER_English_test.json
Processing train split for language: Farsi (FA)
Saved: ./data/multiconer\MultiCoNER_Farsi_train.json
Processing validation split for language: Farsi (FA)
Saved: ./data/multiconer\MultiCoNER_Farsi_validation.json
Processing test split for language: Farsi (FA)
Saved: ./data/multiconer\MultiCoNER_Farsi_test.json
Processing train split for language: French

0000.parquet:   0%|          | 0.00/1.27M [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/78.2k [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/2.59M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/9785 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/512 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/20145 [00:00<?, ? examples/s]

Processing train split for language: German (DE)
Saved: ./data/multiconer\MultiCoNER_German_train.json
Processing validation split for language: German (DE)
Saved: ./data/multiconer\MultiCoNER_German_validation.json
Processing test split for language: German (DE)
Saved: ./data/multiconer\MultiCoNER_German_test.json
Processing train split for language: Hindi (HI)
Saved: ./data/multiconer\MultiCoNER_Hindi_train.json
Processing validation split for language: Hindi (HI)
Saved: ./data/multiconer\MultiCoNER_Hindi_validation.json
Processing test split for language: Hindi (HI)
Saved: ./data/multiconer\MultiCoNER_Hindi_test.json
Processing train split for language: Italian (IT)
Saved: ./data/multiconer\MultiCoNER_Italian_train.json
Processing validation split for language: Italian (IT)
Saved: ./data/multiconer\MultiCoNER_Italian_validation.json
Processing test split for language: Italian (IT)
Saved: ./data/multiconer\MultiCoNER_Italian_test.json


0000.parquet:   0%|          | 0.00/20.8M [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/1.23M [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/58.1M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/170824 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/8895 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/358668 [00:00<?, ? examples/s]

Processing train split for language: Multilingual (MULTI)
Saved: ./data/multiconer\MultiCoNER_Multilingual_train.json
Processing validation split for language: Multilingual (MULTI)
Saved: ./data/multiconer\MultiCoNER_Multilingual_validation.json
Processing test split for language: Multilingual (MULTI)
Saved: ./data/multiconer\MultiCoNER_Multilingual_test.json


0000.parquet:   0%|          | 0.00/1.99M [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/116k [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/27.2M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/16469 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/854 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/229490 [00:00<?, ? examples/s]

Processing train split for language: Portuguese (PT)
Saved: ./data/multiconer\MultiCoNER_Portuguese_train.json
Processing validation split for language: Portuguese (PT)
Saved: ./data/multiconer\MultiCoNER_Portuguese_validation.json
Processing test split for language: Portuguese (PT)
Saved: ./data/multiconer\MultiCoNER_Portuguese_test.json


0000.parquet:   0%|          | 0.00/2.00M [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/118k [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/29.4M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/16453 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/854 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/246900 [00:00<?, ? examples/s]

Processing train split for language: Spanish (ES)
Saved: ./data/multiconer\MultiCoNER_Spanish_train.json
Processing validation split for language: Spanish (ES)
Saved: ./data/multiconer\MultiCoNER_Spanish_validation.json
Processing test split for language: Spanish (ES)
Saved: ./data/multiconer\MultiCoNER_Spanish_test.json


0000.parquet:   0%|          | 0.00/1.87M [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/112k [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/25.8M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/16363 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/856 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/231190 [00:00<?, ? examples/s]

Processing train split for language: Swedish (SV)
Saved: ./data/multiconer\MultiCoNER_Swedish_train.json
Processing validation split for language: Swedish (SV)
Saved: ./data/multiconer\MultiCoNER_Swedish_validation.json
Processing test split for language: Swedish (SV)
Saved: ./data/multiconer\MultiCoNER_Swedish_test.json


0000.parquet:   0%|          | 0.00/2.19M [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/128k [00:00<?, ?B/s]

0000.parquet:   0%|          | 0.00/30.1M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/16429 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/851 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/238296 [00:00<?, ? examples/s]

Processing train split for language: Ukrainian (UK)
Saved: ./data/multiconer\MultiCoNER_Ukrainian_train.json
Processing validation split for language: Ukrainian (UK)
Saved: ./data/multiconer\MultiCoNER_Ukrainian_validation.json
Processing test split for language: Ukrainian (UK)
Saved: ./data/multiconer\MultiCoNER_Ukrainian_test.json


#### PIQN Model Training

First, train the model for 30 epochs with a learning rate of 3e-04, then for a further 10 epochs with a learning rate of 3e-06. Training is done on the "Multilingual (MULTI)" subset of the MultiCoNER dataset, which yields a PIQN model trained on multilingual data.

In [1]:
# Shell call to the PIQN training script. Data paths and hyperparameters are
# not set here; presumably they all come from configs/flat.conf, so the file
# must hold the intended settings before this cell is run.
!python piqn.py train --config configs/flat.conf

--------------------------------------------------
Config:
Namespace(train_path='data/multiconer/MultiCoNER_Multilingual_train.json', valid_path='data/multiconer/MultiCoNER_Multilingual_validation.json', save_path='data/checkpoints/multiconer', save_path_include_iteration=False, init_eval=False, save_optimizer=False, train_log_iter=1, final_eval=False, train_batch_size=8, epochs=10, lr=3e-06, lr_warmup=0.0, weight_decay=0.01, max_grad_norm=1.0, match_solver='hungarian', type_loss='celoss', match_warmup_epoch=0, nil_weight=-1.0, match_boundary_weight=2.0, match_class_weight=2.0, loss_boundary_weight=5.0, loss_class_weight=2.0, deeply_weight='same', copy_weight=False, config='configs/flat.conf', local_rank=-1, world_size=-1, types_path='data/multiconer/multiconer_types.json', tokenizer_path='data/checkpoints/multiconer/mutliconer_train/2024-12-06_14:31:43.686654/final_model', lowercase=False, sampling_processes=4, label='mutliconer_train', log_path='data/checkpoints/multiconer', store_

Fine-tune the multilingual PIQN model for German by editing the flat.conf config file. The run trains for 10 epochs with a learning rate of 3e-05, as reported in the logged config below.

In [6]:
# Same command as the previous training cell; which data and checkpoint are
# used depends on the current contents of configs/flat.conf, so edit that
# file before running this cell.
!python piqn.py train --config configs/flat.conf

--------------------------------------------------
Config:
Namespace(train_path='data/multiconer/MultiCoNER_German_train.json', valid_path='data/multiconer/MultiCoNER_German_validation.json', save_path='data/checkpoints/multiconer', save_path_include_iteration=False, init_eval=False, save_optimizer=False, train_log_iter=1, final_eval=False, train_batch_size=8, epochs=10, lr=3e-05, lr_warmup=0.0, weight_decay=0.01, max_grad_norm=1.0, match_solver='hungarian', type_loss='celoss', match_warmup_epoch=0, nil_weight=-1.0, match_boundary_weight=2.0, match_class_weight=2.0, loss_boundary_weight=5.0, loss_class_weight=2.0, deeply_weight='same', copy_weight=False, config='configs/flat.conf', local_rank=-1, world_size=-1, types_path='data/multiconer/multiconer_types.json', tokenizer_path='data/checkpoints/multiconer/mutliconer_train/2024-12-07_21:30:11.684675/final_model', lowercase=False, sampling_processes=4, label='mutliconer_train', log_path='data/checkpoints/multiconer', store_predictions=

#### Testing

Evaluating the finetuned German model on the German test dataset.

In [7]:
# Shell call to the PIQN evaluation script. The test file and model checkpoint
# are presumably read from configs/batch_eval.conf (they appear in the logged
# Namespace), so that file selects what is evaluated.
!python piqn.py eval --config configs/batch_eval.conf

--------------------------------------------------
Config:
data/checkpoints/multiconer/mutliconer_train/2024-12-08_15:44:41.213518/final_model
Namespace(dataset_path='data/multiconer/MultiCoNER_German_test.json', config='configs/batch_eval.conf', local_rank=-1, world_size=-1, types_path='data/multiconer/multiconer_types.json', tokenizer_path='data/checkpoints/multiconer/mutliconer_train/2024-12-08_15:44:41.213518/final_model', lowercase=False, sampling_processes=4, label='mutliconer_eval', log_path='data/checkpoints/multiconer', store_predictions=False, store_examples=False, example_count=None, debug=False, device_id=-1, model_path='data/checkpoints/multiconer/mutliconer_train/2024-12-08_15:44:41.213518/final_model', model_type='piqn', cpu=False, eval_batch_size=8, prop_drop=0.5, freeze_transformer=False, no_overlapping=False, no_partial_overlapping=True, no_duplicate=True, cls_threshold=0.7, boundary_threshold=0.6, pos_size=25, char_lstm_layers=1, lstm_layers=1, char_size=50, char_

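Evaluating the base model on the English test split. Note: the logged config below reports `dataset_path='data/multiconer/MultiCoNER_Multilingual_test.json'`, so confirm that configs/batch_eval.conf pointed at the English test file for this run.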

In [4]:
# Identical to the other evaluation cells: the evaluated test file and model
# are presumably chosen in configs/batch_eval.conf, not in this notebook.
# Check dataset_path in the logged Namespace to see what was actually evaluated.
!python piqn.py eval --config configs/batch_eval.conf

--------------------------------------------------
Config:
data/checkpoints/multiconer/mutliconer_train/2024-12-07_21:30:11.684675/final_model
Namespace(dataset_path='data/multiconer/MultiCoNER_Multilingual_test.json', config='configs/batch_eval.conf', local_rank=-1, world_size=-1, types_path='data/multiconer/multiconer_types.json', tokenizer_path='data/checkpoints/multiconer/mutliconer_train/2024-12-07_21:30:11.684675/final_model', lowercase=False, sampling_processes=4, label='mutliconer_eval', log_path='data/checkpoints/multiconer', store_predictions=False, store_examples=False, example_count=None, debug=False, device_id=-1, model_path='data/checkpoints/multiconer/mutliconer_train/2024-12-07_21:30:11.684675/final_model', model_type='piqn', cpu=False, eval_batch_size=8, prop_drop=0.5, freeze_transformer=False, no_overlapping=False, no_partial_overlapping=True, no_duplicate=True, cls_threshold=0.7, boundary_threshold=0.6, pos_size=25, char_lstm_layers=1, lstm_layers=1, char_size=50,

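Evaluating the base model on the Spanish test split. Note: the logged config below reports `dataset_path='data/multiconer/MultiCoNER_Multilingual_test.json'`, so confirm that configs/batch_eval.conf pointed at the Spanish test file for this run.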

In [3]:
# Identical to the other evaluation cells: the evaluated test file and model
# are presumably chosen in configs/batch_eval.conf, not in this notebook.
# Check dataset_path in the logged Namespace to see what was actually evaluated.
!python piqn.py eval --config configs/batch_eval.conf

--------------------------------------------------
Config:
data/checkpoints/multiconer/mutliconer_train/2024-12-07_21:30:11.684675/final_model
Namespace(dataset_path='data/multiconer/MultiCoNER_Multilingual_test.json', config='configs/batch_eval.conf', local_rank=-1, world_size=-1, types_path='data/multiconer/multiconer_types.json', tokenizer_path='data/checkpoints/multiconer/mutliconer_train/2024-12-07_21:30:11.684675/final_model', lowercase=False, sampling_processes=4, label='mutliconer_eval', log_path='data/checkpoints/multiconer', store_predictions=False, store_examples=False, example_count=None, debug=False, device_id=-1, model_path='data/checkpoints/multiconer/mutliconer_train/2024-12-07_21:30:11.684675/final_model', model_type='piqn', cpu=False, eval_batch_size=8, prop_drop=0.5, freeze_transformer=False, no_overlapping=False, no_partial_overlapping=True, no_duplicate=True, cls_threshold=0.7, boundary_threshold=0.6, pos_size=25, char_lstm_layers=1, lstm_layers=1, char_size=50,