In [1]:
# notebook copied from here:
# https://github.com/urchade/GLiNER/blob/main/examples/finetune.ipynb

import json
from gliner import GLiNER

import torch
from tqdm import tqdm
from transformers import get_cosine_schedule_with_warmup
import os


def read_jsonl(file_path):
    """Read a JSON Lines file into a list of parsed records.

    Args:
        file_path: Path to a text file holding one JSON document per line.

    Returns:
        list: The parsed objects, in file order. Blank lines are ignored.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        OSError: If the file cannot be opened.
    """
    # Explicit UTF-8 so decoding does not depend on the platform's default locale.
    with open(file_path, "r", encoding="utf-8") as f:
        # Skip whitespace-only lines (e.g. an extra newline at the end of the
        # file); json.loads would raise on them.
        return [json.loads(line) for line in f if line.strip()]

### Load NER Dataset for Fine-Tuning

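**TODO**: Load your own NER dataset here. Each record needs a `tokenized_text` list and a `ner` list of `[start, end, label]` spans, as in the example shown below.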

In [2]:
# SAMPLE DATASET
# from https://github.com/urchade/GLiNER/blob/main/examples/sample_data.json
# train_path = "sample_data.json" 

# with open(train_path, "r") as f:
#     data = json.load(f)

In [3]:
# Load the train/eval splits generated in part-2.
# Training records without any entity span are dropped; the eval split is kept as-is.
train_data = [
    record
    for record in read_jsonl('data/generated_dataset_train.jsonl')
    if len(record["ner"]) > 0
]
eval_data = read_jsonl('data/generated_dataset_eval.jsonl')
(len(train_data), len(eval_data))

(27, 8)

In [4]:
# Inspect the first training record: a token list plus [start, end, label] spans.
train_data[0]

{'tokenized_text': ['You',
  'can',
  'look',
  'at',
  'the',
  'construction',
  'after',
  'you',
  'have',
  'analyzed',
  'the',
  'poem',
  'and',
  'have',
  'better',
  'luck',
  'finding',
  'out',
  'why',
  'the',
  'devices',
  'are',
  'used',
  'that',
  'way',
  '.'],
 'ner': [[20, 21, 'tool']]}

### Load Pre-Trained Model

In [5]:
# available models: https://huggingface.co/urchade
# Load the pre-trained "urchade/gliner_small" checkpoint; this is the model
# object passed to train() further down.

model = GLiNER.from_pretrained("urchade/gliner_small")



### Fine-Tuning Code

In [6]:
from types import SimpleNamespace

# Define the hyperparameters in a config variable
# All fine-tuning hyperparameters live in a single namespace object.
config = SimpleNamespace(**{
    # --- training loop ---
    # "num_steps": 10000,  # number of training iterations for a real run
    "num_steps": 10,  # low number of steps for testing NOTE: MODIFY THIS
    "train_batch_size": 2,
    # Evaluate and save a checkpoint every N steps. With num_steps=10 this
    # threshold is never reached, so no intermediate checkpoint is written.
    "eval_every": 1000,
    "save_directory": "logs",  # where checkpoints are saved
    # Below 1: fraction of num_steps used for warmup; otherwise an absolute step count.
    "warmup_ratio": 0.1,
    "device": 'cpu',
    "lr_encoder": 1e-5,  # learning rate for the backbone
    "lr_others": 5e-5,  # learning rate for all other parameters
    "freeze_token_rep": False,  # whether or not to freeze the backbone

    # --- arguments forwarded to set_sampling_params ---
    "max_types": 25,  # maximum number of entity types during training
    "shuffle_types": True,  # whether to shuffle the entity types
    "random_drop": True,  # randomly drop entity types
    # Ratio of positive/negative types: 1 means 50%/50%, 2 means 33%/66%, 3 means 25%/75% ...
    "max_neg_type_ratio": 1,
    "max_len": 384,  # maximum sentence length
})

In [7]:
def _split_eval_data(eval_data):
    # Normalise eval_data into (samples, entity_types); accepts the dict layout or a plain list of records.
    if isinstance(eval_data, dict):
        return eval_data["samples"], eval_data["entity_types"]
    samples = list(eval_data)
    entity_types = sorted({label for sample in samples for _, _, label in sample["ner"]})
    return samples, entity_types


def train(model, config, train_data, eval_data=None):
    """Fine-tune ``model`` for ``config.num_steps`` optimisation steps.

    Args:
        model: GLiNER model to fine-tune. It is moved to ``config.device`` and
            updated in place.
        config: Namespace providing ``device``, ``num_steps``,
            ``train_batch_size``, ``eval_every``, ``save_directory``,
            ``warmup_ratio``, ``lr_encoder``, ``lr_others``,
            ``freeze_token_rep`` and the sampling parameters ``max_types``,
            ``shuffle_types``, ``random_drop``, ``max_neg_type_ratio``,
            ``max_len``.
        train_data: Training records handed to ``model.create_dataloader``.
        eval_data: Optional evaluation data. Either a dict of the form
            ``{"entity_types": [...], "samples": [...]}`` or a plain list of
            records, in which case the entity types are collected from the
            records' ``"ner"`` spans.

    Returns:
        None. Every ``config.eval_every`` steps the model is (optionally)
        evaluated and saved under ``config.save_directory``.

    Raises:
        ValueError: If the training dataloader yields no batches.
    """
    model = model.to(config.device)

    # Set sampling parameters from config
    model.set_sampling_params(
        max_types=config.max_types,
        shuffle_types=config.shuffle_types,
        random_drop=config.random_drop,
        max_neg_type_ratio=config.max_neg_type_ratio,
        max_len=config.max_len
    )

    model.train()

    # Initialize data loaders
    train_loader = model.create_dataloader(train_data, batch_size=config.train_batch_size, shuffle=True)

    # Fail with a clear message instead of an unhandled StopIteration in the loop below.
    if len(train_loader) == 0:
        raise ValueError("train_data produced an empty dataloader; nothing to train on")

    # Optimizer
    optimizer = model.get_optimizer(config.lr_encoder, config.lr_others, config.freeze_token_rep)

    pbar = tqdm(range(config.num_steps))

    # warmup_ratio below 1 is a fraction of num_steps; otherwise it is an absolute step count.
    if config.warmup_ratio < 1:
        num_warmup_steps = int(config.num_steps * config.warmup_ratio)
    else:
        num_warmup_steps = int(config.warmup_ratio)

    scheduler = get_cosine_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=config.num_steps
    )

    iter_train_loader = iter(train_loader)

    for step in pbar:
        # Training is step-based: restart the loader whenever it is exhausted.
        try:
            x = next(iter_train_loader)
        except StopIteration:
            iter_train_loader = iter(train_loader)
            x = next(iter_train_loader)

        # Move tensor entries of the batch to the target device.
        for k, v in x.items():
            if isinstance(v, torch.Tensor):
                x[k] = v.to(config.device)

        loss = model(x)  # Forward pass

        # Skip the whole step (update, logging, checkpoint) when the loss is NaN.
        if torch.isnan(loss):
            continue

        loss.backward()  # Compute gradients
        optimizer.step()  # Update parameters
        scheduler.step()  # Update learning rate schedule
        optimizer.zero_grad()  # Reset gradients

        description = f"step: {step} | epoch: {step // len(train_loader)} | loss: {loss.item():.2f}"
        pbar.set_description(description)

        if (step + 1) % config.eval_every == 0:

            model.eval()

            if eval_data is not None:
                # Accept both the dict layout and a plain list of records.
                eval_samples, eval_entity_types = _split_eval_data(eval_data)
                results, f1 = model.evaluate(eval_samples, flat_ner=True, threshold=0.5, batch_size=12,
                                             entity_types=eval_entity_types)

                print(f"Step={step}\n{results}")

            # exist_ok avoids the separate existence check before creating the directory.
            os.makedirs(config.save_directory, exist_ok=True)

            model.save_pretrained(os.path.join(config.save_directory, f"finetuned_{step}"))

            model.train()

### Run Fine-Tuning

In [8]:
# Evaluation data can also be defined manually here.
# For now, evaluation only supports fixed entity types (but this can easily be extended).
# eval_data = {
#     "entity_types": ["Person", 'Event Reservation'],
#     "samples": data[:10]
# }

train(
    model=model,
    config=config,
    train_data=train_data,
    eval_data=eval_data,
)

step: 9 | epoch: 0 | loss: 12.24: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]


### Save and Load Fine-tuned Model

In [9]:
# Save the fine-tuned model to the local "custom-model-small" path.
model.save_pretrained("custom-model-small")

In [10]:
# Reload the model saved above from the local path.
# NOTE(review): local_files_only=True presumably prevents any download from the Hub — confirm.
loaded_model = GLiNER.from_pretrained("custom-model-small", local_files_only=True)

config.json not found in /home/demiangholipour/projects/zero-shot-ner-fine-tuning/notebooks/custom-model-small


### Evaluate Model

In [11]:
# Show a second training record for reference (it carries two 'tool' spans).
train_data[1]

{'tokenized_text': ['Ampro',
  'Little',
  'Board',
  '(',
  'TM',
  ')',
  'Complete',
  'CNC',
  'assembly',
  '.',
  'Ampro',
  'Little',
  'Board',
  '(',
  'TM',
  ')',
  'Complete',
  'CNC',
  'assembly',
  'Repaired',
  'and',
  'Working',
  '.',
  'If',
  'you',
  'have',
  'any',
  'technical',
  'questions',
  'or',
  'wish',
  'to',
  'offer',
  'technical',
  'information',
  'about',
  'any',
  'Ampro',
  'units',
  'then',
  'fill',
  'out',
  'the',
  'form',
  'below',
  '.'],
 'ner': [[7, 8, 'tool'], [17, 18, 'tool']]}

In [12]:
# Distinct labels that occur in the evaluation spans, in sorted order.
entity_types = sorted({label for record in eval_data for _, _, label in record['ner']})
entity_types

['tool']

In [19]:
# Smoke-test the reloaded model on a raw sentence, asking only for the 'tool' label.
loaded_model.predict_entities("I'm using a screws to build my Ikea chair.", labels=['tool'])

[]

In [14]:
# Score the reloaded model on the eval split with the model's own evaluate() method.
eval_str, f1 = loaded_model.evaluate(
    test_data=eval_data,
    entity_types=entity_types,
    flat_ner=True,
    threshold=0.5,
    batch_size=1,
)
print(eval_str)

P: 0.00%	R: 0.00%	F1: 0.00%



  _warn_prf(average, modifier, msg_start, len(result))


### Evaluation with Nervaluate

In [15]:
def _char_span_to_token_span(tokens, char_start, char_end):
    """Map a character span of ``' '.join(tokens)`` to token indices.

    Args:
        tokens: The tokens that were joined with single spaces.
        char_start: Start offset of the span (inclusive).
        char_end: End offset of the span (exclusive).

    Returns:
        ``(first_token, last_token)`` with an inclusive last index, or ``None``
        when the span overlaps no token.
    """
    first = last = None
    cursor = 0
    for index, token in enumerate(tokens):
        token_start, token_end = cursor, cursor + len(token)
        # A token belongs to the span when the two character ranges overlap.
        if token_start < char_end and token_end > char_start:
            if first is None:
                first = index
            last = index
        cursor = token_end + 1  # skip the single joining space
    return None if first is None else (first, last)


# predict_entities works on raw text and reports character offsets (plus extra
# keys such as the matched text), while the gold spans in eval_data are token
# indices. Convert each prediction to token indices and keep only the keys that
# the gold dicts carry, so the two sides are comparable.
# NOTE(review): assumes the 'ner' spans in eval_data use an inclusive end-token
# index (the GLiNER convention) — confirm against the part-2 generator.
eval_predictions = []
for item in eval_data:
    tokens = item["tokenized_text"]
    text = ' '.join(tokens)
    predictions = []
    for entity in loaded_model.predict_entities(text, labels=entity_types):
        token_span = _char_span_to_token_span(tokens, entity["start"], entity["end"])
        if token_span is None:
            continue
        predictions.append({"label": entity["label"], "start": token_span[0], "end": token_span[1]})
    eval_predictions.append(predictions)

In [16]:
def format_data_for_nervaluate(data):
    """Convert records with ``'ner'`` spans into nervaluate's list-of-dicts layout.

    Every ``[start, end, label]`` entry of a record's ``'ner'`` list becomes
    ``{"label": label, "start": start, "end": end}``. The result holds one list
    per input record, in the same order.

    Example of the produced format:
        [
            [{"label": "PER", "start": 2, "end": 4}],
            [{"label": "LOC", "start": 1, "end": 2},
             {"label": "LOC", "start": 3, "end": 4}],
        ]
    """
    return [
        [
            {"label": tag, "start": first, "end": last}
            for first, last, tag in record['ner']
        ]
        for record in data
    ]

In [17]:
# Gold spans of the eval split, converted to the label/start/end dict layout.
formatted_eval_data = format_data_for_nervaluate(eval_data)

In [18]:
from nervaluate import Evaluator
from pprint import pprint

# true = [
#     [{"label": "PER", "start": 2, "end": 4}],
#     [{"label": "LOC", "start": 1, "end": 2},
#      {"label": "LOC", "start": 3, "end": 4}]
# ]

# pred = [
#     [{"label": "PER", "start": 2, "end": 4}],
#     [{"label": "LOC", "start": 1, "end": 2},
#      {"label": "LOC", "start": 3, "end": 4}]
# ]

# evaluator = Evaluator(true, pred, tags=['LOC', 'PER'])

# Score the predictions against the gold spans. The tags come from
# entity_types (collected from the eval data) instead of a hard-coded list, so
# the scored labels stay in sync with the dataset.
evaluator = Evaluator(formatted_eval_data, eval_predictions, tags=entity_types)

# Returns overall metrics and metrics for each tag

results, results_per_tag = evaluator.evaluate()

pprint(results)

{'ent_type': {'actual': 0,
              'correct': 0,
              'f1': 0,
              'incorrect': 0,
              'missed': 9,
              'partial': 0,
              'possible': 9,
              'precision': 0,
              'recall': 0.0,
              'spurious': 0},
 'exact': {'actual': 0,
           'correct': 0,
           'f1': 0,
           'incorrect': 0,
           'missed': 9,
           'partial': 0,
           'possible': 9,
           'precision': 0,
           'recall': 0.0,
           'spurious': 0},
 'partial': {'actual': 0,
             'correct': 0,
             'f1': 0,
             'incorrect': 0,
             'missed': 9,
             'partial': 0,
             'possible': 9,
             'precision': 0,
             'recall': 0.0,
             'spurious': 0},
 'strict': {'actual': 0,
            'correct': 0,
            'f1': 0,
            'incorrect': 0,
            'missed': 9,
            'partial': 0,
            'possible': 9,
            'precis