In [None]:
!pip install gliner

In [None]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "true"
import json

import torch.nn as nn
import torch.nn.functional as F

from gliner import GLiNER
from gliner.training import Trainer, TrainingArguments
from gliner.data_processing.collator import DataCollator

In [None]:
# Load the processed output from the JSON file which contains data ready for training
with open("../data/processed_output.json", "r", encoding="utf-8") as file:
    processed_output = json.load(file)

print(len(processed_output))
print(processed_output[0])

In [None]:
train_dataset = processed_output[:int(len(processed_output)*0.9)]
test_dataset = processed_output[int(len(processed_output)*0.9):]

In [None]:
gliner_model_base_ft = GLiNER.from_pretrained("urchade/gliner_small")

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]



In [None]:
data_collator_base_ft = DataCollator(gliner_model_base_ft.config, data_processor=gliner_model_base_ft.data_processor, prepare_labels=True)

In [None]:
num_steps = 5 # 3380
batch_size = 8
data_size = len(train_dataset) # 2700
num_batches = data_size // batch_size
num_epochs = 5 # max(1, num_steps // num_batches)

In [None]:
training_args_base_ft = TrainingArguments(
    output_dir="gliner_model_base_ft",
    per_device_train_batch_size=batch_size,  # adjust
    per_device_eval_batch_size=batch_size,
    num_train_epochs=num_epochs,
    learning_rate=5e-5,
    weight_decay=0.01,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=False,
    metric_for_best_model="eval_loss",  # optionally use custom metric (e.g. F1)
    report_to="none",
)

In [None]:
trainer_base_ft = Trainer(
    model=gliner_model_base_ft,
    args=training_args_base_ft,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=gliner_model_base_ft.data_processor.transformer_tokenizer,
    data_collator=data_collator_base_ft,
)

  trainer_ft = Trainer(


In [None]:
trainer_base_ft.train()

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Epoch,Training Loss,Validation Loss
1,No log,121512.273438
2,No log,56086.707031
3,No log,82117.328125
4,No log,102638.195312
5,No log,94575.921875


TrainOutput(global_step=60, training_loss=176490.61666666667, metrics={'train_runtime': 222.7727, 'train_samples_per_second': 2.02, 'train_steps_per_second': 0.269, 'total_flos': 0.0, 'train_loss': 176490.61666666667, 'epoch': 5.0})

In [None]:
trainer_base_ft.save_model("finetuned_gliner_model_base")

In [None]:
best_ckpt_base_ft = trainer_base_ft.state.best_model_checkpoint

In [None]:
best_ckpt_base_ft_model = GLiNER.from_pretrained(best_ckpt_base_ft, load_tokenizer=True)

config.json not found in /content/gliner_model_ft/checkpoint-24


In [None]:
# Example inference
text = "Any part of your neck  muscles, bones, joints, tendons, ligaments, or nerves  can cause neck problems. Neck pain is very common. Pain may also come from your shoulder, jaw, head, or upper arms. Muscle strain or tension often causes neck pain. The problem is usually overuse, such as from sitting at a computer for too long. Sometimes you can strain your neck muscles from sleeping in an awkward position or overdoing it during exercise. Falls or accidents, including car accidents, are another common cause of neck pain. Whiplash, a soft tissue injury to the neck, is also called neck sprain or strain. Treatment depends on the cause, but may include applying ice, taking pain relievers, getting physical therapy or wearing a cervical collar. You rarely need surgery."

labels = ["medical_condition", "body_part", "cause", "symptom", "treatment"]

entities = best_ckpt_base_ft_model.predict_entities(text, labels, threshold=0.5)

for entity in entities:
    print(entity["text"], "=>", entity["label"])

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Any => symptom
part => treatment
of => treatment
your => treatment
neck  muscles, bones, joints => treatment
, => treatment
tendons => treatment
, => treatment
ligaments => treatment
, => treatment
or => treatment
nerves  can cause => treatment
neck problems => treatment
. => treatment
Neck => treatment
pain is very common => treatment
. => treatment
Pain => treatment
may also come => treatment
from => treatment
your => treatment
shoulder, jaw, head, or upper => treatment
arms => treatment
. => treatment
Muscle => treatment
strain or tension => treatment
often => treatment
causes => treatment
neck => treatment
pain. The problem is usually overuse, such as from sitting => treatment
at => treatment
a => treatment
computer for too long => treatment
. => treatment
Sometimes => treatment
you can => treatment
strain your neck muscles from sleeping => treatment
in => treatment
an => treatment
awkward => treatment
position or overdoing it during exercise. Falls => treatment
or => treatment
acc