In [1]:
! pip install gliner2



In [2]:
! python -m spacy download en_core_web_trf

Collecting en-core-web-trf==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_trf-3.8.0/en_core_web_trf-3.8.0-py3-none-any.whl (457.4 MB)
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m457.4/457.4 MB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting spacy-curated-transformers<1.0.0,>=0.2.2 (from en-core-web-trf==3.8.0)
  Downloading spacy_curated_transformers-0.3.1-py2.py3-none-any.whl.metadata (2.7 kB)
Collecting curated-transformers<0.2.0,>=0.1.0 (from spacy-curated-transformers<1.0.0,>=0.2.2->en-core-web-trf==3.8.0)
  Downloading curated_transformers-0.1.1-py2.py3-none-any.whl.metadata (965 bytes)
Collecting curated-tokenizers<0.1.0,>=0.0.9 (from spacy-curated-transformers<1.0.0,>=0.2.2->en-core-web-trf==3.8.0)
  Downloading curated_tokenizers-0.0.9-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.9 

In [13]:
from gliner2 import GLiNER2
import pandas as pd
import spacy
from transformers import pipeline
import gc
import torch

def cleanup_memory():
    """Run Python garbage collection and, if CUDA is available, clear the CUDA cache.

    Called by the ``unload_*`` helpers after a model reference has been dropped.
    Returns nothing.
    """
    gc.collect()
    if not torch.cuda.is_available():
        # CPU-only session: there is no CUDA state to release.
        return
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()

In [None]:

# spaCy label mapping
# Maps the entity labels looked up in extract_spacy onto this notebook's target
# schema. A value of None means entities with that label are discarded.
SPACY_ENTITY_MAPPING = {
    "PERSON": None,      # people are not part of the target schema
    "GPE": "CITY",       # cities / geopolitical entities
    "LOC": "LANDMARK",   # locations and facilities: generally landmarks/POIs here
    "FAC": "LANDMARK",
    # Orgs, products, works of art and events can often be POIs or hotel names
    # in travel text.
    "ORG": "HOTEL_NAME",
    "PRODUCT": "HOTEL_NAME",
    "WORK_OF_ART": "LANDMARK",
    "EVENT": "LANDMARK",
    # Common non-location entity types with no counterpart in the target schema.
    **dict.fromkeys(
        (
            "CARDINAL",
            "DATE",
            "TIME",
            "MONEY",
            "PERCENT",
            "QUANTITY",
            "ORDINAL",
            "LANGUAGE",
            "LAW",
            "NORP",
        )
    ),
}

# Maps the `entity_group` values looked up in extract_roberta onto the target
# schema. None means the group is discarded.
ROBERTA_ENTITY_MAPPING = dict(
    PER=None,
    LOC="LANDMARK",
    ORG="HOTEL_NAME",
    MISC=None,
)


# Test Cases

In [7]:
test_cases = {
"label_confusion" : [
  {
    "text": "On Rembrandtplein, at number 19, this mid-sized property offers 40 rooms above a row of caf√©s and bars; Rembrandtplein Hotel is often mistaken for the square itself in ride-hailing apps.",
    "entities": [
      {"label": "LANDMARK", "text": "Rembrandtplein"},
      {"label": "HOTEL_NAME", "text": "Rembrandtplein Hotel"}
    ]
  },
  {
    "text": "Just off Oxford Circus, guests check in at a narrow fa√ßade on Oxford Street, although Oxford Street Hotel officially lists its entrance on Ramillies Place.",
    "entities": [
      {"label": "LANDMARK", "text": "Oxford Circus"},
      {"label": "STREET_NAME", "text": "Oxford Street"},
      {"label": "HOTEL_NAME", "text": "Oxford Street Hotel"},
      {"label": "STREET_NAME", "text": "Ramillies Place"}
    ]
  },
  {
    "text": "The building at 221B Baker Street operates as a small hotel today, with Baker Street Rooms using the famous address despite being unrelated to the literary landmark.",
    "entities": [
      {"label": "ADDRESS", "text": "221B Baker Street"},
      {"label": "HOTEL_NAME", "text": "Baker Street Rooms"}
    ]
  },
  {
    "text": "Facing Dam Square and the Royal Palace, rooms here are marketed under the name Dam Square Hotel, even though the registered address is Damrak 7.",
    "entities": [
      {"label": "LANDMARK", "text": "Dam Square"},
      {"label": "LANDMARK", "text": "Royal Palace"},
      {"label": "HOTEL_NAME", "text": "Dam Square Hotel"},
      {"label": "ADDRESS", "text": "Damrak 7"}
    ]
  },
  {
    "text": "Although its windows overlook Museumplein, the property is entered via Honthorststraat; Museumplein Residence uses the square name primarily for branding.",
    "entities": [
      {"label": "LANDMARK", "text": "Museumplein"},
      {"label": "STREET_NAME", "text": "Honthorststraat"},
      {"label": "HOTEL_NAME", "text": "Museumplein Residence"}
    ]
  },
  {
    "text": "Travelers often assume Grand Canal refers to the waterway, but Grand Canal Hotel occupies a converted palazzo on Fondamenta San Simeon Piccolo.",
    "entities": [
      {"label": "LANDMARK", "text": "Grand Canal"},
      {"label": "HOTEL_NAME", "text": "Grand Canal Hotel"},
      {"label": "STREET_NAME", "text": "Fondamenta San Simeon Piccolo"}
    ]
  },
  {
    "text": "Near the Colosseum metro stop, check-in takes place on Via dei Fori Imperiali, while the accommodation itself is branded as Colosseum Metro Hotel.",
    "entities": [
      {"label": "LANDMARK", "text": "Colosseum"},
      {"label": "STREET_NAME", "text": "Via dei Fori Imperiali"},
      {"label": "HOTEL_NAME", "text": "Colosseum Metro Hotel"}
    ]
  },
  {
    "text": "Marketed as Eiffel Tower Avenue, the hotel is actually set back from the landmark, with its street address listed on Avenue de la Bourdonnais.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "Eiffel Tower Avenue"},
      {"label": "LANDMARK", "text": "Eiffel Tower"},
      {"label": "STREET_NAME", "text": "Avenue de la Bourdonnais"}
    ]
  },
  {
    "text": "Union Square appears prominently in the property name, yet Union Square Suites is located on Post Street, several blocks west of the plaza.",
    "entities": [
      {"label": "LANDMARK", "text": "Union Square"},
      {"label": "HOTEL_NAME", "text": "Union Square Suites"},
      {"label": "STREET_NAME", "text": "Post Street"}
    ]
  },
  {
    "text": "Although commonly referred to as Times Square, the hotel's reception and mailing address are on West 46th Street, where Times Square Suites occupies the upper floors.",
    "entities": [
      {"label": "LANDMARK", "text": "Times Square"},
      {"label": "STREET_NAME", "text": "West 46th Street"},
      {"label": "HOTEL_NAME", "text": "Times Square Suites"}
    ]
  }
],
"rephrasing_sensitivity": [
  {
    "text": "I have a reservation at the Rembrandt Square Hotel on Rembrandt Square.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "Rembrandt Square Hotel", "start": 27, "end": 48},
      {"label": "LANDMARK", "text": "Rembrandt Square", "start": 52, "end": 67}
    ]
  },
  {
    "text": "I'm booked at Rembrandt Square Hotel, located in Rembrandt Square.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "Rembrandt Square Hotel", "start": 14, "end": 35},
      {"label": "LANDMARK", "text": "Rembrandt Square", "start": 48, "end": 63}
    ]
  },
  {
    "text": "I have a confirmed booking at Rembrandt Square Hotel in Rembrandt Square.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "Rembrandt Square Hotel", "start": 28, "end": 49},
      {"label": "LANDMARK", "text": "Rembrandt Square", "start": 53, "end": 68}
    ]
  },
  {
    "text": "I'll be staying at the Rembrandt Square Hotel at Rembrandt Square.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "Rembrandt Square Hotel", "start": 23, "end": 44},
      {"label": "LANDMARK", "text": "Rembrandt Square", "start": 48, "end": 63}
    ]
  },
  {
    "text": "I have accommodation booked at Rembrandt Square Hotel on Rembrandt Square.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "Rembrandt Square Hotel", "start": 28, "end": 49},
      {"label": "LANDMARK", "text": "Rembrandt Square", "start": 53, "end": 68}
    ]
  },
  {
    "text": "I have a hotel booking at Rembrandt Square Hotel, Rembrandt Square.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "Rembrandt Square Hotel", "start": 25, "end": 46},
      {"label": "LANDMARK", "text": "Rembrandt Square", "start": 48, "end": 63}
    ]
  },
  {
    "text": "I'm staying at the Rembrandt Square Hotel in the Rembrandt Square area.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "Rembrandt Square Hotel", "start": 18, "end": 39},
      {"label": "LANDMARK", "text": "Rembrandt Square", "start": 51, "end": 66}
    ]
  },
  {
    "text": "I've made a reservation at Rembrandt Square Hotel at Rembrandt Square.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "Rembrandt Square Hotel", "start": 27, "end": 48},
      {"label": "LANDMARK", "text": "Rembrandt Square", "start": 52, "end": 67}
    ]
  },
  {
    "text": "My booking is at the Rembrandt Square Hotel, right on Rembrandt Square.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "Rembrandt Square Hotel", "start": 21, "end": 42},
      {"label": "LANDMARK", "text": "Rembrandt Square", "start": 53, "end": 68}
    ]
  },
  {
    "text": "I have a confirmed stay at Rembrandt Square Hotel on Rembrandt Square.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "Rembrandt Square Hotel", "start": 25, "end": 46},
      {"label": "LANDMARK", "text": "Rembrandt Square", "start": 50, "end": 65}
    ]
  },
  {
    "text": "I've booked a room at Rembrandt Square Hotel in Rembrandt Square.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "Rembrandt Square Hotel", "start": 21, "end": 42},
      {"label": "LANDMARK", "text": "Rembrandt Square", "start": 46, "end": 61}
    ]
  },
  {
    "text": "I have lodging reserved at Rembrandt Square Hotel, located at Rembrandt Square.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "Rembrandt Square Hotel", "start": 25, "end": 46},
      {"label": "LANDMARK", "text": "Rembrandt Square", "start": 59, "end": 74}
    ]
  },
  {
    "text": "I have a room booked at the Rembrandt Square Hotel at Rembrandt Square.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "Rembrandt Square Hotel", "start": 27, "end": 48},
      {"label": "LANDMARK", "text": "Rembrandt Square", "start": 52, "end": 67}
    ]
  },
  {
    "text": "I'm checked in at Rembrandt Square Hotel in Rembrandt Square.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "Rembrandt Square Hotel", "start": 17, "end": 38},
      {"label": "LANDMARK", "text": "Rembrandt Square", "start": 42, "end": 57}
    ]
  },
  {
    "text": "I have a hotel reservation for Rembrandt Square Hotel at Rembrandt Square.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "Rembrandt Square Hotel", "start": 29, "end": 50},
      {"label": "LANDMARK", "text": "Rembrandt Square", "start": 54, "end": 69}
    ]
  }
],
"placement_sensitivity":[
  {
    "text": "The Rembrandt Square Hotel located at Rembrandt Square offers comfortable accommodation in the heart of the city for leisure and business travelers.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "Rembrandt Square Hotel"},
      {"label": "LANDMARK", "text": "Rembrandt Square"}
    ]
  },
  {
    "text": "Located at Rembrandt Square, the Rembrandt Square Hotel offers comfortable accommodation in the heart of the city for leisure and business travelers.",
    "entities": [
      {"label": "LANDMARK", "text": "Rembrandt Square"},
      {"label": "HOTEL_NAME", "text": "Rembrandt Square Hotel"}
    ]
  },
  {
    "text": "Offering comfortable accommodation in the heart of the city for leisure and business travelers, the Rembrandt Square Hotel is located at Rembrandt Square.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "Rembrandt Square Hotel"},
      {"label": "LANDMARK", "text": "Rembrandt Square"}
    ]
  },
  {
    "text": "Comfortable accommodation in the heart of the city for leisure and business travelers is offered by the Rembrandt Square Hotel located at Rembrandt Square.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "Rembrandt Square Hotel"},
      {"label": "LANDMARK", "text": "Rembrandt Square"}
    ]
  },
  {
    "text": "For leisure and business travelers, comfortable accommodation in the heart of the city is offered by the Rembrandt Square Hotel located at Rembrandt Square.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "Rembrandt Square Hotel"},
      {"label": "LANDMARK", "text": "Rembrandt Square"}
    ]
  },
  {
    "text": "The Rembrandt Square Hotel offers comfortable accommodation in the heart of the city for leisure and business travelers and is located at Rembrandt Square.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "Rembrandt Square Hotel"},
      {"label": "LANDMARK", "text": "Rembrandt Square"}
    ]
  },
  {
    "text": "At Rembrandt Square, the Rembrandt Square Hotel offers comfortable accommodation in the heart of the city for leisure and business travelers.",
    "entities": [
      {"label": "LANDMARK", "text": "Rembrandt Square"},
      {"label": "HOTEL_NAME", "text": "Rembrandt Square Hotel"}
    ]
  },
  {
    "text": "Comfortable accommodation in the heart of the city for leisure and business travelers is offered at Rembrandt Square by the Rembrandt Square Hotel.",
    "entities": [
      {"label": "LANDMARK", "text": "Rembrandt Square"},
      {"label": "HOTEL_NAME", "text": "Rembrandt Square Hotel"}
    ]
  },
  {
    "text": "The Rembrandt Square Hotel, offering comfortable accommodation in the heart of the city for leisure and business travelers, is located at Rembrandt Square.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "Rembrandt Square Hotel"},
      {"label": "LANDMARK", "text": "Rembrandt Square"}
    ]
  },
  {
    "text": "Offering comfortable accommodation in the heart of the city for leisure and business travelers, the Rembrandt Square Hotel located at Rembrandt Square.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "Rembrandt Square Hotel"},
      {"label": "LANDMARK", "text": "Rembrandt Square"}
    ]
  }
],
"variable_entity_length": [
  {
    "text": "The Hotel Six located at 4 Oak St offers comfortable accommodation in the heart of the city for leisure and business travelers.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "Hotel Six"},
      {"label": "ADDRESS", "text": "4 Oak St"}
    ]
  },
  {
    "text": "The Grand Harbor Hotel located at 221B Baker Street offers comfortable accommodation in the heart of the city for leisure and business travelers.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "Grand Harbor Hotel"},
      {"label": "ADDRESS", "text": "221B Baker Street"}
    ]
  },
  {
    "text": "The The Royal Crescent Boutique & Spa Hotel located at 18-24 Royal Crescent, Bath BA1 2LS offers comfortable accommodation in the heart of the city for leisure and business travelers.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "The Royal Crescent Boutique & Spa Hotel"},
      {"label": "ADDRESS", "text": "18-24 Royal Crescent, Bath BA1 2LS"},
    ]
  },
  {
    "text": "The M Hotel located at 9 Rue du Bac offers comfortable accommodation in the heart of the city for leisure and business travelers.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "M Hotel"},
      {"label": "ADDRESS", "text": "9 Rue du Bac"}
    ]
  },
  {
    "text": "The The St. Augustine Waterfront Conference Hotel & Suites located at 500 Avenida Menendez, St. Augustine, FL 32084 offers comfortable accommodation in the heart of the city for leisure and business travelers.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "The St. Augustine Waterfront Conference Hotel & Suites"},
      {"label": "ADDRESS", "text": "500 Avenida Menendez, St. Augustine, FL 32084"},
    ]
  },
  {
    "text": "The Noor located at 1 Al Noor Rd offers comfortable accommodation in the heart of the city for leisure and business travelers.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "Noor"},
      {"label": "ADDRESS", "text": "1 Al Noor Rd"}
    ]
  },
  {
    "text": "The Hotel de l'Op√©ra located at 2 Place de l'Op√©ra offers comfortable accommodation in the heart of the city for leisure and business travelers.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "Hotel de l'Op√©ra"},
      {"label": "ADDRESS", "text": "2 Place de l'Op√©ra"}
    ]
  },
  {
    "text": "The The International Travelers' Rest Hotel, Residences, and Extended Stay located at 7777 West Great Northern Boulevard, Building C, Suite 120 offers comfortable accommodation in the heart of the city for leisure and business travelers.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "The International Travelers' Rest Hotel, Residences, and Extended Stay"},
      {"label": "ADDRESS", "text": "7777 West Great Northern Boulevard, Building C, Suite 120"}
    ]
  },
  {
    "text": "The Ivy located at 10 Downing St offers comfortable accommodation in the heart of the city for leisure and business travelers.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "Ivy"},
      {"label": "ADDRESS", "text": "10 Downing St"}
    ]
  },
  {
    "text": "The The Fjordview Mountain Lodge & Northern Lights Retreat located at Kilometer 12, Scenic Route 9, Longyear Valley offers comfortable accommodation in the heart of the city for leisure and business travelers.",
    "entities": [
      {"label": "HOTEL_NAME", "text": "The Fjordview Mountain Lodge & Northern Lights Retreat"},
      {"label": "ADDRESS", "text": "Kilometer 12, Scenic Route 9, Longyear Valley"},
    ]
  }
],
"case_sensitivity_errors": [
  {
    "id": "case_sensitivity_errors_01_canonical",
    "text": "The Riverside Hotel is located at 128 Market Street, near the central railway station.",
    "entities": [
      { "label": "HOTEL_NAME", "text": "The Riverside Hotel" },
      { "label": "ADDRESS", "text": "128 Market Street" },
    ]
  },
  {
    "id": "case_sensitivity_errors_01_case_variant",
    "text": "the riverside hotel is located at 128 market street, near the central railway station.",
    "entities": [
      { "label": "HOTEL_NAME", "text": "the riverside hotel" },
      { "label": "ADDRESS", "text": "128 market street" },
    ]
  },
  {
    "id": "case_sensitivity_errors_02_canonical",
    "text": "Guests can find the Grand Palace Hotel on King Street at Number 77, opposite the courthouse.",
    "entities": [
      { "label": "HOTEL_NAME", "text": "Grand Palace Hotel" },
      { "label": "ADDRESS", "text": "Number 77" },
      { "label": "STREET_NAME", "text": "King Street" }
    ]
  },
  {
    "id": "case_sensitivity_errors_02_case_variant",
    "text": "guests can find the grand palace hotel on king street at number 77, opposite the courthouse.",
    "entities": [
      { "label": "HOTEL_NAME", "text": "grand palace hotel" },
      { "label": "ADDRESS", "text": "number 77" },
      { "label": "STREET_NAME", "text": "king street" }
    ]
  },
  {
    "id": "case_sensitivity_errors_03_canonical",
    "text": "The Sunrise Boutique Hotel is situated on Ocean Drive at No. 502, facing the waterfront.",
    "entities": [
      { "label": "HOTEL_NAME", "text": "Sunrise Boutique Hotel" },
      { "label": "STREET_NAME", "text": "Ocean Drive" },
      { "label": "ADDRESS", "text": "No. 502" }
    ]
  },
  {
    "id": "case_sensitivity_errors_03_case_variant",
    "text": "the sunrise boutique hotel is situated on ocean drive at no. 502, facing the waterfront.",
    "entities": [
      { "label": "HOTEL_NAME", "text": "sunrise boutique hotel" },
      { "label": "STREET_NAME", "text": "ocean drive" },
      { "label": "ADDRESS", "text": "no. 502" }
    ]
  },
  {
    "id": "case_sensitivity_errors_04_canonical",
    "text": "The Linden Tree Hotel is located at 310 Elm Road, close to the university campus.",
    "entities": [
      { "label": "HOTEL_NAME", "text": "Linden Tree Hotel" },
      { "label": "ADDRESS", "text": "310 Elm Road" }
    ]
  },
  {
    "id": "case_sensitivity_errors_04_case_variant",
    "text": "the linden tree hotel is located at 310 elm road, close to the university campus.",
    "entities": [
      { "label": "HOTEL_NAME", "text": "linden tree hotel" },
      { "label": "ADDRESS", "text": "310 elm road" }
    ]
  },
  {
    "id": "case_sensitivity_errors_05_canonical",
    "text": "Visitors will find the Heritage Hotel at 221B Baker Street, next to the historic museum.",
    "entities": [
      { "label": "HOTEL_NAME", "text": "Heritage Hotel" },
      { "label": "ADDRESS", "text": "221B Baker Street" },
    ]
  },
  {
    "id": "case_sensitivity_errors_05_case_variant",
    "text": "visitors will find the heritage hotel at 221b baker street, next to the historic museum.",
    "entities": [
      { "label": "HOTEL_NAME", "text": "heritage hotel" },
      { "label": "ADDRESS", "text": "221b baker street" },
    ]
  },
  {
    "id": "case_sensitivity_errors_06_canonical",
    "text": "The City View Hotel stands on Queen's Road at Number 89, beside the convention center.",
    "entities": [
      { "label": "HOTEL_NAME", "text": "City View Hotel" },
      { "label": "STREET_NAME", "text": "Queen's Road" },
      { "label": "ADDRESS", "text": "Number 89" },
    ]
  },
  {
    "id": "case_sensitivity_errors_06_case_variant",
    "text": "the city view hotel stands on queen's road at number 89, beside the convention center.",
    "entities": [
      { "label": "HOTEL_NAME", "text": "city view hotel" },
      { "label": "STREET_NAME", "text": "queen's road" },
      { "label": "ADDRESS", "text": "number 89" },
    ]
  },
  {
    "id": "case_sensitivity_errors_07_canonical",
    "text": "The Urban Stay Hotel is located on Avenida Paulista at 1578, close to the metro station.",
    "entities": [
      { "label": "HOTEL_NAME", "text": "Urban Stay Hotel" },
      { "label": "STREET_NAME", "text": "Avenida Paulista" },
      { "label": "ADDRESS", "text": "1578" }
    ]
  },
  {
    "id": "case_sensitivity_errors_07_case_variant",
    "text": "the urban stay hotel is located on avenida paulista at 1578, close to the metro station.",
    "entities": [
      { "label": "HOTEL_NAME", "text": "urban stay hotel" },
      { "label": "STREET_NAME", "text": "avenida paulista" },
      { "label": "ADDRESS", "text": "1578" }
    ]
  },
  {
    "id": "case_sensitivity_errors_08_canonical",
    "text": "The Seaside Hotel is positioned at 1400 Ocean Drive, directly facing the beach.",
    "entities": [
      { "label": "HOTEL_NAME", "text": "Seaside Hotel" },
      { "label": "ADDRESS", "text": "1400 Ocean Drive" },
    ]
  },
  {
    "id": "case_sensitivity_errors_08_case_variant",
    "text": "the seaside hotel is positioned at 1400 ocean drive, directly facing the beach.",
    "entities": [
      { "label": "HOTEL_NAME", "text": "seaside hotel" },
      { "label": "ADDRESS", "text": "1400 ocean drive" },
    ]
  },
  {
    "id": "case_sensitivity_errors_09_canonical",
    "text": "The Mountain Lodge Hotel is found on Alpine Way at No. 64, near the ski lift.",
    "entities": [
      { "label": "HOTEL_NAME", "text": "Mountain Lodge Hotel" },
      { "label": "STREET_NAME", "text": "Alpine Way" },
      { "label": "ADDRESS", "text": "No. 64" }
    ]
  },
  {
    "id": "case_sensitivity_errors_09_case_variant",
    "text": "the mountain lodge hotel is found on alpine way at no. 64, near the ski lift.",
    "entities": [
      { "label": "HOTEL_NAME", "text": "mountain lodge hotel" },
      { "label": "STREET_NAME", "text": "alpine way" },
      { "label": "ADDRESS", "text": "no. 64" }
    ]
  },
  {
    "id": "case_sensitivity_errors_10_canonical",
    "text": "The Royal Heights Hotel operates from 45 Victoria Street, across from Central Park.",
    "entities": [
      { "label": "HOTEL_NAME", "text": "Royal Heights Hotel" },
      { "label": "ADDRESS", "text": "45 Victoria Street" },
      { "label": "LANDMARK", "text": "Central Park" }
    ]
  },
  {
    "id": "case_sensitivity_errors_10_case_variant",
    "text": "the royal heights hotel operates from 45 victoria street, across from central park.",
    "entities": [
      { "label": "HOTEL_NAME", "text": "royal heights hotel" },
      { "label": "ADDRESS", "text": "45 victoria street" },
      { "label": "LANDMARK", "text": "central park" }
    ]
  }
],
"spelling_errors" :[
  {
    "text": "The Grand Hiltno is a luxury hotel located at 123 Brodway Avnue, New Yrok. This Hiltno proprty offers spacious roms, a fintess cneter, and easy acess to major atrractions.",
    "entities": [
      { "label": "HOTEL_NAME", "span": "The Grand Hiltno" },
      { "label": "HOTEL_BRAND", "span": "Hiltno" },
      { "label": "ADDRESS", "span": "123 Brodway Avnue, New Yrok" },
    ]
  },
  {
    "text": "Stay at the Marrioot Marquis in downtown Chciago, situatd at 456 Michgan Avene. The Marrioot is known for its premuim service and conveinent loction near the river.",
    "entities": [
      { "label": "HOTEL_NAME", "span": "Marrioot Marquis" },
      { "label": "HOTEL_BRAND", "span": "Marrioot" },
      { "label": "CITY", "span": "Chciago" },
      { "label": "ADDRESS", "span": "456 Michgan Avene" },
    ]
  },
  {
    "text": "The Hyatt Regncy San Frnacisco can be found at 789 Embarcdero Rd. This Hyat hotel feautres bay vews, modern metting spaces, and renownd dinning optons.",
    "entities": [
      { "label": "HOTEL_NAME", "span": "Hyatt Regncy San Frnacisco" },
      { "label": "ADDRESS", "span": "789 Embarcdero Rd" },
      { "label": "HOTEL_BRAND", "span": "Hyat" }
    ]
  },
  {
    "text": "Enjoy a comfrtable stay at the Sheratonn Times Sqaure Hotel, located at 811 7th Avnue, New Yrok City. The Sheratonn offers recently renovted roms and a larg indoor pool.",
    "entities": [
      { "label": "HOTEL_NAME", "span": "Sheratonn Times Sqaure Hotel" },
      { "label": "ADDRESS", "span": "811 7th Avnue, New Yrok City" },
      { "label": "HOTEL_BRAND", "span": "Sheratonn" }
    ]
  },
  {
    "text": "The Intercontinetal Londn Park Lane is a prestigous hotel at 1 Park Ln, Londn. This Interconinental propety is famos for its elegent suites and execuative lounge.",
    "entities": [
      { "label": "HOTEL_NAME", "span": "Intercontinetal Londn Park Lane" },
      { "label": "ADDRESS", "span": "1 Park Ln, Londn" },
      { "label": "HOTEL_BRAND", "span": "Interconinental" }
    ]
  },
  {
    "text": "Book your visit at the Ritz-Carlonn Los Angles, locaed at 900 W Olympc Blvd. The Ritz-Carlon offers luxury aminities and panoramic city veiwes.",
    "entities": [
      { "label": "HOTEL_NAME", "span": "Ritz-Carlonn Los Angles" },
      { "label": "ADDRESS", "span": "900 W Olympc Blvd" },
      { "label": "HOTEL_BRAND", "span": "Ritz-Carlon" }
    ]
  },
  {
    "text": "The Westin St. Fransis San Frnacisco is located at 335 Powel Stret. This Westn hotel provides the Heavenl Bed experince and close proxmity to Union Sqaure.",
    "entities": [
      { "label": "HOTEL_NAME", "span": "Westin St. Fransis San Frnacisco" },
      { "label": "ADDRESS", "span": "335 Powel Stret" },
      { "label": "HOTEL_BRAND", "span": "Westn" },
      { "label": "LANDMARK", "span": "Union Sqaure" }
    ]
  },
  {
    "text": "Experience confort at the Holidy Inn Exprss Boston, situatd at 69 Bexer Stret. This Holidy Inn Exprss includes complmentary brakfast and frre WiFi.",
    "entities": [
      { "label": "HOTEL_NAME", "span": "Holidy Inn Exprss Boston" },
      { "label": "ADDRESS", "span": "69 Bexer Stret" },
      { "label": "HOTEL_BRAND", "span": "Holidy Inn Exprss" }
    ]
  },
  {
    "text": "The DoubleTree by Hliton Amsterdm Centrl Sttion can be found at Oosterdoksstrat 4. This DoubleTee hotel is known for its warm cookie welcomme and modern facilties.",
    "entities": [
      { "label": "HOTEL_NAME", "span": "DoubleTree by Hliton Amsterdm Centrl Sttion" },
      { "label": "ADDRESS", "span": "Oosterdoksstrat 4" },
      { "label": "HOTEL_BRAND", "span": "DoubleTee" }
    ]
  },
  {
    "text": "Stay at the Radisson Blu Berln Alexnderplatz, located at Karl-Libknecht-Strse 3. The Radisson Bllu hotel offers stylish roms and a rooftop welness area.",
    "entities": [
      { "label": "HOTEL_NAME", "span": "Radisson Blu Berln Alexnderplatz" },
      { "label": "ADDRESS", "span": "Karl-Libknecht-Strse 3" },
      { "label": "HOTEL_BRAND", "span": "Radisson Bllu" }
    ]
  }
]
}

In [None]:
# Collect every distinct gold label used anywhere in `test_cases`.
# The result is sorted so that the list -- and therefore the label order passed
# to the GLiNER schema in load_gliner -- is identical after every kernel
# restart. Iterating a set of strings gives an order that can change between
# interpreter runs because string hashing is randomized per process.
labels = sorted({
    entity['label']
    for test_items in test_cases.values()
    for test_item in test_items
    for entity in test_item['entities']
})


['ADDRESS', 'CITY', 'HOTEL_NAME', 'HOTEL_BRAND', 'STREET_NAME', 'LANDMARK']

# Modeling

In [None]:
# ---------- GLiNER helpers ----------

def resolve_span_conflicts(entities):
    """Let only the most confident label keep a span that several labels claim.

    Args:
        entities: mapping of label -> prediction, where a prediction is either
            None or a dict with 'text', 'start' and 'end' keys. 'confidence' is
            read only for predictions whose exact (start, end) span is shared
            with another label.

    Returns:
        A new dict with the same labels in the same order, mapping each label to
        its predicted text, or to None when the label had no prediction or lost
        a span conflict. A falsy `entities` argument is returned unchanged.

    Only identical (start, end) pairs count as a conflict; overlapping but
    different spans are all kept. On equal confidence the label that appears
    first in `entities` wins.
    """
    if not entities:
        return entities

    # Group the labels that produced a prediction by the exact span they cover.
    labels_by_span = {}
    for label, value in entities.items():
        if value is None:
            continue
        labels_by_span.setdefault((value['start'], value['end']), []).append(label)

    # In every contested span, every label except the most confident one loses.
    losers = set()
    for contenders in labels_by_span.values():
        if len(contenders) < 2:
            continue
        # max() returns the first maximal item, so ties go to the earliest label.
        winner = max(contenders, key=lambda name: entities[name]['confidence'])
        losers.update(name for name in contenders if name != winner)

    return {
        label: None if (label in losers or value is None) else value['text']
        for label, value in entities.items()
    }


# ---------- Load / unload helpers for each model ----------

# Module-level slots for the lazily loaded models. Each starts out as None and
# is filled by the matching load_* helper and reset to None by unload_*.
_gliner_model = _gliner_schema = _roberta_pipe = _spacy_nlp = None

def load_gliner():
    """Load the GLiNER2 model and build its entity schema, once.

    Populates the module-level `_gliner_model` and `_gliner_schema`. The schema
    is built from the module-level `labels` list. Calling this again while a
    model is already loaded does nothing.
    """
    global _gliner_model, _gliner_schema
    if _gliner_model is not None:
        return
    print("Loading GLiNER model...")
    _gliner_model = GLiNER2.from_pretrained("fastino/gliner2-large-v1")
    # NOTE(review): dtype="str" presumably yields at most one value per label --
    # confirm against the GLiNER2 docs; several fixtures repeat a label within
    # one text (e.g. two STREET_NAME entities).
    _gliner_schema = _gliner_model.create_schema().entities(labels, dtype="str")

def unload_gliner():
    """Drop the cached GLiNER model and schema, then call cleanup_memory().

    Does nothing when no GLiNER model is currently loaded.
    """
    global _gliner_model, _gliner_schema
    if _gliner_model is None:
        return
    print("Unloading GLiNER model...")
    del _gliner_model, _gliner_schema
    # Re-create both names so later `is None` checks keep working.
    _gliner_model = _gliner_schema = None
    cleanup_memory()

def load_roberta():
    """Create the RoBERTa NER pipeline once and store it in `_roberta_pipe`.

    Calling this again while a pipeline is already loaded does nothing.
    """
    global _roberta_pipe
    if _roberta_pipe is not None:
        return
    print("Loading RoBERTa-NER model...")
    _roberta_pipe = pipeline(
        "ner",
        model="51la5/roberta-large-NER",
        aggregation_strategy="simple",
    )

def unload_roberta():
    """Drop the cached RoBERTa NER pipeline, then call cleanup_memory().

    Does nothing when no pipeline is currently loaded.
    """
    global _roberta_pipe
    if _roberta_pipe is None:
        return
    print("Unloading RoBERTa-NER model...")
    del _roberta_pipe
    # Re-create the name so later `is None` checks keep working.
    _roberta_pipe = None
    cleanup_memory()

def load_spacy():
    """Load the `en_core_web_trf` spaCy pipeline once and store it in `_spacy_nlp`.

    Calling this again while a pipeline is already loaded does nothing.
    """
    global _spacy_nlp
    if _spacy_nlp is not None:
        return
    print("Loading spaCy model...")
    _spacy_nlp = spacy.load("en_core_web_trf")

def unload_spacy():
    """Drop the cached spaCy pipeline, then call cleanup_memory().

    Does nothing when no pipeline is currently loaded.
    """
    global _spacy_nlp
    if _spacy_nlp is None:
        return
    print("Unloading spaCy model...")
    del _spacy_nlp
    # Re-create the name so later `is None` checks keep working.
    _spacy_nlp = None
    cleanup_memory()


# ---------- Extraction functions ----------

def extract_gliner(text):
    """Extract entities from `text` with the loaded GLiNER model.

    Requires load_gliner() to have been called first. Labels that lose a
    same-span conflict, or that have no prediction, are left out.

    Returns:
        A set of (label, entity_text) tuples.
    """
    raw = _gliner_model.extract(
        text,
        _gliner_schema,
        include_spans=True,
        include_confidence=True,
    )
    resolved = resolve_span_conflicts(raw.get('entities', {}))
    found = set()
    for label, txt in resolved.items():
        if txt is not None:
            found.add((label, txt))
    return found

def extract_roberta(text):
    """Extract entities from `text` with the loaded RoBERTa NER pipeline.

    Requires load_roberta() to have been called first. Each aggregated group is
    translated through ROBERTA_ENTITY_MAPPING; groups whose mapping is missing
    or None are dropped.

    Returns:
        A set of (label, entity_text) tuples.
    """
    entities = set()
    for group in _roberta_pipe(text):
        mapped = ROBERTA_ENTITY_MAPPING.get(group['entity_group'])
        if not mapped:
            continue
        # The evaluation compares predictions to gold strings by exact match.
        # The pipeline's aggregated 'word' is rebuilt from tokens and can differ
        # from the input's surface form (e.g. a leading space with byte-level
        # BPE tokenizers), so prefer slicing the original text by the returned
        # character offsets when they are available.
        start, end = group.get('start'), group.get('end')
        if start is not None and end is not None:
            surface = text[start:end].strip()
        else:
            surface = group['word'].strip()
        entities.add((mapped, surface))
    return entities

def extract_spacy(text):
    """Extract entities from `text` with the loaded spaCy pipeline.

    Requires load_spacy() to have been called first. Each entity label is
    translated through SPACY_ENTITY_MAPPING; entities whose mapping is missing
    or None are dropped.

    Returns:
        A set of (label, entity_text) tuples.
    """
    doc = _spacy_nlp(text)
    translated = ((SPACY_ENTITY_MAPPING.get(ent.label_), ent.text) for ent in doc.ents)
    return {(target, surface) for target, surface in translated if target}


# Registry of NER back-ends: display name -> (extract_fn, load_fn, unload_fn).
# The evaluation loop unpacks each tuple in exactly this order.
ALGORITHMS = {}
ALGORITHMS["GLiNER"] = (extract_gliner, load_gliner, unload_gliner)
#ALGORITHMS["RoBERTa-NER"] = (extract_roberta, load_roberta, unload_roberta)
ALGORITHMS["spaCy"] = (extract_spacy, load_spacy, unload_spacy)

# ---------- Metrics helper ----------

def compute_metrics(tp, fp, fn):
    """Compute precision, recall and F1 from raw match counts.

    Args:
        tp: Number of true positives.
        fp: Number of false positives.
        fn: Number of false negatives.

    Returns:
        tuple: ``(precision, recall, f1)``. Any ratio whose denominator is not
        positive is reported as ``0.0`` instead of raising.
    """
    predicted_total = tp + fp
    actual_total = tp + fn

    precision = 0.0
    if predicted_total > 0:
        precision = tp / predicted_total

    recall = 0.0
    if actual_total > 0:
        recall = tp / actual_total

    pr_sum = precision + recall
    if pr_sum > 0:
        f1 = 2 * precision * recall / pr_sum
    else:
        f1 = 0.0

    return precision, recall, f1


# ---------- Evaluation loop ----------

import time  # used only for the per-phrase latency measurement below

# Result accumulators. They are re-created on every execution of this cell,
# so re-running it does not append to rows from a previous run.
all_results = []          # one row per predicted or missed entity
all_phrase_metrics = []   # one row per (algorithm, phrase)
all_tc_metrics = []       # one row per (algorithm, test case)
all_algo_metrics = []     # one row per algorithm

# Scoring is exact match on (label, text) pairs. Entities are held in sets,
# so duplicate pairs within one phrase count once.
for algo_name, (extract_fn, load_fn, unload_fn) in ALGORITHMS.items():
    # Load model for this algorithm
    load_fn()

    try:
        algo_tp, algo_fp, algo_fn = 0, 0, 0

        for test_case_name, test_case_items in test_cases.items():
            tc_tp, tc_fp, tc_fn = 0, 0, 0

            for i, item in enumerate(test_case_items):
                phrase = item["text"]
                # Fall back to a synthetic 1-based id when the item has none.
                phrase_id = item.get("id", f"{test_case_name}_{i+1}")

                # Ground truth: the surface text is read from "text", falling
                # back to "span", then to the empty string.
                gt_entities = {
                    (ent["label"], ent.get("text") or ent.get("span") or "")
                    for ent in item.get("entities", [])
                }

                # Prediction with latency measurement (extraction call only)
                start_time = time.perf_counter()
                pred_entities = extract_fn(phrase)
                latency = time.perf_counter() - start_time

                # Phrase-level metrics
                tp = len(pred_entities & gt_entities)
                fp = len(pred_entities - gt_entities)
                fn = len(gt_entities - pred_entities)
                p, r, f1 = compute_metrics(tp, fp, fn)
                tc_tp += tp
                tc_fp += fp
                tc_fn += fn

                all_phrase_metrics.append({
                    "Algorithm": algo_name,
                    "Test_Case": test_case_name,
                    "Phrase_ID": phrase_id,
                    "Phrase": phrase,
                    "TP": tp, "FP": fp, "FN": fn,
                    "Precision": round(p, 4),
                    "Recall": round(r, 4),
                    "F1": round(f1, 4),
                    "Latency": round(latency, 4),
                })

                # Detail rows — predicted
                for label, text in sorted(pred_entities):
                    all_results.append({
                        "Algorithm": algo_name,
                        "Test_Case": test_case_name,
                        "Phrase_ID": phrase_id,
                        "Phrase": phrase,
                        "Entity_Group": label,
                        "Word": text,
                        "Source": "predicted",
                        "Match": (label, text) in gt_entities,
                    })

                # Detail rows — missed ground truth
                for label, text in sorted(gt_entities - pred_entities):
                    all_results.append({
                        "Algorithm": algo_name,
                        "Test_Case": test_case_name,
                        "Phrase_ID": phrase_id,
                        "Phrase": phrase,
                        "Entity_Group": label,
                        "Word": text,
                        "Source": "ground_truth_only",
                        "Match": False,
                    })

            # Per-test-case metrics (micro-averaged over the test case's phrases)
            tc_p, tc_r, tc_f1 = compute_metrics(tc_tp, tc_fp, tc_fn)
            all_tc_metrics.append({
                "Algorithm": algo_name,
                "Test_Case": test_case_name,
                "TP": tc_tp, "FP": tc_fp, "FN": tc_fn,
                "Precision": round(tc_p, 4),
                "Recall": round(tc_r, 4),
                "F1": round(tc_f1, 4),
            })
            algo_tp += tc_tp
            algo_fp += tc_fp
            algo_fn += tc_fn

        # Per-algorithm metrics (micro-averaged over all test cases)
        a_p, a_r, a_f1 = compute_metrics(algo_tp, algo_fp, algo_fn)
        all_algo_metrics.append({
            "Algorithm": algo_name,
            "TP": algo_tp, "FP": algo_fp, "FN": algo_fn,
            "Precision": round(a_p, 4),
            "Recall": round(a_r, 4),
            "F1": round(a_f1, 4),
        })
    finally:
        # Unload model to free memory before loading the next one. Running
        # this in `finally` ensures a failure during extraction or scoring
        # does not leave the model resident in memory.
        unload_fn()

# ---------- Build DataFrames ----------

# One DataFrame per accumulator list filled by the evaluation loop.
df_results, df_phrase, df_test_case, df_algo = (
    pd.DataFrame(rows)
    for rows in (all_results, all_phrase_metrics, all_tc_metrics, all_algo_metrics)
)

# Plain-text report: algorithm summary framed by rules, then the per-test-case
# and per-phrase tables, each preceded by a blank line.
rule = "=" * 60
print(rule, "ALGORITHM-LEVEL METRICS", df_algo.to_string(index=False), rule, sep="\n")
print("\nPER-TEST-CASE METRICS", df_test_case.to_string(index=False), sep="\n")
print("\nPER-PHRASE METRICS", df_phrase.to_string(index=False), sep="\n")

Loading GLiNER model...


You are using a model of type extractor to instantiate a model of type . This is not supported for all configurations of models and can yield errors.


🧠 Model Configuration
Encoder model      : microsoft/deberta-v3-large
Counting layer     : count_lstm
Token pooling      : first


In [None]:
# Emit the per-phrase metrics as JSON Lines (one record per line); end=""
# stops print() from adding a newline after the serialized text.
phrase_metrics_jsonl = df_phrase.to_json(orient="records", lines=True)
print(phrase_metrics_jsonl, end="")

{"Algorithm":"GLiNER","Test_Case":"label_confusion","Phrase_ID":"label_confusion_1","Phrase":"On Rembrandtplein, at number 19, this mid-sized property offers 40 rooms above a row of caf\u00e9s and bars; Rembrandtplein Hotel is often mistaken for the square itself in ride-hailing apps.","TP":1,"FP":2,"FN":1,"Precision":0.3333,"Recall":0.5,"F1":0.4,"Latency":1.9126}
{"Algorithm":"GLiNER","Test_Case":"label_confusion","Phrase_ID":"label_confusion_2","Phrase":"Just off Oxford Circus, guests check in at a narrow fa\u00e7ade on Oxford Street, although Oxford Street Hotel officially lists its entrance on Ramillies Place.","TP":3,"FP":2,"FN":1,"Precision":0.6,"Recall":0.75,"F1":0.6667,"Latency":2.2828}
{"Algorithm":"GLiNER","Test_Case":"label_confusion","Phrase_ID":"label_confusion_3","Phrase":"The building at 221B Baker Street operates as a small hotel today, with Baker Street Rooms using the famous address despite being unrelated to the literary landmark.","TP":1,"FP":2,"FN":1,"Precision":0.