In [2]:
from transformers import pipeline
import pandas as pd

# Test Cases

## Label Ambiguity / Label Confusion

In [5]:
# Sentences in which a place name (square, street, landmark) also appears
# inside a property name, alongside the property's actual street address.
# Each entry is one sentence; adjacent literals are joined by the parser.
label_confusion = [
    (
        "On Rembrandtplein, at number 19, this mid-sized property offers 40 rooms "
        "above a row of cafés and bars; Rembrandtplein Hotel is often mistaken for "
        "the square itself in ride-hailing apps."
    ),
    (
        "Just off Oxford Circus, guests check in at a narrow façade on Oxford Street, "
        "although Oxford Street Hotel officially lists its entrance on Ramillies Place."
    ),
    (
        "The building at 221B Baker Street operates as a small hotel today, "
        "with Baker Street Rooms using the famous address despite being unrelated "
        "to the literary landmark."
    ),
    (
        "Facing Dam Square and the Royal Palace, rooms here are marketed under "
        "the name Dam Square Hotel, even though the registered address is Damrak 7."
    ),
    (
        "Although its windows overlook Museumplein, the property is entered via "
        "Honthorststraat; Museumplein Residence uses the square name primarily "
        "for branding."
    ),
    (
        "Travelers often assume Grand Canal refers to the waterway, but Grand "
        "Canal Hotel occupies a converted palazzo on Fondamenta San Simeon Piccolo."
    ),
    (
        "Near the Colosseum metro stop, check-in takes place on Via dei Fori "
        "Imperiali, while the accommodation itself is branded as Colosseum Metro Hotel."
    ),
    (
        "Marketed as Eiffel Tower Avenue, the hotel is actually set back from the "
        "landmark, with its street address listed on Avenue de la Bourdonnais."
    ),
    (
        "Union Square appears prominently in the property name, yet Union Square "
        "Suites is located on Post Street, several blocks west of the plaza."
    ),
    (
        "Although commonly referred to as Times Square, the hotel's reception and "
        "mailing address are on West 46th Street, where Times Square Suites "
        "occupies the upper floors."
    ),
]

## Rephrasing Sensitivity

In [6]:
# Fifteen paraphrases of the same statement. Every sentence mentions the same
# hotel name and the same square; only the surrounding wording changes, so the
# two names are substituted into per-sentence templates.
rephrasing_sensitivity = [
    template.format(hotel="Rembrandt Square Hotel", square="Rembrandt Square")
    for template in (
        "I have a reservation at the {hotel} on {square}.",
        "I'm booked at {hotel}, located in {square}.",
        "I have a confirmed booking at {hotel} in {square}.",
        "I'll be staying at the {hotel} at {square}.",
        "I have accommodation booked at {hotel} on {square}.",
        "I have a hotel booking at {hotel}, {square}.",
        "I'm staying at the {hotel} in the {square} area.",
        "I've made a reservation at {hotel} at {square}.",
        "My booking is at the {hotel}, right on {square}.",
        "I have a confirmed stay at {hotel} on {square}.",
        "I've booked a room at {hotel} in {square}.",
        "I have lodging reserved at {hotel}, located at {square}.",
        "I have a room booked at the {hotel} at {square}.",
        "I'm checked in at {hotel} in {square}.",
        "I have a hotel reservation for {hotel} at {square}.",
    )
]


## Placement Sensitivity

In [7]:
# Ten sentences carrying the same hotel name and square name, with the two
# mentions moved to different positions within otherwise similar wording.
placement_sensitivity = [
    pattern.format(hotel="Rembrandt Square Hotel", square="Rembrandt Square")
    for pattern in (
        "The {hotel} located at {square} offers comfortable accommodation in the heart of the city for leisure and business travelers.",
        "Located at {square}, the {hotel} offers comfortable accommodation in the heart of the city for leisure and business travelers.",
        "Offering comfortable accommodation in the heart of the city for leisure and business travelers, the {hotel} is located at {square}.",
        "Comfortable accommodation in the heart of the city for leisure and business travelers is offered by the {hotel} located at {square}.",
        "For leisure and business travelers, comfortable accommodation in the heart of the city is offered by the {hotel} located at {square}.",
        "The {hotel} offers comfortable accommodation in the heart of the city for leisure and business travelers and is located at {square}.",
        "At {square}, the {hotel} offers comfortable accommodation in the heart of the city for leisure and business travelers.",
        "Comfortable accommodation in the heart of the city for leisure and business travelers is offered at {square} by the {hotel}.",
        "The {hotel}, offering comfortable accommodation in the heart of the city for leisure and business travelers, is located at {square}.",
        "Offering comfortable accommodation in the heart of the city for leisure and business travelers, the {hotel} located at {square}.",
    )
]

## Variable Entity Length

In [8]:
# One fixed sentence frame filled with hotel names and addresses of varying
# length (from a single word up to long multi-part names/addresses).
#
# The frame already supplies the leading article, so names are stored without
# it. Previously four entries were written with their own "The" as well,
# producing "The The ..." and putting a stray token in front of the entity.
variable_entity_length = [
    f"The {name} located at {address} offers comfortable accommodation "
    "in the heart of the city for leisure and business travelers."
    for name, address in (
        ("Hotel Six", "4 Oak St"),
        ("Grand Harbor Hotel", "221B Baker Street"),
        ("Royal Crescent Boutique & Spa Hotel", "18-24 Royal Crescent, Bath BA1 2LS"),
        ("M Hotel", "9 Rue du Bac"),
        (
            "St. Augustine Waterfront Conference Hotel & Suites",
            "500 Avenida Menendez, St. Augustine, FL 32084",
        ),
        ("Noor", "1 Al Noor Rd"),
        ("Hotel de l'Opéra", "2 Place de l'Opéra"),
        (
            "International Travelers' Rest Hotel, Residences, and Extended Stay",
            "7777 West Great Northern Boulevard, Building C, Suite 120",
        ),
        ("Ivy", "10 Downing St"),
        (
            "Fjordview Mountain Lodge & Northern Lights Retreat",
            "Kilometer 12, Scenic Route 9, Longyear Valley",
        ),
    )
]

## Discontinuous Entity Handling Errors

In [9]:
# Sentences in which the street name and the house number are separated by
# intervening descriptive text. Each parenthesised group is a single sentence
# (adjacent literals are concatenated by the parser).
discontinuous_entity_handling = [
    (
        "The Riverside Hotel is located on Market Street, just behind the central "
        "railway station, at number 128, making it easy to reach from anywhere "
        "in the city."
    ),
    (
        "Situated on Boulevard Saint-Michel, a short walk from the Sorbonne and "
        "directly opposite the public gardens, the hotel can be found at No. 22."
    ),
    (
        "This modern business hotel stands on King Street, between the old "
        "courthouse and the financial district, with its main entrance at building 77."
    ),
    (
        "Guests will find the property on Via Roma, adjacent to the historic opera "
        "house and near the main shopping arcade, at street number 45."
    ),
    (
        "The hotel occupies a quiet spot on Elm Road, just past the city library "
        "and before the river bridge, at 310 Elm Road."
    ),
    (
        "Located on Avenida Paulista, close to the metro station and across from "
        "the art museum, the hotel's official address is number 1578."
    ),
    (
        "The boutique hotel sits on Baker Street, a few doors down from the famous "
        "museum and next to a small café, at No. 221B."
    ),
    (
        "You'll find the hotel on Queen's Road, near the waterfront promenade and "
        "behind the convention center, with the address listed as 89 Queen's Road."
    ),
    (
        "Set on Friedrichstraße, within walking distance of Checkpoint Charlie and "
        "surrounded by shops, the hotel is registered at number 102."
    ),
    (
        "The property is located on Ocean Drive, directly facing the beach and "
        "beside the historic lifeguard station, at 1400 Ocean Drive."
    ),
]

## Case Sensitivity Errors

In [10]:
# Pairs of the same sentence in its normally-capitalised form ("canonical")
# and fully lower-cased ("case_variant"). The lower-cased side is derived from
# the canonical sentence so the two can never drift apart.
case_sensitivity_errors = [
    {"canonical": sentence, "case_variant": sentence.lower()}
    for sentence in (
        "The Riverside Hotel is located at 128 Market Street, near the central railway station.",
        "Guests can find the Grand Palace Hotel on King Street at Number 77, opposite the courthouse.",
        "The Sunrise Boutique Hotel is situated on Ocean Drive at No. 502, facing the waterfront.",
        "The Linden Tree Hotel is located at 310 Elm Road, close to the university campus.",
        "Visitors will find the Heritage Hotel at 221B Baker Street, next to the historic museum.",
        "The City View Hotel stands on Queen's Road at Number 89, beside the convention center.",
        "The Urban Stay Hotel is located on Avenida Paulista at 1578, close to the metro station.",
        "The Seaside Hotel is positioned at 1400 Ocean Drive, directly facing the beach.",
        "The Mountain Lodge Hotel is found on Alpine Way at No. 64, near the ski lift.",
        "The Royal Heights Hotel operates from 45 Victoria Street, across from Central Park.",
    )
]

## Spelling Errors / Typos

In [11]:
# Hotel descriptions whose hotel names, street names, city names and ordinary
# words contain misspellings (e.g. "Hiltno", "Brodway Avnue", "New Yrok").
# NOTE(review): the typos appear deliberate, matching this section's title —
# keep every string exactly as written; do not spell-check this list.
spelling_errors = [
    "The Grand Hiltno is a luxury hotel located at 123 Brodway Avnue, New Yrok. This Hiltno proprty offers spacious roms, a fintess cneter, and easy acess to major atrractions.",
    "Stay at the Marrioot Marquis in downtown Chciago, situatd at 456 Michgan Avene. The Marrioot is known for its premuim service and conveinent loction near the river.",
    "The Hyatt Regncy San Frnacisco can be found at 789 Embarcdero Rd. This Hyat hotel feautres bay vews, modern metting spaces, and renownd dinning optons.",
    "Enjoy a comfrtable stay at the Sheratonn Times Sqaure Hotel, located at 811 7th Avnue, New Yrok City. The Sheratonn offers recently renovted roms and a larg indoor pool.",
    "The Intercontinetal Londn Park Lane is a prestigous hotel at 1 Park Ln, Londn. This Interconinental propety is famos for its elegent suites and execuative lounge.",
    "Book your visit at the Ritz-Carlonn Los Angles, locaed at 900 W Olympc Blvd. The Ritz-Carlon offers luxury aminities and panoramic city veiwes.",
    "The Westin St. Fransis San Frnacisco is located at 335 Powel Stret. This Westn hotel provides the Heavenl Bed experince and close proxmity to Union Sqaure.",
    "Experience confort at the Holidy Inn Exprss Boston, situatd at 69 Bexer Stret. This Holidy Inn Exprss includes complmentary brakfast and frre WiFi.",
    "The DoubleTree by Hliton Amsterdm Centrl Sttion can be found at Oosterdoksstrat 4. This DoubleTee hotel is known for its warm cookie welcomme and modern facilties.",
    "Stay at the Radisson Blu Berln Alexnderplatz, located at Karl-Libknecht-Strse 3. The Radisson Bllu hotel offers stylish roms and a rooftop welness area."
]

# Modeling

In [None]:
def _entity_rows(test_case_name, phrases, predictions):
    """Flatten the NER output of one test case into one record per entity.

    Args:
        test_case_name: Label written to the "Test_Case" column of every record.
        phrases: The input sentences, in the order they were given to the model.
        predictions: One list of entity dicts per phrase. Each dict is read for
            its 'entity_group', 'word', 'score', 'start' and 'end' keys.

    Returns:
        A list of dicts. A phrase with no predicted entities still produces a
        single record whose entity columns are "N/A", so every phrase shows up
        in the results.
    """
    rows = []
    for phrase_id, (phrase, entities) in enumerate(zip(phrases, predictions), start=1):
        if not entities:
            rows.append({
                "Test_Case": test_case_name,
                "Phrase_ID": phrase_id,
                "Phrase": phrase,
                "Entity_Group": "N/A",
                "Word": "N/A",
                "Score": "N/A",
                "Start": "N/A",
                "End": "N/A"
            })
            continue
        for entity in entities:
            rows.append({
                "Test_Case": test_case_name,
                "Phrase_ID": phrase_id,
                "Phrase": phrase,
                "Entity_Group": entity['entity_group'],
                "Word": entity['word'],
                "Score": f"{entity['score']:.4f}",
                "Start": entity['start'],
                "End": entity['end']
            })
    return rows


# Every test case must be a flat list of sentences. The case-sensitivity set is
# stored as canonical/lower-cased pairs, so each side is evaluated as its own
# test case; rows from the two sides can be matched on Phrase_ID.
test_cases = [
    label_confusion,
    rephrasing_sensitivity,
    placement_sensitivity,
    variable_entity_length,
    discontinuous_entity_handling,
    [pair["canonical"] for pair in case_sensitivity_errors],
    [pair["case_variant"] for pair in case_sensitivity_errors],
    spelling_errors,
]
test_cases_names = [
    "label_confusion",
    "rephrasing_sensitivity",
    "placement_sensitivity",
    "variable_entity_length",
    "discontinuous_entity_handling",
    "case_sensitivity_canonical",
    "case_sensitivity_variant",
    "spelling_errors",
]
assert len(test_cases) == len(test_cases_names), "every test case needs a name"

ner = pipeline("ner", aggregation_strategy="simple", model="51la5/roberta-large-NER")

# Collect the records of ALL test cases. Rebuilding `df` inside the loop would
# leave only the last test case in it once the loop finishes, and that is the
# frame later cells read.
all_results = []
for test_case, test_case_name in zip(test_cases, test_cases_names):
    results = ner(test_case)
    formatted_results = _entity_rows(test_case_name, test_case, results)
    all_results.extend(formatted_results)

    # Per-test-case dump, one JSON record per line.
    print(pd.DataFrame(formatted_results).to_json(orient="records", lines=True))

# Combined results across every test case.
df = pd.DataFrame(all_results)



Loading weights:   0%|          | 0/391 [00:00<?, ?it/s]

XLMRobertaForTokenClassification LOAD REPORT from: 51la5/roberta-large-NER
Key                         | Status     |  | 
----------------------------+------------+--+-
roberta.pooler.dense.bias   | UNEXPECTED |  | 
roberta.pooler.dense.weight | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


{"Test_Case":["On Rembrandtplein, at number 19, this mid-sized property offers 40 rooms above a row of caf\u00e9s and bars; Rembrandtplein Hotel is often mistaken for the square itself in ride-hailing apps.","Just off Oxford Circus, guests check in at a narrow fa\u00e7ade on Oxford Street, although Oxford Street Hotel officially lists its entrance on Ramillies Place.","The building at 221B Baker Street operates as a small hotel today, with Baker Street Rooms using the famous address despite being unrelated to the literary landmark.","Facing Dam Square and the Royal Palace, rooms here are marketed under the name Dam Square Hotel, even though the registered address is Damrak 7.","Although its windows overlook Museumplein, the property is entered via Honthorststraat; Museumplein Residence uses the square name primarily for branding.","Travelers often assume Grand Canal refers to the waterway, but Grand Canal Hotel occupies a converted palazzo on Fondamenta San Simeon Piccolo.","Near the C

In [None]:
# Save the results DataFrame `df` built in the modeling cell to results.csv
# in the working directory; index=False leaves the row index out of the file.
df.to_csv(path_or_buf="results.csv", index=False)