### Import evaluation dataset and dependencies

`records` is a list of dictionaries. Each dictionary specifies the original language of the article, metadata such as the title and abstract, and a list of keywords. Each keyword is itself a dictionary that specifies the label, the `wikidata_url`, and the match type. This information is present even if the keyword is not in the original language of the article.

In [1]:
from src.utils import parse_excel_file, compute_precision, compute_recall
from src.clients import OpenAIClient, AnthropicClient, GroqClient, OpenAIWebSearchClient
from src.pipelines import EntityExtractionPipeline
import os
import uuid
from datetime import datetime
import json

In [2]:
# Number of records evaluated in this run; set to None to evaluate the whole dataset.
N_EVAL_RECORDS = 10

total_records = parse_excel_file("./data/Dset_Eval_KW_Alignment_Eval_def.xlsx")
# Slicing with an upper bound of None keeps every record.
records = total_records[:N_EVAL_RECORDS]

### Definition of the client

Run `load_dotenv()` to load all the environment variables, then run the cell for the client you want to use.

In [3]:
from dotenv import load_dotenv
load_dotenv()

True

In [4]:
# Short label for the selected backend; it is embedded in the output file names.
model = 'openai'
# The API key is read from the environment; os.getenv returns None if it is not set.
client = OpenAIClient(api_key=os.getenv("OPENAI_API_KEY"), model_name="gpt-4o-mini")

In [None]:
# Alternative backend: run this cell instead of the other client cells to use
# the OpenAI web-search client. `model` is the label embedded in the output file names.
model = 'openai_websearch'
client = OpenAIWebSearchClient(api_key=os.getenv("OPENAI_API_KEY"), model_name="gpt-4o-search-preview")

In [None]:
# Alternative backend: run this cell instead of the other client cells to use
# the Anthropic client. `model` is the label embedded in the output file names.
model = 'anthropic'
client = AnthropicClient(api_key=os.getenv("ANTHROPIC_API_KEY"), model_name="claude-opus-4-20250514")

In [None]:
# Alternative backend: run this cell instead of the other client cells to use
# the Groq client. `model` is the label embedded in the output file names.
model = 'groq'
client = GroqClient(api_key=os.getenv("GROQ_API_KEY"), model_name="llama-3.1-8b-instant")

### 🔄 Pipeline Initialization

This cell runs the entity extraction pipeline using the input `.xlsx` file. It processes each article in the dataset and does the following for every keyword:

1. **Generate Entity Candidates**
   For each keyword, it generates up to 10 possible matching entities from Wikidata (default value set by `NUM_NAMES`).

2. **Select Relevant Entity**
   From the candidates, the model selects the best matching entity (by default, only 1 is selected).

3. **Track Progress and Save Results**
   The results are saved in a `.json` file with the original article data enriched by the LLM annotations for each keyword.

In [5]:
# Random UUID identifying this run; it is embedded in the output file names.
run_id = str(uuid.uuid4())

In [6]:
# Pipeline built on the selected client; process_keyword calls its extract_entities method.
pipeline = EntityExtractionPipeline(client)

In [7]:
# A partial checkpoint of the results is written every SAVE_EVERY processed keywords.
SAVE_EVERY = 3
# Size of the thread pool used to process keywords in parallel.
MAX_WORKERS = 10
# Output location; file names embed the model label and the run id.
base_output_dir = './data/evaluation_output'
base_filename = f'eval_{model}_{run_id}'
# Raw pipeline output.
results_file = os.path.join(base_output_dir, f"{base_filename}.json")
# Post-processed copy of the results, read by the evaluation cells.
adjusted_file = os.path.join(base_output_dir, f"{base_filename}_adjusted.json")

In [8]:
import json
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
import copy

# Lock held while the results file is written during periodic checkpoints.
file_lock = threading.Lock()

def process_keyword(record_data):
    """Run the entity extraction pipeline for a single keyword.

    Args:
        record_data: 6-tuple ``(record_idx, kw_idx, language, title,
            abstract, keyword_label)``.

    Returns:
        ``(record_idx, kw_idx, llm_uris, error)``. On success ``error`` is
        ``None``. If the pipeline raises, the problem is printed, ``llm_uris``
        is an empty list and ``error`` holds the exception message.
    """
    rec_pos, kw_pos, lang, title, abstract, label = record_data

    try:
        uris = pipeline.extract_entities(
            language=lang,
            title=title,
            abstract=abstract,
            keywords=label,
        )
    except Exception as exc:
        # Best effort: one failing keyword must not abort the whole run.
        print(f"LLM URIs cannot be computed for record {rec_pos}, keyword {kw_pos}: {exc}")
        return rec_pos, kw_pos, [], str(exc)

    return rec_pos, kw_pos, uris, None

# Flatten the dataset into one task tuple per (record, keyword) pair:
# (record_idx, kw_idx, language, title, abstract, keyword_label).
tasks_data = [
    (
        record_idx,
        kw_idx,
        record['language'],
        record['title_eng'],
        record['abstract_eng'],
        kw['label'],
    )
    for record_idx, record in enumerate(records)
    for kw_idx, kw in enumerate(record['kws'])
]

print(f"Preparati {len(tasks_data)} task da processare")

Preparati 48 task da processare


In [9]:

def _save_records(path, data):
    """Write ``data`` to ``path`` as JSON without ever leaving a truncated file.

    The JSON is first written to a sibling ``.tmp`` file and then moved over
    ``path`` with ``os.replace``, so an interruption in the middle of a write
    leaves the previous checkpoint intact. The parent directory is created
    when it does not exist yet.
    """
    os.makedirs(os.path.dirname(path) or '.', exist_ok=True)
    tmp_path = f"{path}.tmp"
    # Serialise writes to the results file.
    with file_lock:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        os.replace(tmp_path, path)


# Process the keywords in parallel.
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
    # Submit every task up front; each future maps to its (record_idx, kw_idx).
    future_to_task = {
        executor.submit(process_keyword, task_data): task_data[0:2]
        for task_data in tasks_data
    }

    processed_count = 0
    failed_count = 0
    with tqdm(total=len(tasks_data), desc="Processing keywords") as pbar:
        for future in as_completed(future_to_task):
            # process_keyword handles its own exceptions and reports them
            # through `error`.
            record_idx, kw_idx, llm_uris, error = future.result()

            # Attach the result to the keyword it belongs to.
            records[record_idx]['kws'][kw_idx]['llm_uris'] = llm_uris
            if error is not None:
                failed_count += 1

            processed_count += 1
            pbar.update(1)

            # Periodic checkpoint so a long run can be inspected or recovered.
            if processed_count % SAVE_EVERY == 0:
                _save_records(results_file, records)

# Final save.
print("All keywords processed, saving final results.")
_save_records(results_file, records)
if failed_count:
    # Failed keywords are stored with an empty llm_uris list; make that visible.
    print(f"{failed_count} of {processed_count} keywords failed and were saved with no URIs.")

Processing keywords:   0%|          | 0/48 [00:00<?, ?it/s]

Entity: Western riding; Description: style of horseback riding which evolved from the ranching and warfare traditions; URI: http://www.wikidata.org/entity/Q1323015
Entity: Western riding; Description: ; URI: http://www.wikidata.org/entity/Q7988530

Entity: horse; Description: domesticated four-footed mammal from the equine family; URI: http://www.wikidata.org/entity/Q726
Entity: Horsens; Description: city in Horsens Municipality, Denmark; URI: http://www.wikidata.org/entity/Q190235
Entity: horse racing; Description: equestrian sport in which several horses simultaneously race against each other; URI: http://www.wikidata.org/entity/Q187916

Entity: Engineering; Description: international company located in Rome, Italy; URI: http://www.wikidata.org/entity/Q3725352
Entity: engineering; Description: applied science; URI: http://www.wikidata.org/entity/Q11023
Entity: Engineering Mechanics; Description: journal; URI: http://www.wikidata.org/entity/Q96329007
Entity: Technology; Description: 1

Processing keywords:   6%|▋         | 3/48 [00:01<00:21,  2.12it/s]

Entity: Expert; Description: Swiss company that provides support, services and solutions to retailers of consumer electronics; URI: http://www.wikidata.org/entity/Q680990
Entity: expert; Description: person with broad and profound competence in a particular field; URI: http://www.wikidata.org/entity/Q381160
Entity: expert system; Description: computer system that emulates the decision-making ability of a human expert; URI: http://www.wikidata.org/entity/Q184609
Entity: The Specialist; Description: 1994 film by Luis Llosa; URI: http://www.wikidata.org/entity/Q471423
Entity: specialist; Description: person who has completed a commercial or other vocational training, a college or a university degree; URI: http://www.wikidata.org/entity/Q1391344
Entity: specialist; Description: military rank; URI: http://www.wikidata.org/entity/Q1434096
Entity: Authority; Description: Farrar, Straus and Giroux paperback edition; URI: http://www.wikidata.org/entity/Q80550497
Entity: Authority; Description: 

Processing keywords:  12%|█▎        | 6/48 [00:03<00:19,  2.13it/s]

Entity: competition; Description: rivalry where multiple parties strive for a goal which cannot be shared; URI: http://www.wikidata.org/entity/Q476300
Entity: competition; Description: contest for a prize or award; URI: http://www.wikidata.org/entity/Q841654
Entity: competition; Description: term referring either to competition in nature or between contestants; URI: http://www.wikidata.org/entity/Q23807345
Entity: Contest; Description: commune in Mayenne, France; URI: http://www.wikidata.org/entity/Q687442
Entity: Contest; Description: 2013 film directed by Anthony Joseph Giunta; URI: http://www.wikidata.org/entity/Q2995622
Entity: Contest; Description: 1996 novel by Matthew Reilly; URI: http://www.wikidata.org/entity/Q5165147
Entity: tournament; Description: competition involving a relatively large number of competitors; URI: http://www.wikidata.org/entity/Q500834
Entity: tournament; Description: chivalrous competition or mock fight of the Middle Ages and Renaissance; URI: http://www.

Processing keywords:  21%|██        | 10/48 [00:04<00:12,  3.05it/s]

Entity: Thessaloniki; Description: city in Macedonia, Greece; URI: http://www.wikidata.org/entity/Q17151
Entity: Thessaloniki Municipality; Description: municipality of Macedonia, Greece; URI: http://www.wikidata.org/entity/Q6627746
Entity: Thessaloniki Regional Unit; Description: regional unit of Central Macedonia, Greece; URI: http://www.wikidata.org/entity/Q210176
Entity: Salonica Eyalet; Description: Ottoman province; URI: http://www.wikidata.org/entity/Q3854042
Entity: Salonica Vilayet; Description: Ottoman province; URI: http://www.wikidata.org/entity/Q2301221
Entity: Salonika Agreement; Description: 1938 treaty between the Balkan Entente and Bulgaria; URI: http://www.wikidata.org/entity/Q10513368
Entity: Thessalonica; Description: Byzantine district (theme); URI: http://www.wikidata.org/entity/Q2121803
Entity: Thessalonica; Description: The mint at the ancient site of Thessalonica in Macedonia; URI: http://www.wikidata.org/entity/Q4024000
Entity: Thessaloniki; Description: city 

Processing keywords:  23%|██▎       | 11/48 [00:05<00:14,  2.64it/s]

Entity: Byzantine period in the history of Bulgaria; Description: ; URI: http://www.wikidata.org/entity/Q12275254
Entity: Byzantine-period dish with painted decoration; Description: clay bowl of the Byzantine Egypt, 395–642 CE, in the collection of Museo Egizio, Turin, Italy (S.2106); URI: http://www.wikidata.org/entity/Q117423418
Entity: Byzantine-period dish painted with floral motifs; Description: clay bowl of the Egyptian Roman Period – Byzantine Period, 200–400 CE, in the collection of Museo Egizio, Turin, Italy; URI: http://www.wikidata.org/entity/Q117391444
Entity: Byzantine Empire; Description: Roman Empire during Late Antiquity and the Middle Ages; URI: http://www.wikidata.org/entity/Q12544
Entity: history of the Byzantine Empire; Description: aspect of history; URI: http://www.wikidata.org/entity/Q2993777
Entity: Byzantine Empire under the Amorian dynasty; Description: Byzantine Empire from 820 to 867; URI: http://www.wikidata.org/entity/Q474124
Entity: Ottoman Empire; Descri

Processing keywords:  25%|██▌       | 12/48 [00:05<00:15,  2.34it/s]

Entity: Ottoman Empire; Description: multiethnic empire with the monarchs from the Ottoman dynasty (1299–1922); URI: http://www.wikidata.org/entity/Q12560
Entity: Ottoman Empire; Description: scholarly chapter; URI: http://www.wikidata.org/entity/Q129875483
Entity: history of the Ottoman Empire; Description: aspect of history; URI: http://www.wikidata.org/entity/Q2564225



Processing keywords:  29%|██▉       | 14/48 [00:06<00:13,  2.47it/s]

Entity: Hutu; Description: ethnic group native to the Great Lakes region of Africa; URI: http://www.wikidata.org/entity/Q192647
Entity: urocanate hydratase YPO4017; Description: microbial gene found in Yersinia pestis CO92; URI: http://www.wikidata.org/entity/Q22568417
Entity: urocanate hydratase SSA_0435; Description: microbial gene found in Streptococcus sanguinis SK36; URI: http://www.wikidata.org/entity/Q23401967
Entity: Rwandan Genocide memorial; Description: ; URI: http://www.wikidata.org/entity/Q96057729
Entity: Rwandan genocide denial; Description: pseudohistorical assertion that the Rwandan genocide did not occur, specifically rejection of the scholarly consensus that Rwandan Tutsis were the victims of genocide between 7 April and 19 July 1994; URI: http://www.wikidata.org/entity/Q7383741
Entity: Rwandan Genocide burial ground, Kasensero; Description: ; URI: http://www.wikidata.org/entity/Q122143126
Entity: Central Africa; Description: core region of the African continent; URI

Processing keywords:  31%|███▏      | 15/48 [00:07<00:17,  1.88it/s]

Entity: trade; Description: economic branch involving the exchange of goods and services; URI: http://www.wikidata.org/entity/Q601401
Entity: trademark; Description: trade identifier of products or services; URI: http://www.wikidata.org/entity/Q167270
Entity: trade unionist; Description: person involved in trade unions; member or supporter of a trade union; URI: http://www.wikidata.org/entity/Q15627169
Entity: commerce; Description: large-scale organized system related to the exchange of goods and services; URI: http://www.wikidata.org/entity/Q26643
Entity: Commerce; Description: city in Hunt County, Texas, United States; URI: http://www.wikidata.org/entity/Q979151
Entity: Commerce; Description: artwork by Jean-Antoine Injalbert on pont Mirabeau, Paris, France; URI: http://www.wikidata.org/entity/Q23459743
Entity: marketplace; Description: space in which a market operates; URI: http://www.wikidata.org/entity/Q330284
Entity: Marketplace; Description: podcast; URI: http://www.wikidata.or

Processing keywords:  35%|███▌      | 17/48 [00:07<00:12,  2.45it/s]

Entity: marketplace; Description: space in which a market operates; URI: http://www.wikidata.org/entity/Q330284
Entity: Marketplace; Description: podcast; URI: http://www.wikidata.org/entity/Q110568178
Entity: Marketplace; Description: painting by Eugen Dikkutt; URI: http://www.wikidata.org/entity/Q104622748
Entity: agora; Description: central public space in ancient Greek city-states; URI: http://www.wikidata.org/entity/Q187909
Entity: Agora; Description: Mexican progressive metal band; URI: http://www.wikidata.org/entity/Q8191631
Entity: Agora; Description: wadi in Agadez Region, Niger; URI: http://www.wikidata.org/entity/Q35329827
Entity: commerce; Description: large-scale organized system related to the exchange of goods and services; URI: http://www.wikidata.org/entity/Q26643
Entity: Commerce; Description: city in Hunt County, Texas, United States; URI: http://www.wikidata.org/entity/Q979151
Entity: Commerce; Description: artwork by Jean-Antoine Injalbert on pont Mirabeau, Paris, 

Processing keywords:  38%|███▊      | 18/48 [00:08<00:14,  2.08it/s]

Entity: marketplace; Description: space in which a market operates; URI: http://www.wikidata.org/entity/Q330284
Entity: Marketplace; Description: podcast; URI: http://www.wikidata.org/entity/Q110568178
Entity: Marketplace; Description: painting by Eugen Dikkutt; URI: http://www.wikidata.org/entity/Q104622748
Entity: bazaar; Description: type of public marketplace; URI: http://www.wikidata.org/entity/Q219760
Entity: Bazaar; Description: 2016 video game; URI: http://www.wikidata.org/entity/Q124063422
Entity: Bazaar; Description: Danish musical group; URI: http://www.wikidata.org/entity/Q4875087
Entity: commercial district; Description: part of a city or town in which the primary land use is commercial activities; URI: http://www.wikidata.org/entity/Q1133961
Entity: Commercial District; Description: fictional district in the 2021 video game Degrees of Lewdity; URI: http://www.wikidata.org/entity/Q123017287
Entity: Hardin Commercial District; Description: historic district in Montana, USA;

Processing keywords:  42%|████▏     | 20/48 [00:09<00:10,  2.67it/s]

Entity: globalization; Description: process of international integration arising from world views, products, ideas, and other aspects of culture; URI: http://www.wikidata.org/entity/Q7181
Entity: Globalization; Description: 2014 album by Pitbull; URI: http://www.wikidata.org/entity/Q17508380
Entity: Globalization; Description: scholarly article published in October 2012; URI: http://www.wikidata.org/entity/Q56476484



Processing keywords:  46%|████▌     | 22/48 [00:09<00:10,  2.55it/s]

Entity: Wharf; Description: wharf in Hicks Bay, New Zealand; URI: http://www.wikidata.org/entity/Q79308728
Entity: wharf; Description: structure on the shore of a harbor or on the bank of a river or canal where ships dock; URI: http://www.wikidata.org/entity/Q828909
Entity: wharf; Description: ; URI: http://www.wikidata.org/entity/Q108867647
Entity: Dock; Description: family name; URI: http://www.wikidata.org/entity/Q37050415
Entity: Docker; Description: open-source software for deploying and running of containerized applications; URI: http://www.wikidata.org/entity/Q15206305
Entity: dock; Description: human-made structure involved in the handling of boats or ships; URI: http://www.wikidata.org/entity/Q124282
Entity: Quay; Description: family name; URI: http://www.wikidata.org/entity/Q37457164
Entity: Quay; Description: human settlement in Oklahoma, United States of America; URI: http://www.wikidata.org/entity/Q2720859
Entity: Quaye; Description: family name; URI: http://www.wikidata.o

Processing keywords:  48%|████▊     | 23/48 [00:10<00:09,  2.73it/s]

Entity: Ireland; Description: country in Northwestern Europe; URI: http://www.wikidata.org/entity/Q27
Entity: Ireland; Description: island in the North Atlantic Ocean; URI: http://www.wikidata.org/entity/Q22890
Entity: Ireland; Description: part of the United Kingdom (1801–1922); URI: http://www.wikidata.org/entity/Q57695350



Processing keywords:  52%|█████▏    | 25/48 [00:11<00:09,  2.33it/s]

Entity: migration; Description: displacement of a human or animal population; URI: http://www.wikidata.org/entity/Q1800545
Entity: Migration; Description: 2017 studio album by Bonobo; URI: http://www.wikidata.org/entity/Q28438088
Entity: Migration; Description: album by The Amboy Dukes; URI: http://www.wikidata.org/entity/Q15980242
Entity: immigration; Description: movement of people into another country or region to which they are not native; URI: http://www.wikidata.org/entity/Q131288
Entity: Immigration; Description: album by Show-Ya; URI: http://www.wikidata.org/entity/Q6004986
Entity: Immigration; Description: Immigration of Economy and Labour Ministry in Bermuda; URI: http://www.wikidata.org/entity/Q113083562
Entity: emigration; Description: act of leaving one's country or region with the intent to settle permanently or temporarily in another; URI: http://www.wikidata.org/entity/Q187668
Entity: Emigration; Description: print in the National Gallery of Art (NGA 173121); URI: http:

Processing keywords:  60%|██████    | 29/48 [00:12<00:05,  3.53it/s]

Entity: Poland; Description: country in Central Europe; URI: http://www.wikidata.org/entity/Q36
Entity: Poland; Description: family name; URI: http://www.wikidata.org/entity/Q7208891
Entity: Poland; Description: badminton player; URI: http://www.wikidata.org/entity/Q63197748
Entity: Polish culture in the Interbellum; Description: ; URI: http://www.wikidata.org/entity/Q7210027
Entity: Polish culture during World War II; Description: Culture during the Nazi cultural genocide of 1939–1944; URI: http://www.wikidata.org/entity/Q1350719
Entity: Polish Culture Society of Edmonton; Description: ; URI: http://www.wikidata.org/entity/Q7209897
Entity: Polish literature; Description: Polish language literature; URI: http://www.wikidata.org/entity/Q692327
Entity: history of Polish literature; Description: ; URI: http://www.wikidata.org/entity/Q113861653
Entity: scholar of Polish literature; Description: specialist in Polish literature; URI: http://www.wikidata.org/entity/Q96186657
Entity: Polish di

Processing keywords:  62%|██████▎   | 30/48 [00:13<00:09,  1.96it/s]

Entity: Social stratification; Description: ; URI: http://www.wikidata.org/entity/Q25461793
Entity: social stratification; Description: society's categorization of its people into hierarchical groups based on socioeconomic factors; URI: http://www.wikidata.org/entity/Q841628
Entity: social stratification; Description: ; URI: http://www.wikidata.org/entity/Q11588296
Entity: social class; Description: group of people categorized in a hierarchy based on socioeconomic factors; URI: http://www.wikidata.org/entity/Q187588
Entity: social status; Description: one's class due to birth; URI: http://www.wikidata.org/entity/Q11255025
Entity: belonging to this social class; Description: inverse property label for P3716; URI: http://www.wikidata.org/entity/Q83854824
Entity: Aristocracy; Description: 1914 film by Thomas N. Heffron; URI: http://www.wikidata.org/entity/Q3622691
Entity: Aristocracy; Description: Ali Project's fifth studio album; URI: http://www.wikidata.org/entity/Q4791012
Entity: Arist

Processing keywords:  67%|██████▋   | 32/48 [00:14<00:08,  1.93it/s]

Entity: Forensics; Description: episode of Cold Case (S7 E9); URI: http://www.wikidata.org/entity/Q52664324
Entity: Forensics; Description: episode of DragonflyTV (S3 E4); URI: http://www.wikidata.org/entity/Q124751673
Entity: Forensics; Description: non fiction book; URI: http://www.wikidata.org/entity/Q109499732
Entity: forensic science; Description: application of science to criminal and civil laws; URI: http://www.wikidata.org/entity/Q495304
Entity: Forensic science; Description: journal; URI: http://www.wikidata.org/entity/Q27710909
Entity: Forensic Science; Description: episode of Modern Marvels (S5 E9); URI: http://www.wikidata.org/entity/Q114548171
Entity: criminalistics; Description: the application of science to criminal and civil laws during criminal investigation; URI: http://www.wikidata.org/entity/Q43504
Entity: Criminalistics and Forensics; Description: ; URI: http://www.wikidata.org/entity/Q96703459
Entity: Criminalistics--science and philosophy; Description: scientific

Processing keywords:  69%|██████▉   | 33/48 [00:14<00:05,  2.50it/s]

Entity: prestige goods; Description: ; URI: http://www.wikidata.org/entity/Q109598912
Entity: Prestige goods in the Neolithic of the Carpathian Basin; Description: scientific article published in August 2004; URI: http://www.wikidata.org/entity/Q73248926
Entity: luxury good; Description: type of desirable good; URI: http://www.wikidata.org/entity/Q949715
Entity: Luxury goods and means of politics; Description: German article from Johannes Wolff, Dresdner Hefte 142; URI: http://www.wikidata.org/entity/Q111046209
Entity: Luxury Goods and the Equity Premium; Description: scientific article published in August 2001; URI: http://www.wikidata.org/entity/Q105851547
Entity: Status symbols. Cleveland rocks at fighting stigma and discrimination; Description: scientific article published on September 1, 2010; URI: http://www.wikidata.org/entity/Q44430536
Entity: Status Symbols: Role and Rank in Seventeenth-century Netherlandish Portraiture; Description: ; URI: http://www.wikidata.org/entity/Q5851

Processing keywords:  73%|███████▎  | 35/48 [00:14<00:03,  3.33it/s]

Entity: Marcus Aemilius Lepidus; Description: Wikimedia disambiguation page; URI: http://www.wikidata.org/entity/Q224839
Entity: Roman Republic; Description: period of ancient Roman civilization (509 BC–27 BC); URI: http://www.wikidata.org/entity/Q17167
Entity: Roman Republic; Description: short-lived state entity declared on February 9, 1849; URI: http://www.wikidata.org/entity/Q1072140
Entity: Roman Republic; Description: republic at Italy between 1798–1799; URI: http://www.wikidata.org/entity/Q175881



Processing keywords:  75%|███████▌  | 36/48 [00:15<00:03,  3.67it/s]

Entity: Institute for Ethnic Studies; Description: research institute in Ljubljana, Slovenia; URI: http://www.wikidata.org/entity/Q12791410
Entity: ethnic studies; Description: academic field focused on ethnic communities and their culture; URI: http://www.wikidata.org/entity/Q41870
Entity: Ethnic Studies Review; Description: official publication of the Association for Ethnic Studies; URI: http://www.wikidata.org/entity/Q106171126
Entity: Ethnic Studies & Psychology; Description: a classroom building in San Francisco State's main campus; URI: http://www.wikidata.org/entity/Q125852144
Entity: national identity; Description: identity or sense of belonging to one state or to one nation; URI: http://www.wikidata.org/entity/Q1880695
Entity: National Identity Award; Description: award in Estonia; URI: http://www.wikidata.org/entity/Q16411889
Entity: National Identity Register; Description: former central database of United Kingdom citizens; URI: http://www.wikidata.org/entity/Q58373154
Entit

Processing keywords:  83%|████████▎ | 40/48 [00:15<00:01,  4.25it/s]

❌ No Wikidata matches found
Entity: culture of Slovenia; Description: pattern of human activity and symbolism associated with Slovenia and its people; URI: http://www.wikidata.org/entity/Q3007297
Entity: ethnic studies; Description: academic field focused on ethnic communities and their culture; URI: http://www.wikidata.org/entity/Q41870
Entity: Ethnic Studies Review; Description: official publication of the Association for Ethnic Studies; URI: http://www.wikidata.org/entity/Q106171126
Entity: ethnic studies scholar; Description: profession; URI: http://www.wikidata.org/entity/Q121333884
Entity: Minority Studies and Christian Studies; Description: academic book chapter; URI: http://www.wikidata.org/entity/Q116053315
Entity: national identity; Description: identity or sense of belonging to one state or to one nation; URI: http://www.wikidata.org/entity/Q1880695
Entity: National Identity Award; Description: award in Estonia; URI: http://www.wikidata.org/entity/Q16411889
Entity: National 

Processing keywords:  90%|████████▉ | 43/48 [00:18<00:02,  2.28it/s]

Entity: Terracina; Description: Italian comune; URI: http://www.wikidata.org/entity/Q128226
Entity: Terracina; Description: family name; URI: http://www.wikidata.org/entity/Q37496426
Entity: Terracina; Description: chief town of the homonym municipality; URI: http://www.wikidata.org/entity/Q30031065
Entity: Ancient Roman theatre; Description: Roman theater in Lisbon, Portugal; URI: http://www.wikidata.org/entity/Q11780
Entity: Ancient Roman theatre; Description: ; URI: http://www.wikidata.org/entity/Q112054994
Entity: Roman theatre; Description: theatre building built in ancient Roman times; URI: http://www.wikidata.org/entity/Q19757
Entity: Marcus Aemilius Lepidus; Description: Wikimedia disambiguation page; URI: http://www.wikidata.org/entity/Q224839
Entity: archaeology; Description: study of the past via material culture; URI: http://www.wikidata.org/entity/Q23498
Entity: Archaeology; Description: magazine about archaeology, published by the Archaeological Institute of America; URI:

Processing keywords:  92%|█████████▏| 44/48 [00:19<00:02,  1.81it/s]

Entity: Latin inscriptions; Description: inscriptions on tapestries in the Applied Arts Collection of Milan, Italy, by Bramantino; URI: http://www.wikidata.org/entity/Q131628832
Entity: Latin inscriptions in Dante's tomb; Description: inscription in the Dante Alighieri's tomb, Ravenna, Italy; URI: http://www.wikidata.org/entity/Q131937085
Entity: Latin Inscriptions: Studies in Measurement and Making; Description: ; URI: http://www.wikidata.org/entity/Q59395786
Entity: Roman Inscriptions 1981–5; Description: ; URI: http://www.wikidata.org/entity/Q60310686
Entity: Roman Inscriptions 1986–90; Description: ; URI: http://www.wikidata.org/entity/Q60310675
Entity: Roman Inscriptions of Britain; Description: 3-volume corpus of inscriptions found in Britain from the Roman period; URI: http://www.wikidata.org/entity/Q3440860
Entity: Epigraphy; Description: sixth chapter in The Oxford Handbook of Roman Studies, 2010; URI: http://www.wikidata.org/entity/Q124353416
Entity: epigraphy; Description: s

Processing keywords:  96%|█████████▌| 46/48 [00:19<00:00,  2.31it/s]

Entity: Latin epigraphy; Description: ; URI: http://www.wikidata.org/entity/Q2352767
Entity: Latin Epigraphy: An Introduction to the Study of Latin Inscriptions. John Edwin Sandys; Description: ; URI: http://www.wikidata.org/entity/Q59735412
Entity: Latin Epigraphy, an Introduction to the Study of Latin Inscriptions. John Edwin Sandys , S. G. Campbell; Description: ; URI: http://www.wikidata.org/entity/Q59724788
Entity: Latin inscriptions; Description: inscriptions on tapestries in the Applied Arts Collection of Milan, Italy, by Bramantino; URI: http://www.wikidata.org/entity/Q131628832
Entity: Latin inscriptions in Dante's tomb; Description: inscription in the Dante Alighieri's tomb, Ravenna, Italy; URI: http://www.wikidata.org/entity/Q131937085
Entity: Latin Inscriptions: Studies in Measurement and Making; Description: ; URI: http://www.wikidata.org/entity/Q59395786
Entity: Ancient inscriptions in Somalia; Description: Undeciphered writing systems in Somalia; URI: http://www.wikidata

Processing keywords: 100%|██████████| 48/48 [00:28<00:00,  1.67it/s]

All keywords processed, saving final results.





The following cell of code contains some required adjustments to the evaluation output for the results analysis. These adjustments mainly involve string parsing. Running this cell is required to obtain the statistics. 

In [21]:
def _rejoin_split_uri(llm_uris):
    """Undo a URI that was broken into three pieces around 'wikidata'.

    A list of exactly three items whose middle item is ``"wikidata"`` is
    joined back with ``'.'`` into a single-element list; any other value is
    returned unchanged.
    NOTE(review): presumably the upstream parsing splits the URI on '.';
    confirm against the pipeline.
    """
    if len(llm_uris) == 3 and llm_uris[1] == "wikidata":
        return ['.'.join(llm_uris)]
    return llm_uris


with open(results_file, 'r', encoding='utf-8') as f:
    records = json.load(f)

# The records are adjusted in place; new_records holds those same objects.
new_records = []
for record in records:
    for kw in record['kws']:
        # Keywords without a result (e.g. from a partial checkpoint) are
        # normalised to an empty list instead of raising.
        kw['llm_uris'] = _rejoin_split_uri(kw.get('llm_uris') or [])
    new_records.append(record)

with open(adjusted_file, 'w', encoding='utf-8') as f:
    json.dump(new_records, f, ensure_ascii=False, indent=2)
    

### Evaluation

The following two cells can be used to obtain the statistics. As can be seen, the recall, precision, and F1 scores are printed. They are reported over all the examples, by language (that is, we distinguish the results based on the original language of the article the keyword belongs to), and by match type (where the match type describes how closely the Wikidata URL matches the actual keyword: 'e' for an exact match, or 'r' for a related match).

As can be seen in the example below, the code prints a report with the results. 

In [22]:
with open(adjusted_file, 'r', encoding='utf-8') as f:
    records = json.load(f)

languages = set(record['language'] for record in records)


def _new_metric_bucket():
    """Return zeroed Sum/Size accumulators for recall, precision and f1."""
    return {metric: {"Sum": 0, "Size": 0} for metric in ("recall", "precision", "f1")}


def _accumulate(bucket, metric_values):
    """Add one observation of every metric to ``bucket``."""
    for metric, value in metric_values.items():
        bucket[metric]["Sum"] += value
        bucket[metric]["Size"] += 1


scores_llm = {
    "Total": _new_metric_bucket(),
    "Per_match_type": {match_type: _new_metric_bucket() for match_type in ("e", "r")},
    "Per_language": {language: _new_metric_bucket() for language in languages},
}

for record in records:
    for kw in record['kws']:
        # Only keywords annotated as exact ('e') or related ('r') are scored.
        if kw['match'] not in ("e", "r"):
            continue

        # Rewrite the gold URLs into the http://.../entity/... form.
        correct_uris = [
            url.replace("https", "http").replace("/wiki/", "/entity/")
            for url in kw['wikidata_url']
        ]
        retrieved_uris_llm = kw['llm_uris']
        recall_llm = compute_recall(correct_uris, retrieved_uris_llm)
        precision_llm = compute_precision(correct_uris, retrieved_uris_llm)

        # F1 score, guarded against division by zero.
        denominator = precision_llm + recall_llm
        f1_llm = 2 * precision_llm * recall_llm / denominator if denominator > 0 else 0

        observation = {"recall": recall_llm, "precision": precision_llm, "f1": f1_llm}

        # Update the overall, per-match-type and per-language accumulators.
        for bucket in (
            scores_llm["Total"],
            scores_llm["Per_match_type"][kw['match']],
            scores_llm["Per_language"][record['language']],
        ):
            _accumulate(bucket, observation)


In [23]:
import json

def _mean_metrics_for(metrics):
    """
    Convert one group of ``{'Sum': s, 'Size': n}`` counters into mean values.

    Args:
        metrics: mapping with mandatory ``'recall'`` and ``'precision'``
            entries and an optional ``'f1'`` entry, each of the form
            ``{'Sum': <number>, 'Size': <count>}``.

    Returns:
        dict with ``'recall'`` and ``'precision'`` means, plus ``'f1'`` only
        when the input carries an ``'f1'`` counter.

    Raises:
        KeyError: if ``'recall'`` or ``'precision'`` (or their ``'Sum'`` /
            ``'Size'`` fields) are missing.
    """
    def mean_of(counter):
        # A counter that never received a sample has Size == 0; report 0
        # rather than raising ZeroDivisionError.
        return counter['Sum'] / counter['Size'] if counter['Size'] != 0 else 0

    means = {
        'recall': mean_of(metrics['recall']),
        'precision': mean_of(metrics['precision']),
    }
    # F1 is optional in the accumulated statistics; only emit it when tracked.
    if 'f1' in metrics:
        means['f1'] = mean_of(metrics['f1'])
    return means


def compute_mean_metrics(stats_dict):
    """
    Compute mean recall, precision and F1 from accumulated Sum/Size counters.

    Expected input structure (every section is optional)::

        {
          'Total': {
            'recall':    {'Sum': x,  'Size': y},
            'precision': {'Sum': x2, 'Size': y2},
            'f1':        {'Sum': x3, 'Size': y3}
          },
          'Per_match_type': {
            'e': {
              'recall':    {'Sum': x,  'Size': y},
              'precision': {'Sum': x2, 'Size': y2},
              'f1':        {'Sum': x3, 'Size': y3}
            },
            ...
          },
          'Per_language': {
            'fr': {
              'recall':    {'Sum': x,  'Size': y},
              'precision': {'Sum': x2, 'Size': y2},
              'f1':        {'Sum': x3, 'Size': y3}
            },
            ...
          }
        }

    Args:
        stats_dict: mapping shaped as above. Within a section that is present,
            ``'recall'`` and ``'precision'`` are required while ``'f1'`` is
            optional.

    Returns:
        dict with the keys ``'Total'``, ``'Per_match_type'`` and
        ``'Per_language'`` (always present, empty when the corresponding
        section is absent from ``stats_dict``). Each leaf holds the mean
        ``Sum / Size`` of the metric, or 0 when ``Size`` is 0.

    Raises:
        KeyError: if a present section lacks the ``'recall'`` or
            ``'precision'`` counters.
    """
    mean_dict = {
        'Total': {},
        'Per_match_type': {},
        'Per_language': {}
    }

    # Overall counters map directly to one set of means.
    if 'Total' in stats_dict:
        mean_dict['Total'] = _mean_metrics_for(stats_dict['Total'])

    # The two breakdown sections share the same shape: group key -> counters.
    for section in ('Per_match_type', 'Per_language'):
        if section in stats_dict:
            for group, counters in stats_dict[section].items():
                mean_dict[section][group] = _mean_metrics_for(counters)

    return mean_dict


def print_system_report(system_dict):
    """
    Print a human-readable report of the system (LLM) mean metrics.

    The raw Sum/Size counters in ``system_dict`` are averaged with
    ``compute_mean_metrics``; recall, precision and F1 are then printed for
    the overall total, for every match type and for every language.

    Args:
        system_dict: accumulated statistics in the structure accepted by
            ``compute_mean_metrics``.

    Returns:
        None. The report is written to standard output.
    """
    def emit_metrics(values, indent=""):
        # A metric absent from `values` is shown as 0; all figures use 4 decimals.
        print(f"{indent}LLM Recall:    {values.get('recall', 0):.4f}")
        print(f"{indent}LLM Precision: {values.get('precision', 0):.4f}")
        print(f"{indent}LLM F1:        {values.get('f1', 0):.4f}")

    averages = compute_mean_metrics(system_dict)

    print("======== SYSTEM METRICS REPORT ========")

    # Section 1: overall figures.
    print("\n--- TOTAL ---")
    overall = averages.get('Total', {})
    if not overall:
        print("No total data found.")
    else:
        emit_metrics(overall)

    # Section 2: one entry per match type, in the dictionary's own order.
    print("\n--- PER MATCH TYPE ---")
    for match_type, values in averages.get('Per_match_type', {}).items():
        print(f"\nMatch Type: {match_type}")
        emit_metrics(values, indent="  ")

    # Section 3: one entry per language. Keys are compared through their
    # string form so that keys of mixed types do not break the sort.
    print("\n--- PER LANGUAGE ---")
    language_entries = sorted(
        averages.get('Per_language', {}).items(),
        key=lambda entry: str(entry[0]),
    )
    for language, values in language_entries:
        print(f"\nLanguage: {language}")
        emit_metrics(values, indent="  ")


# Example usage:
# 'scores_llm' holds the accumulated Sum/Size counters for the system (LLM),
# populated earlier in the notebook by the evaluation loop.
print_system_report(scores_llm)


--- TOTAL ---
LLM Recall:    0.5590
LLM Precision: 0.5833
LLM F1:        0.5660

--- PER MATCH TYPE ---

Match Type: e
  LLM Recall:    0.6301
  LLM Precision: 0.6585
  LLM F1:        0.6382

Match Type: r
  LLM Recall:    0.1429
  LLM Precision: 0.1429
  LLM F1:        0.1429

--- PER LANGUAGE ---

Language: ar
  LLM Recall:    1.0000
  LLM Precision: 1.0000
  LLM F1:        1.0000

Language: el
  LLM Recall:    0.4286
  LLM Precision: 0.4286
  LLM F1:        0.4286

Language: es
  LLM Recall:    0.7500
  LLM Precision: 0.7500
  LLM F1:        0.7500

Language: fi
  LLM Recall:    0.5556
  LLM Precision: 0.6667
  LLM F1:        0.5833

Language: it
  LLM Recall:    0.5556
  LLM Precision: 0.5556
  LLM F1:        0.5556

Language: pl
  LLM Recall:    0.5000
  LLM Precision: 0.5714
  LLM F1:        0.5238

Language: sl
  LLM Recall:    0.3333
  LLM Precision: 0.3333
  LLM F1:        0.3333

Language: sv
  LLM Recall:    0.5000
  LLM Precision: 0.5000
  LLM F1:        0.5000

Language: 