This code takes as input a Wikidata ID that was previously produced by an LLM, accesses the Wikidata SPARQL endpoint for a list of possibilities matching a specific label, and then uses GPT-4 for disambiguation (... using context).
What I need to do next is pass a TriG file as context, extract the ID and label, and then, based on that context, use GPT for disambiguation.

In [None]:
import os
import re

import requests
from openai import OpenAI
from SPARQLWrapper import SPARQLWrapper, JSON

# Set up OpenAI client
# The API key is taken from the OPENAI_API_KEY environment variable so that no
# secret has to be written into this file. When the variable is unset the
# client is still created with an empty key (requests will then fail to
# authenticate, which gpt_disambiguation reports as a "GPT Error").
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", ""))


def validate_wikidata_id(wikidata_id, expected_label):
    """
    Validate a Wikidata entity ID against an expected label in English or Italian.

    The entity's ``rdfs:label`` values tagged "en" or "it" are fetched from the
    Wikidata SPARQL endpoint and compared with ``expected_label`` ignoring case
    and surrounding whitespace. Malformed IDs and empty labels are rejected
    before any network request is made.

    Args:
        wikidata_id (str): Wikidata entity ID, with or without the 'Q' prefix
        expected_label (str): Expected label for the entity

    Returns:
        tuple: (is_valid, validated_label). ``validated_label`` is the label as
        stored in Wikidata when a match is found, otherwise ``None``. Query
        errors are printed and reported as ``(False, None)``.
    """
    # The ID is interpolated into the query text, so only plain digits may get
    # through; anything else would produce a malformed (or injected) query.
    id_match = re.fullmatch(r"Q*(\d+)", str(wikidata_id).strip())
    if id_match is None:
        print(f"Invalid Wikidata ID: {wikidata_id!r}")
        return False, None
    if not expected_label or not expected_label.strip():
        # Nothing to compare against, so the ID cannot be confirmed.
        return False, None

    numeric_id = id_match.group(1)
    # casefold() is the Unicode-aware form of lower() for caseless matching.
    wanted = expected_label.strip().casefold()

    sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
    query = f"""
    SELECT ?entityLabel WHERE {{
      BIND(wd:Q{numeric_id} AS ?entity)
      ?entity rdfs:label ?entityLabel. 
      FILTER(LANG(?entityLabel) IN ("en", "it"))
    }}
    """
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)

    try:
        results = sparql.query().convert()
        for result in results["results"]["bindings"]:
            label = result["entityLabel"]["value"]
            if label.casefold() == wanted:
                return True, label
        return False, None
    except Exception as e:
        # Best-effort lookup: any endpoint or parsing failure counts as
        # "not validated" so the caller can fall back to a candidate search.
        print(f"SPARQL Error: {e}")
        return False, None


def search_wikidata_candidates(label):
    """
    Search Wikidata for items matching the given label.

    The search itself is performed in English; labels and descriptions of the
    hits are requested in Italian (Wikidata falls back to another language
    when no Italian text exists).

    Args:
        label (str): Label to search for

    Returns:
        list: Candidate entities as dicts with keys "id" (without the 'Q'
        prefix), "label" and "description". Empty when the label is blank,
        nothing matches, or the request fails (the error is printed).
    """
    # An empty search term can never match anything; skip the round trip.
    if not label or not label.strip():
        return []

    url = "https://www.wikidata.org/w/api.php"
    params = {
        "action": "wbsearchentities",
        "search": label,
        "language": "en",  # Language the search term is matched in
        "uselang": "it",  # Language the returned labels/descriptions are shown in
        "format": "json",
        "type": "item"
    }

    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        print(f"API Error: {e}")
        return []

    candidates = []
    for result in data.get("search", []):
        if "id" not in result:
            continue
        # A hit found through an alias may carry no "label" in the display
        # language; fall back to the matched text instead of failing the
        # whole search on a KeyError.
        shown_label = result.get("label") or result.get("match", {}).get("text", "")
        candidates.append(
            {
                "id": result["id"].lstrip('Q'),  # Normalize ID
                "label": shown_label,
                "description": result.get("description", "")
            }
        )
    return candidates


def gpt_disambiguation(original_id, original_label, candidates):
    """
    Ask GPT which candidate entity is the right match for a label.

    A prompt is assembled from the rejected ID/label pair followed by one
    bullet line per candidate, and sent as a single user message to the
    "gpt-4" chat model through the module-level ``client``.

    Args:
        original_id (str): Original Wikidata ID (digits only, 'Q' is added)
        original_label (str): Original label
        candidates (list): Candidate dicts with "label", "id" and "description"

    Returns:
        str: GPT's reply text with surrounding whitespace removed, or None
        when the request fails (the error is printed).
    """
    # Header pieces are spelled out line by line so the exact prompt text,
    # including its leading indentation, stays visible.
    prompt_parts = [
        "\n",
        f"    The Wikidata ID Q{original_id} with label '{original_label}' could not be validated. \n",
        "    Here are some alternative candidates with their descriptions (English and Italian):\n",
        "    ",
    ]
    prompt_parts.extend(
        f"- {entry['label']} (Q{entry['id']}): {entry['description']}\n"
        for entry in candidates
    )
    prompt_parts.append("\nWhich one is the correct match? Provide the Wikidata ID.")
    prompt = "".join(prompt_parts)

    try:
        completion = client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
        )
        answer = completion.choices[0].message.content
        return answer.strip()
    except Exception as err:
        print(f"GPT Error: {err}")
        return None


def validate_wikidata_entity(wikidata_id, expected_label):
    """
    Validate a Wikidata entity and, if that fails, look for a replacement.

    Steps: check the ID/label pair with ``validate_wikidata_id``; on failure
    search for candidates with ``search_wikidata_candidates``; then ask GPT
    (``gpt_disambiguation``) to pick one. Progress is printed along the way.

    Args:
        wikidata_id (str): Wikidata entity ID, with or without the 'Q' prefix
        expected_label (str): Expected label for the entity

    Returns:
        dict: Validation results. "status" is one of:
            - "valid": keys "id", "label"
            - "no_candidates": keys "id", "label"
            - "disambiguated": keys "original_id", "original_label",
              "gpt_suggestion" (an ID taken from the candidate list)
            - "ambiguous": keys "id", "label", "candidates"
        All IDs are returned without the 'Q' prefix.
    """
    # Remove 'Q' prefix if present
    wikidata_id = wikidata_id.lstrip('Q')

    print(f"Validating Wikidata entity: Q{wikidata_id} - Expected Label: {expected_label}")

    # Validate Wikidata ID
    is_valid, validated_label = validate_wikidata_id(wikidata_id, expected_label)

    if is_valid:
        return {
            "status": "valid",
            "id": wikidata_id,
            "label": validated_label
        }

    print(f"ID Q{wikidata_id} is invalid. Searching for alternatives...")

    # Search for alternatives
    candidates = search_wikidata_candidates(expected_label)

    if not candidates:
        return {
            "status": "no_candidates",
            "id": wikidata_id,
            "label": expected_label
        }

    print(f"Candidates found for '{expected_label}':")
    for candidate in candidates:
        print(f"- {candidate['label']} (Q{candidate['id']}): {candidate['description']}")

    # Optional GPT disambiguation (requires OpenAI API key)
    try:
        disambiguation = gpt_disambiguation(wikidata_id, expected_label, candidates)
    except Exception as e:
        print(f"Disambiguation failed: {e}")
        disambiguation = None

    if disambiguation:
        print(f"GPT suggested: {disambiguation}")
        # GPT answers in free text and may repeat the rejected ID or name an
        # entity that was never offered, so only an ID that appears in the
        # candidate list is accepted (first such mention wins).
        candidate_ids = {str(candidate['id']).lstrip('Q') for candidate in candidates}
        suggested_id = next(
            (
                mentioned
                for mentioned in re.findall(r'\bQ(\d+)\b', disambiguation)
                if mentioned in candidate_ids
            ),
            None,
        )
        if suggested_id is not None:
            return {
                "status": "disambiguated",
                "original_id": wikidata_id,
                "original_label": expected_label,
                "gpt_suggestion": suggested_id
            }
        print("GPT's answer did not name any of the candidates.")

    return {
        "status": "ambiguous",
        "id": wikidata_id,
        "label": expected_label,
        "candidates": candidates
    }


# Example usage
if __name__ == "__main__":
    # Demo run: check the pair (Q7314, "Lake Garda") and show the outcome dict
    result = validate_wikidata_entity(wikidata_id="Q7314", expected_label="Lake Garda")
    print(f"\nFinal result: {result}")


Validating Wikidata entity: Q7314 - Expected Label: Lake Garda
ID Q7314 is invalid. Searching for alternatives...
Candidates found for 'Lake Garda':
- lago di Garda (Q6414): lago dell'Italia settentrionale
- Lake Garda (Q104594664): dipinto di Curt Agthe
- Lake Garda (Q126974987): dipinto di Anton Hansch
- Lake Garda (Q119444196): painting by Geoffrey Scowcroft Fletcher (1923–2004), Grundy Art Gallery
- Lake Garda (Q119083313): painting by George Clarkson Stanfield (1828–1878), Torre Abbey Museum
- Lake Garda (Verona) (Q21675357): dipinto di Emma Ciardi
- Lake Garda Dam (Q31806307): dam in Hartford County, Connecticut, United States of America
GPT suggested: The correct Wikidata ID for Lake Garda in Italy is Q6414.

Final result: {'status': 'disambiguated', 'original_id': '7314', 'original_label': 'Lake Garda', 'gpt_suggestion': '6414'}
