# **Popolazione del CSV con informazioni ricavate dai dati richiesti**

In [3]:
import csv
import datetime
from SPARQLWrapper import SPARQLWrapper, JSON
import pysparql_anything as sa


In [None]:

# Function to query Wikidata for author information
def query_author_data(author_name):
    """
    Look up an author's gender and birth year on Wikidata.

    The name is matched exactly against English-tagged string values
    (``"<name>"@en``) attached to an item through any property; the first
    item that also carries a gender (P21) and a date of birth (P569) is used.

    Args:
        author_name (str): The name of the author to query. Leading and
            trailing whitespace is ignored.

    Returns:
        tuple: ``(gender, birth_year)``. ``gender`` is the English label of
               the gender item and ``birth_year`` is an int. Each element
               falls back to "Unknown" when Wikidata does not provide it;
               ("Unknown", "Unknown") is returned when nothing matches or
               when the name is empty/None (no request is sent in that case).
    """
    # A blank name cannot match anything useful: skip the network round trip.
    name = "" if author_name is None else str(author_name).strip()
    if not name:
        return "Unknown", "Unknown"

    # Escape the characters that would terminate or corrupt a SPARQL string
    # literal, so names such as 'Dwayne "The Rock" Johnson' neither break the
    # query nor inject extra patterns into it.
    literal = name.translate(str.maketrans({
        "\\": "\\\\",
        '"': '\\"',
        "\n": "\\n",
        "\r": "\\r",
        "\t": "\\t",
    }))

    sparql = SPARQLWrapper("https://query.wikidata.org/sparql")  # Wikidata public endpoint

    # NOTE: ?label is a variable predicate, so the name may match a label,
    # an alias or any other English-tagged string value of the item.
    query = f"""
    SELECT ?genderLabel ?birthYear WHERE {{
        ?author ?label "{literal}"@en.  # Match the author's name in English
        ?author wdt:P21 ?gender;            # Retrieve gender (property P21)
               wdt:P569 ?birthDate.         # Retrieve birth date (property P569)
        BIND(YEAR(?birthDate) AS ?birthYear) # Extract the year from the birth date

        SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en". }} # Ensure labels are in English
    }}
    LIMIT 1
    """

    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    bindings = results["results"]["bindings"]
    if not bindings:
        return "Unknown", "Unknown"

    first = bindings[0]
    gender = first.get("genderLabel", {}).get("value", "Unknown")

    # ?birthYear is left unbound when YEAR() fails (e.g. the P569 value is
    # not a real date), so the key may be missing from the binding.
    try:
        birth_year = int(first.get("birthYear", {}).get("value"))
    except (TypeError, ValueError):
        birth_year = "Unknown"

    return gender, birth_year

# Function to enrich a CSV file with additional author information
def enrich_csv(input_file, output_file):
    """
    Copy a CSV file, adding the author's gender and birth year to every row.

    The author name is read from the "Nome dell'autore dell'opera" column and
    resolved through ``query_author_data``; the results are stored in the
    "Gender" and "Author Birth Year" columns. Each distinct author is looked
    up only once, however many rows mention it.

    Args:
        input_file (str): Path to the input CSV file (UTF-8, with header row).
        output_file (str): Path to the output CSV file (overwritten).

    Raises:
        KeyError: If a data row lacks the "Nome dell'autore dell'opera" column.
    """
    author_column = "Nome dell'autore dell'opera"
    extra_columns = ["Gender", "Author Birth Year"]

    # Load everything up front so the input is closed before the output is
    # opened. newline='' lets the csv module handle line endings itself.
    with open(input_file, mode='r', encoding='utf-8', newline='') as infile:
        reader = csv.DictReader(infile)
        rows = list(reader)
        # fieldnames is None for a completely empty file.
        header = list(reader.fieldnames or [])

    # Append the new columns, without duplicating them when the input has
    # already been enriched once.
    fieldnames = header + [col for col in extra_columns if col not in header]

    lookups = {}  # author name -> (gender, birth_year), avoids repeated queries

    with open(output_file, mode='w', encoding='utf-8', newline='') as outfile:
        writer = csv.DictWriter(outfile, fieldnames=fieldnames)
        writer.writeheader()

        for row in rows:
            author = row[author_column]
            if author not in lookups:
                lookups[author] = query_author_data(author)
            gender, birth_year = lookups[author]

            row["Gender"] = gender
            row["Author Birth Year"] = birth_year

            writer.writerow(row)

# Enrich the form-responses export: first argument is the input CSV,
# second is the CSV file to create.
enrich_csv(
    "Argomento Opera (Risposte) - Risposte del modulo 1.csv",
    "work_topics.csv",
)


In [4]:
# Engine object used by the cells below to execute the .sparql query files.
engine = sa.SparqlAnything()

In [None]:

# Execute the work_construct query file and write its output in 'ttl' format.
engine.run(
    query='../queries/work_construct.sparql',
    output='../output/work_construct.ttl',
    format='ttl',
)

In [43]:
# Execute the author_construct query file and write its output in 'ttl' format.
engine.run(
    query='../queries/author_construct.sparql',
    output='../output/author_construct.ttl',
    format='ttl',
)

In [6]:
# Execute the books_construct query file and write its output in 'ttl' format.
engine.run(
    query='../queries/books_construct.sparql',
    output='../output/books_construct.ttl',
    format='ttl',
)