# Pulling footballer dates of birth from Wiki

In [2]:
import pandas as pd
from SPARQLWrapper import SPARQLWrapper, JSON
from datetime import datetime

def fetch_dob(name):
    # Define the endpoint and query
    endpoint_url = "https://query.wikidata.org/sparql"
    query = f"""
    SELECT ?dob WHERE {{
      ?person wdt:P31 wd:Q5; # instance of human
              wdt:P106 wd:Q937857; # occupation: association football player
              wdt:P569 ?dob; # date of birth
              rdfs:label "{name}"@en. # name of the footballer
    }}
    LIMIT 1
    """
    
    # Initialize the SPARQL wrapper
    sparql = SPARQLWrapper(endpoint_url)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    
    # Execute the query and parse the results
    try:
        results = sparql.query().convert()
        if results["results"]["bindings"]:
            dob = results["results"]["bindings"][0]["dob"]["value"]
            # Remove the time part
            dob = dob.split('T')[0]
            return dob
        else:
            return None
    except Exception as e:
        print(f"Error fetching data for {name}: {e}")
        return None

def calculate_age(dob):
    birth_date = datetime.strptime(dob, '%Y-%m-%d')
    today = datetime.today()
    age = today.year - birth_date.year - ((today.month, today.day) < (birth_date.month, birth_date.day))
    return age

def update_csv(input_file, output_file, name_column):
    # Read the existing CSV file with the correct encoding
    df = pd.read_csv(input_file, encoding='latin1')
    
    # Create a new column for dates of birth
    df['Date of Birth'] = df[name_column].apply(fetch_dob)
    
    # Filter out players not within the age range of 15 to 45
    df['Age'] = df['Date of Birth'].apply(lambda x: calculate_age(x) if pd.notnull(x) else None)
    df_filtered = df[(df['Age'] >= 9) & (df['Age'] <= 51)]
    
    # Drop the 'Age' column as it's no longer needed
    df_filtered.drop(columns=['Age'], inplace=True)
    
    # Save the updated dataframe to the same CSV file
    df_filtered.to_csv(output_file, index=False)

# Specify the input and output file names and the correct name column
input_file = 'players_2016_with_dob.csv'  
output_file = 'players_2016_with_dob.csv' 
name_column = 'name'  

# Update the CSV file
update_csv(input_file, output_file, name_column)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered.drop(columns=['Age'], inplace=True)
