# Food in Art

In [None]:
%load_ext autoreload
%autoreload 2 

In [None]:
import pandas as pd
import os
import requests
from fetching import *

## Fetch initial painting IDs

### queries

In [None]:
# SPARQL template that lists items that are an instance of (P31) wd:Q3305213,
# together with their optional creator (P170) and optional location (P276).
# `{limit}` and `{offset}` are placeholders for paging; the doubled braces are
# literal SPARQL braces escaped for `str.format`-style substitution
# (presumably performed inside the `fetching` helpers -- confirm there).
wikidata_base_query = """
SELECT ?item ?author_wikidata ?location_wikidata WHERE {{
?item wdt:P31 wd:Q3305213.
OPTIONAL {{ ?item wdt:P170 ?author_wikidata. }}
OPTIONAL {{ ?item wdt:P276 ?location_wikidata. }}
}}
LIMIT {limit}
OFFSET {offset}
"""


# SPARQL template that enriches a batch of author QIDs.
# Placeholders: `{src_column_name}` names the SPARQL variable that carries the
# QID (so the result column matches the source CSV column), and `{qid_list}`
# is the VALUES list of entities to look up. Doubled braces are literal
# SPARQL braces escaped for `str.format`-style substitution.
# Every attribute is wrapped in OPTIONAL, so an author missing a property is
# still returned; labels are restricted to English.
author_query_template = """
SELECT DISTINCT ?{src_column_name} ?author_name ?country ?country_label ?gender ?gender_label 
       ?date_of_birth ?place_of_birth ?place_of_birth_label 
       ?place_of_birth_country ?place_of_birth_country_label 
WHERE {{
    VALUES ?{src_column_name} {{ {qid_list} }}
    
    # Author-specific information here
    OPTIONAL {{
        ?{src_column_name} rdfs:label ?author_name.
        FILTER(LANG(?author_name) = "en")
    }}
    OPTIONAL {{
        ?{src_column_name} wdt:P27 ?country.
        ?country rdfs:label ?country_label.
        FILTER(LANG(?country_label) = "en")
    }}
    OPTIONAL {{
        ?{src_column_name} wdt:P21 ?gender.
        ?gender rdfs:label ?gender_label.
        FILTER(LANG(?gender_label) = "en")
    }}
    OPTIONAL {{
        ?{src_column_name} wdt:P569 ?date_of_birth.
    }}
    OPTIONAL {{
        ?{src_column_name} wdt:P19 ?place_of_birth.
        ?place_of_birth rdfs:label ?place_of_birth_label.
        FILTER(LANG(?place_of_birth_label) = "en")
        OPTIONAL {{
            ?place_of_birth wdt:P17 ?place_of_birth_country.
            ?place_of_birth_country rdfs:label ?place_of_birth_country_label.
            FILTER(LANG(?place_of_birth_country_label) = "en")
        }}
    }}
}}
"""
# SPARQL template that enriches a batch of painting QIDs.
# Placeholder: `{qid_list}` is the VALUES list of painting entities. Doubled
# braces are literal SPARQL braces escaped for `str.format`-style substitution.
# Output columns: item, title, creation_date, origin_country, display_country,
# type, school, time_period, image_url and depicts (comma-separated labels).
#
# Notes on the query shape:
# * The two candidate countries are bound to their own variables and merged
#   with BIND into ?origin_country; SPARQL 1.1 forbids BIND-ing a variable that
#   was already used earlier in the same group.
# * The creator is looked up through P170, the same property the base
#   painting-ID query uses for `author_wikidata`.
# * GROUP_CONCAT uses DISTINCT so that row fan-out from the other OPTIONAL
#   joins does not repeat the same depicted label.
paintings_query_template = """
    SELECT ?item ?title ?creation_date ?origin_country ?display_country ?type ?school ?time_period ?image_url (GROUP_CONCAT(DISTINCT ?depicts_label; separator=", ") AS ?depicts) WHERE {{
    VALUES ?item {{ {qid_list} }}

    # Title of the item
    OPTIONAL {{
        ?item rdfs:label ?title.
        FILTER(LANG(?title) = "en")
    }}

    # Creation date
    OPTIONAL {{ ?item wdt:P571 ?creation_date. }}

    # Country of origin of the item, falling back to the creator's citizenship
    OPTIONAL {{
        ?item wdt:P495 ?item_country_wd.
        ?item_country_wd rdfs:label ?item_country_label.
        FILTER(LANG(?item_country_label) = "en")
    }}
    OPTIONAL {{
        ?item wdt:P170 ?creator.
        ?creator wdt:P27 ?creator_country_wd.
        ?creator_country_wd rdfs:label ?creator_country_label.
        FILTER(LANG(?creator_country_label) = "en")
    }}
    BIND(COALESCE(?item_country_label, ?creator_country_label) AS ?origin_country)

    # Display country
    OPTIONAL {{
        ?item wdt:P276 ?display_location_wd.
        OPTIONAL {{
            ?display_location_wd wdt:P17 ?display_country_wd.
            ?display_country_wd rdfs:label ?display_country.
            FILTER(LANG(?display_country) = "en")
        }}
    }}

    # Type
    OPTIONAL {{
        ?item wdt:P136 ?type_wd.
        ?type_wd rdfs:label ?type.
        FILTER(LANG(?type) = "en")
    }}

    # School or tradition
    OPTIONAL {{
        ?item wdt:P135 ?school_wd.
        ?school_wd rdfs:label ?school.
        FILTER(LANG(?school) = "en")
    }}

    # Time period
    OPTIONAL {{
        ?item wdt:P2348 ?time_period_wd.
        ?time_period_wd rdfs:label ?time_period.
        FILTER(LANG(?time_period) = "en")
    }}

    # Image URL
    OPTIONAL {{
        ?item wdt:P18 ?image_url.
    }}

    # Depicts
    OPTIONAL {{
        ?item wdt:P180 ?depicts_wd.
        ?depicts_wd rdfs:label ?depicts_label.
        FILTER(LANG(?depicts_label) = "en")
    }}
    }}
    GROUP BY ?item ?title ?creation_date ?origin_country ?display_country ?type ?school ?time_period ?image_url
"""

# SPARQL template that enriches a batch of location QIDs (the entities found
# through P276 in the base painting-ID query).
# Placeholders: `{src_column_name}` names the SPARQL variable that carries the
# QID, and `{qid_list}` is the VALUES list of entities to look up. Doubled
# braces are literal SPARQL braces escaped for `str.format`-style substitution.
# Every attribute is OPTIONAL; labels are restricted to English.
# NOTE(review): Wikidata defines P131 as "located in the administrative
# territorial entity", which is not necessarily a city -- confirm that
# downstream code treats the `city` columns accordingly.
locations_query_template = """
    SELECT ?{src_column_name} ?museum_name ?city ?city_label ?country ?country_label 
           ?founding_date ?museum_type ?museum_type_label ?coordinates ?part_of 
    WHERE {{
        VALUES ?{src_column_name} {{ {qid_list} }}

        OPTIONAL {{
            ?{src_column_name} wdt:P17 ?country.                             # P17 = country
            ?country rdfs:label ?country_label.                              # Get the label for country
            FILTER(LANG(?country_label) = "en")
        }}
        
        OPTIONAL {{
            ?{src_column_name} wdt:P131 ?city.                               # P131 = city
            ?city rdfs:label ?city_label.                                    # Get the label for city
            FILTER(LANG(?city_label) = "en")
        }}
        
        OPTIONAL {{
            ?{src_column_name} wdt:P571 ?founding_date.                      # P571 = founding date
        }}
        
        OPTIONAL {{
            ?{src_column_name} wdt:P31 ?museum_type.                         # P31 = instance of (museum type)
            ?museum_type rdfs:label ?museum_type_label.                      # Get the label for museum type
            FILTER(LANG(?museum_type_label) = "en")
        }}
        
        OPTIONAL {{
            ?{src_column_name} wdt:P625 ?coordinates.                        # P625 = coordinates (retrieves Geo-coordinates)
        }}
        
        OPTIONAL {{
            ?{src_column_name} rdfs:label ?museum_name.                      # Get the museum's name
            FILTER(LANG(?museum_name) = "en")
        }}
        
        OPTIONAL {{
            ?{src_column_name} wdt:P361 ?part_of.                            # P361 = part of (retrieves parent entities)
        }}
    }}
"""

### initial fetch

In [None]:
# Run the base painting query through the `fetching` helper, keyed on the
# `item` column. NOTE(review): the meaning of `max_batches_for_testing=0`
# (presumably "no test cap") lives in `fetching` -- confirm there.
paintings_ids = fetch_and_process_wikidata(
    'paintings_ids',
    wikidata_base_query,
    'item',
    max_batches_for_testing=0,
)


### supplement fetch

In [None]:
# Fetch one supplementary dataset per QID column of the painting-ID CSV.
# Each row is (output name, source column holding the QIDs, SPARQL template);
# the datasets are requested in the order listed.
for supplement_name, qid_column, sparql_template in (
    ('all_painters', 'author_wikidata', author_query_template),
    ('all_paintings', 'item', paintings_query_template),
    ('all_locations', 'location_wikidata', locations_query_template),
):
    get_supplement_from_wikidata(
        supplement_name,
        'data/paintings_ids.csv',
        qid_column,
        sparql_template,
    )


## Fetch economic indicators

In [None]:
# Download the workbook once and cache it locally, then load the
# 'Full data' sheet into `df`.
url = 'https://dataverse.nl/api/access/datafile/421302'
file_path = 'data/mpd2023_web.xlsx'

if not os.path.exists(file_path):
    # The target directory may not exist yet on a fresh checkout.
    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(url, timeout=60)
    # Fail loudly on HTTP errors: otherwise an error page would be written to
    # disk and reused as the "cached" workbook on every later run.
    response.raise_for_status()
    with open(file_path, 'wb') as f:
        f.write(response.content)

# Load a specific sheet by name (same path that was just downloaded/cached)
df = pd.read_excel(file_path, sheet_name='Full data')
df

In [None]:
# Bucket every row into the decade its year falls in (floor to a multiple of 10).
df['decade'] = df['year'].floordiv(10).mul(10)

# Mean 'gdppc' and 'pop' for each (country, decade) pair, with the grouping
# keys kept as ordinary columns.
df_avg = df.groupby(['country', 'decade'], as_index=False)[['gdppc', 'pop']].mean()

# Show the per-country, per-decade averages
print(df_avg)


In [None]:
# Grouping data by just the 'decade' to calculate the average 'gdppc' and 'pop' for each decade
# Mean 'gdppc' and 'pop' over all rows of each decade (all countries pooled).
df_avg_decades = df.groupby('decade')[['gdppc', 'pop']].mean().reset_index()
display(df_avg_decades)

# Fill in missing values using interpolation.
# method='polynomial' uses the numerical values of the index as the x-axis.
# Interpolating on the default RangeIndex would treat consecutive rows as
# equally spaced even when decades are missing in between, so the frame is
# indexed by 'decade' for the interpolation and restored afterwards.
df_avg_decades = df_avg_decades.set_index('decade')
df_avg_decades[['gdppc', 'pop']] = df_avg_decades[['gdppc', 'pop']].interpolate(
    method='polynomial', order=2
)
df_avg_decades = df_avg_decades.reset_index()
df_avg_decades

In [None]:
# Export: write the decade-level averages to CSV without the row index.
df_avg_decades.to_csv(
    'data/gdp_pop_decades.csv',
    index=False,
)