# Food in Art

In [5]:
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd
import time

In [6]:
# Initialize SPARQL endpoint
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")

# List of museum names (replace with your actual list)
data = pd.read_csv('data/wikidata_paintings_final.csv')
museum_names = data['display_location'].unique().tolist()

# Function to split the museum names list into smaller batches
def batch(iterable, n=5):  # Smaller batch size (e.g., 5)
    for i in range(0, len(iterable), n):
        yield iterable[i:i + n]

# Prepare an empty list to store results
results_list = []

# Function to query data for a batch of museum names
def query_museum_data_batch(museum_batch):
    names_filter = ' '.join(f'"{name}"@en' for name in museum_batch)
    sparql.setQuery(f"""
    SELECT ?museum ?museumLabel ?countryLabel ?locationLabel ?coordinates
    WHERE {{
      VALUES ?museumLabel {{ {names_filter} }}
      ?museum rdfs:label ?museumLabel.
      ?museum wdt:P31 wd:Q33506.
      OPTIONAL {{ ?museum wdt:P17 ?country. }}
      OPTIONAL {{ ?museum wdt:P131 ?location. }}
      OPTIONAL {{ ?museum wdt:P625 ?coordinates. }}
      SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
    }}
    """)
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()

# Query each batch with error handling and retry logic
for museum_batch in batch(museum_names, n=5):  # Smaller batches
    retry_count = 0
    while retry_count < 3:  # Retry up to 3 times
        try:
            data = query_museum_data_batch(museum_batch)
            for item in data["results"]["bindings"]:
                results_list.append({
                    "museum": item["museumLabel"]["value"],
                    "country": item.get("countryLabel", {}).get("value", None),
                    "location": item.get("locationLabel", {}).get("value", None),
                    "coordinates": item.get("coordinates", {}).get("value", None)
                })
            time.sleep(10)  # Increased delay between batches (10 seconds)
            break  # Exit retry loop if successful
        except Exception as e:
            print(f"Error with batch {museum_batch}: {e}")
            retry_count += 1
            time.sleep(5 * retry_count)  # Exponential backoff: 5s, 10s, 15s

# Convert results to a pandas DataFrame
df = pd.DataFrame(results_list)

# Display the DataFrame
df.head()  # Adjust as needed to see more rows

Error with batch ['National Museum of Fine Arts', 'Ahmanson Building', 'Henie-Onstad Art Centre', 'Museum Ludwig', 'Hall A.35 Leonardo']: HTTP Error 429: Too Many Requests
Error with batch ['San Marcuola', 'Dallas Museum of Art', 'Santa Maria de Manresa', 'Princeton University Art Museum', 'Smithsonian American Art Museum']: HTTP Error 429: Too Many Requests
Error with batch ['Von der Heydt Museum', 'Musée Sainte-Croix', 'Kunstmuseum Basel', 'National Museum of Art, Architecture and Design', 'Isabella Stewart Gardner Museum']: HTTP Error 429: Too Many Requests
Error with batch ['Chapel of Our-Lady-Outside', 'Scuola di San Giorgio degli Schiavoni', 'National Museum of Modern Art', 'Lenbachhaus', 'Kunstmuseum und Kunsthistorisches Seminar (building)']: HTTP Error 429: Too Many Requests
Error with batch ['Room A5 Lorenzetti - Simone Martini', 'Utah Museum of Fine Arts', 'Albertina', '"degenerate art" collection', 'Palazzetto Eucherio Sanvitale']: QueryBadFormed: A bad request has been sen

KeyboardInterrupt: 

In [None]:
df.drop_duplicates(subset='museum', keep='first', inplace=True)

df.to_csv('data/wikidata_museums_final.csv', index=False)