In [1]:
# Import packages (they must already be installed in the kernel's environment)

import csv
import html
import random
import time

import folium
import pandas as pd
import plotly.express as px
import spacy
from geopy.geocoders import Nominatim

In [None]:
# Define a Named Entity Recognition (NER) function that pulls place names (GPE and LOC entities) out of the titles

# Load the spaCy pipeline that performs the entity recognition
nlp = spacy.load('en_core_web_sm')

# Read the exhibition table and take its 'Title' column as a plain Python list
df = pd.read_csv("metexhibitions_2015-2024.csv")
title_column = df['Title']
titles = title_column.tolist()

def extract_geographical_entities(titles, labels=('GPE', 'LOC'), nlp_model=None):
    """Find place-name entities in each title.

    Args:
        titles: Iterable of exhibition titles. Entries that are not strings
            (for example NaN from an empty CSV cell) are skipped, because the
            pipeline can only process text.
        labels: Entity labels to keep. Defaults to 'GPE' and 'LOC'.
        nlp_model: Pipeline used to analyse the titles; it must provide a
            ``pipe(texts)`` method that yields one document per text, each
            exposing ``ents`` with ``text`` and ``label_``. Defaults to the
            module-level ``nlp``.

    Returns:
        A list of ``(title, locations)`` tuples in input order. Only titles
        with at least one matching entity are included; ``locations`` holds
        the matched entity texts in document order.
    """
    model = nlp if nlp_model is None else nlp_model
    wanted_labels = frozenset(labels)
    texts = [title for title in titles if isinstance(title, str)]

    location_entities = []
    # pipe() processes the texts as a batch rather than one pipeline call per
    # title, and yields the documents in the same order as the input.
    for title, doc in zip(texts, model.pipe(texts)):
        locations = [ent.text for ent in doc.ents if ent.label_ in wanted_labels]
        if locations:
            location_entities.append((title, locations))
    return location_entities

# Run the extraction over every title, then print the matches followed by
# the number of titles that produced at least one match.
location_entities = extract_geographical_entities(titles)
print(location_entities, len(location_entities), sep="\n")


[('Asian Art at 100: A History in Photographs', ['Photographs']), ('New Discoveries: Early Liturgical Textiles from Egypt, 200–400', ['Egypt']), ('Korea', ['Korea']), ('P.S. Art 2015', ['P.S.']), ('China: Through the Looking Glass', ['China']), ('Sultans of Deccan India, 1500–1700: Opulence and Fantasy', ['India']), ('Bazm and Razm', ['Razm']), ('El Greco in New York', ['New York']), ('Treasures from India', ['India']), ('Kimono', ['Kimono']), ('Assyria to Iberia at the Dawn of the Classical Age', ['Assyria', 'Iberia']), ('The Aesthetic Movement in America', ['America']), ('Asian Art at 100: A History in Photographs', ['Photographs']), ("City of Memory: William Chappel's Views of Early 19th-Century New York", ['New York']), ('Velázquez Portraits: Truth in Painting', ['Painting']), ('Max Beckmann in New York', ['New York']), ('The Arts of Nepal and Tibet: Recent Gifts', ['Nepal', 'Tibet']), ('Jerusalem 1000–1400: Every People Under Heaven', ['Jerusalem']), ('P.S. Art 2016', ['P.S.']), (

In [None]:
# Create the Nominatim geocoding client, identified by its user agent string
geolocator = Nominatim(
    user_agent="exhibition_map",
)

def get_coordinates(location, timeout=10, geocoder=None):
    """Look up the latitude and longitude of a place.

    Args:
        location: Query passed to the geocoder's ``geocode`` method. ``None``
            and blank strings are rejected without making a request.
        timeout: Seconds to wait for the geocoding service before the request
            is treated as failed.
        geocoder: Object providing ``geocode(query, timeout=...)``. Defaults
            to the module-level ``geolocator``.

    Returns:
        ``(latitude, longitude)`` when the place is found, otherwise
        ``(None, None)``. Errors raised by the lookup are printed and also
        result in ``(None, None)``, so one bad query does not stop a batch.
    """
    # Nothing to look up: skip the network round trip entirely.
    if location is None or (isinstance(location, str) and not location.strip()):
        return None, None

    service = geolocator if geocoder is None else geocoder
    try:
        match = service.geocode(location, timeout=timeout)
    except Exception as e:
        # Best effort by design: report the failure and carry on.
        print(f"Error for {location}: {e}")
        return None, None

    if not match:
        return None, None  # The service had no result for this query
    return match.latitude, match.longitude


# Open the CSV file for writing
with open("exhibition_locations.csv", mode="w", newline="", encoding="utf-8") as file:
    writer = csv.writer(file)
    writer.writerow(["Title", "Location", "Latitude", "Longitude"])  # Write header

    # Many titles mention the same place (several contain "New York", for
    # example), so successful lookups are remembered and reused instead of
    # sending - and sleeping after - another identical request.
    # Failed lookups are not remembered, so they are retried the next time
    # the same location comes up.
    resolved = {}

    # Fetch coordinates for each location
    for title, locations in location_entities:
        for location in locations:
            if location in resolved:
                lat, lon = resolved[location]
            else:
                lat, lon = get_coordinates(location)
                time.sleep(1)  # Prevent rate limiting (only needed after a real request)
                if lat is not None and lon is not None:
                    resolved[location] = (lat, lon)

            # One row per (title, location) pair that could be geocoded
            if lat is not None and lon is not None:
                writer.writerow([title, location, lat, lon])


In [None]:
# Create a world map using Folium

m = folium.Map(location=[20, 0], zoom_start=2)

# Exhibitions that share a location would otherwise sit exactly on top of each
# other, so every marker is shifted by up to `jitter_scale` degrees.
jitter_scale = 0.3
# A dedicated, seeded generator makes the saved map identical on every run and
# leaves the global `random` state untouched.
jitter_rng = random.Random(42)

# Read the CSV and add markers
with open("exhibition_locations.csv", mode="r", newline="", encoding="utf-8") as file:
    reader = csv.reader(file)
    next(reader, None)  # Skip header (tolerates an empty file)

    for row in reader:
        # Blank or incomplete lines cannot be unpacked into the four columns
        if len(row) != 4:
            continue
        title, location, lat, lon = row
        lat = float(lat) + jitter_rng.uniform(-jitter_scale, jitter_scale)
        lon = float(lon) + jitter_rng.uniform(-jitter_scale, jitter_scale)

        # Create a custom icon (a fresh icon object for every marker)
        custom_icon = folium.CustomIcon(icon_image="museum_icon3.png", icon_size=(23, 30))

        # The popup is assembled as HTML, so characters such as & and < in
        # the data are escaped to keep them from being read as markup.
        popup_html = (
            f"<b>{html.escape(title, quote=False)}</b>"
            f"<br>{html.escape(location, quote=False)}"
        )

        # Add a marker with popup and tooltip
        folium.Marker(
            location=[lat, lon],
            popup=popup_html,
            tooltip=title,
            icon=custom_icon
        ).add_to(m)

# Save the map as an HTML file
m.save("exhibition_map.html")


In [None]:
# Create a treemap using popularexhibitions.csv

# Load the visitor figures; 'Year' is converted to text before plotting
df = pd.read_csv("popularexhibitions.csv").astype({"Year": str})

# Treemap settings: tiles are nested Year -> Exhibition Title, and the
# 'Visitor' column drives both tile size and the colour gradient.
treemap_options = {
    "path": ['Year', 'Title'],
    "values": 'Visitor',
    "color": 'Visitor',
    "color_continuous_scale": 'Blues',  # Change to another scale if preferred
}
fig = px.treemap(df, **treemap_options)

# Show the plot
fig.show()