In [1]:
!pip install spacy folium geopy beautifulsoup4 requests
!python -m spacy download en_core_web_sm


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m
Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m47.4 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25h
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


In [2]:
import csv
import re
from time import sleep

import requests
import spacy
from bs4 import BeautifulSoup
from geopy.exc import GeopyError
from geopy.geocoders import Nominatim

# English spaCy pipeline; its named-entity recogniser supplies the place names.
nlp = spacy.load("en_core_web_sm")
# Raise spaCy's default 1,000,000-character limit so the whole scraped text can
# be processed as a single document.
nlp.max_length = 2_000_000
# Nominatim requires an identifying user agent. geopy's default timeout is only
# 1 second, which the public server frequently exceeds, so allow more time
# instead of losing places to spurious timeout errors.
geolocator = Nominatim(user_agent="myth_mapper", timeout=10)

# Scrape Hesiod's Theogony from theoi.com
url = "https://www.theoi.com/Text/HesiodTheogony.html"
# Bound the request time and fail loudly on an HTTP error, so an error page is
# never silently analysed as if it were the poem.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")
# Join text nodes with a space so words from adjacent tags do not run together.
hesiod_text = soup.get_text(separator=" ")


# Texts to analyse, keyed by the label written to the "Source" column.
sources = {
    "Hesiod": hesiod_text
}

# Canonical god name -> every name searched for in the text (the canonical name
# first, then alternative names). Insertion order is significant: when several
# gods appear near a place, the first one listed here is the one recorded.
myth_entities = {
    canonical: [canonical, *alternatives]
    for canonical, alternatives in (
        ("Zeus", ()),
        ("Apollo", ("Phoebus",)),
        ("Athena", ("Pallas",)),
        ("Artemis", ()),
        ("Hades", ("Plouton",)),
        ("Poseidon", ()),
        ("Demeter", ()),
        ("Hera", ()),
        ("Hermes", ()),
        ("Ares", ()),
        ("Dionysus", ("Bacchus",)),
        ("Hephaestus", ("Vulcan",)),
    )
}

# (place, god) -> output row. "Frequency" counts how many times the place was
# found with that god nearby. The key does not include the source, so "Source"
# holds the first source in which the pair was seen.
output_dict = {}

# place -> geocoding result (None when Nominatim found nothing). Each distinct
# place name is looked up once, however many gods or occurrences it has.
geocode_cache = {}

# One case-insensitive, whole-word pattern per god. Plain substring tests would
# report "Ares" inside "shares" and "Hera" inside "Heracles".
god_patterns = {
    god: re.compile(
        r"\b(?:" + "|".join(re.escape(alias) for alias in aliases) + r")\b",
        re.IGNORECASE,
    )
    for god, aliases in myth_entities.items()
}

for source_name, raw_text in sources.items():
    doc = nlp(raw_text)

    for ent in doc.ents:
        # Only geopolitical entities and other named locations are mapped.
        if ent.label_ not in ("GPE", "LOC"):
            continue

        place = ent.text.strip()
        pos = ent.start_char

        # Look for a god within 200 characters either side of the entity start;
        # the first god in myth_entities order that matches is used.
        context_window = raw_text[max(0, pos - 200):pos + 200]
        associated_god = next(
            (god for god, pattern in god_patterns.items() if pattern.search(context_window)),
            None,
        )
        if associated_god is None:
            continue

        key = (place, associated_god)
        if key in output_dict:
            output_dict[key]["Frequency"] += 1
            continue

        if place not in geocode_cache:
            try:
                geocode_cache[place] = geolocator.geocode(place)
            except GeopyError as exc:
                # Best effort: report and skip this occurrence. The failure is
                # not cached, so a later occurrence of the place retries.
                print(f"Geocoding failed for {place!r}: {exc}")
                continue
            finally:
                # Pause after every request (hit, miss or error) to stay within
                # Nominatim's one-request-per-second usage policy.
                sleep(1)

        loc = geocode_cache[place]
        if loc is None:
            continue

        output_dict[key] = {
            "Place": place,
            "Latitude": loc.latitude,
            "Longitude": loc.longitude,
            "God": associated_god,
            "Source": source_name,
            "Frequency": 1
        }

# Convert dictionary to list: one row per (place, god) pair.
output_data = list(output_dict.values())

# Optional: Write to CSV.
# The encoding is fixed to UTF-8 so place names with non-ASCII characters are
# written identically on every platform instead of depending on the locale
# default; newline="" lets the csv module control line endings itself.
with open("hesiod.csv", "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(
        csvfile,
        fieldnames=["Place", "Latitude", "Longitude", "God", "Source", "Frequency"],
    )
    writer.writeheader()
    writer.writerows(output_data)

