In [6]:
myth_entities = {
    "Zeus": ["Zeus"],
    "Apollo": ["Apollo", "Phoebus"],
    "Athena": ["Athena", "Pallas"],
    "Artemis": ["Artemis"],
    "Hades": ["Hades", "Plouton"],
    "Poseidon": ["Poseidon"],
    "Demeter": ["Demeter"],
    "Hera": ["Hera"],
    "Hermes": ["Hermes"],
    "Ares": ["Ares"],
    "Dionysus": ["Dionysus", "Bacchus"],
    "Hephaestus": ["Hephaestus", "Vulcan"],
    # You can add heroes too, like "Heracles": ["Heracles", "Hercules"]
}


In [7]:
import spacy
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
from time import sleep
import csv

nlp = spacy.load("en_core_web_sm")
nlp.max_length = 2_000_000
geolocator = Nominatim(user_agent="myth_mapper")

# Scrape Apollodorus
url = "https://www.theoi.com/Text/Apollodorus1.html"
soup = BeautifulSoup(requests.get(url).text, "html.parser")
apollodorus_text = soup.get_text(separator=" ")

# Load Homer texts
odyssey = requests.get("https://www.gutenberg.org/files/1727/1727-0.txt").text
iliad = requests.get("https://www.gutenberg.org/files/6130/6130-0.txt").text

sources = {
    "Apollodorus": apollodorus_text,
    "Odyssey": odyssey,
    "Iliad": iliad
}

myth_entities = {
    "Zeus": ["Zeus"],
    "Apollo": ["Apollo", "Phoebus"],
    "Athena": ["Athena", "Pallas"],
    "Artemis": ["Artemis"],
    "Hades": ["Hades", "Plouton"],
    "Poseidon": ["Poseidon"],
    "Demeter": ["Demeter"],
    "Hera": ["Hera"],
    "Hermes": ["Hermes"],
    "Ares": ["Ares"],
    "Dionysus": ["Dionysus", "Bacchus"],
    "Hephaestus": ["Hephaestus", "Vulcan"]
}

output_dict = {}

for source_name, raw_text in sources.items():
    doc = nlp(raw_text)
    ents = [(ent.text.strip(), ent.start_char, ent.label_) for ent in doc.ents if ent.label_ in ("GPE", "LOC")]

    for place, pos, _ in ents:
        context_window = raw_text[max(0, pos-200):pos+200].lower()
        associated_god = None
        for god, aliases in myth_entities.items():
            if any(alias.lower() in context_window for alias in aliases):
                associated_god = god
                break

        if not associated_god:
            continue

        key = (place, associated_god)

        if key not in output_dict:
            try:
                loc = geolocator.geocode(place)
                if loc:
                    output_dict[key] = {
                        "Place": place,
                        "Latitude": loc.latitude,
                        "Longitude": loc.longitude,
                        "God": associated_god,
                        "Source": source_name,
                        "Frequency": 1
                    }
                    sleep(1)
            except:
                continue
        else:
            output_dict[key]["Frequency"] += 1

# Convert dictionary to list
output_data = list(output_dict.values())

# Optional: Write to CSV
with open("myth_locations_with_frequency.csv", "w", newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=["Place", "Latitude", "Longitude", "God", "Source", "Frequency"])
    writer.writeheader()
    writer.writerows(output_data)



In [8]:
# from collections import defaultdict
# import spacy
# import requests
# from bs4 import BeautifulSoup
# from geopy.geocoders import Nominatim
# from time import sleep
# import csv

# # Load spaCy and geocoder
# nlp = spacy.load("en_core_web_sm")
# nlp.max_length = 2_000_000
# geolocator = Nominatim(user_agent="myth_mapper")

# # Scrape Apollodorus
# soup = BeautifulSoup(requests.get("https://www.theoi.com/Text/Apollodorus1.html").text, "html.parser")
# apollodorus_text = soup.get_text(separator=" ")

# # Homer texts
# odyssey = requests.get("https://www.gutenberg.org/files/1727/1727-0.txt").text
# iliad = requests.get("https://www.gutenberg.org/files/6130/6130-0.txt").text

# sources = {
#     "Apollodorus": apollodorus_text,
#     "Odyssey": odyssey,
#     "Iliad": iliad
# }

# # Gods and aliases
# myth_entities = {
#     "Zeus": ["Zeus"],
#     "Apollo": ["Apollo", "Phoebus"],
#     "Athena": ["Athena", "Pallas"],
#     "Artemis": ["Artemis"],
#     "Hades": ["Hades", "Plouton"],
#     "Poseidon": ["Poseidon"],
#     "Demeter": ["Demeter"],
#     "Hera": ["Hera"],
#     "Hermes": ["Hermes"],
#     "Ares": ["Ares"],
#     "Dionysus": ["Dionysus", "Bacchus"],
#     "Hephaestus": ["Hephaestus", "Vulcan"]
# }

# # Track frequency of (Place, God) pairs
# pair_counter = defaultdict(lambda: {"count": 0, "Latitude": None, "Longitude": None, "Sources": set()})

# for source_name, raw_text in sources.items():
#     doc = nlp(raw_text)
#     ents = [(ent.text, ent.start_char, ent.label_) for ent in doc.ents if ent.label_ in ("GPE", "LOC")]

#     for place, pos, _ in ents:
#         context_window = raw_text[max(0, pos-200):pos+200].lower()
#         associated_god = None
#         for god, aliases in myth_entities.items():
#             if any(alias.lower() in context_window for alias in aliases):
#                 associated_god = god
#                 break

#         if not associated_god:
#             continue

#         key = (place.strip(), associated_god)

#         # If first time seeing the pair, geocode it
#         if pair_counter[key]["count"] == 0:
#             try:
#                 loc = geolocator.geocode(place)
#                 if loc:
#                     pair_counter[key]["Latitude"] = loc.latitude
#                     pair_counter[key]["Longitude"] = loc.longitude
#                     sleep(1)
#             except:
#                 continue

#         pair_counter[key]["count"] += 1
#         pair_counter[key]["Sources"].add(source_name)


In [9]:
with open("mythology_locations.csv", "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=["Place", "Latitude", "Longitude", "God", "Source"])
    writer.writeheader()
    for row in output_data:
        writer.writerow(row)


ValueError: dict contains fields not in fieldnames: 'Frequency'

In [None]:
# with open("mythology_locations_with_counts.csv", "w", newline="") as f:
#     writer = csv.writer(f)
#     writer.writerow(["Place", "God", "Latitude", "Longitude", "Frequency", "Sources"])

#     for (place, god), data in pair_counter.items():
#         writer.writerow([
#             place,
#             god,
#             data["Latitude"],
#             data["Longitude"],
#             data["count"],
#             ", ".join(sorted(data["Sources"]))
#         ])
