In [1]:
import ast
import json
import os

import openai
from dotenv import load_dotenv
from tqdm import tqdm

# Let python-dotenv populate the environment before the key is looked up.
load_dotenv()

# The OpenAI client is configured with the key stored in the OPEN_AI_KEY
# environment variable.
client = openai.OpenAI(
    api_key=os.environ.get("OPEN_AI_KEY"),
)


# Helper to create prompt
def create_prompt(spell_name, description, location, availability, spell_type):
    """Build the lore-extraction prompt for a single spell.

    Args:
        spell_name: Name of the spell, inserted verbatim.
        description: Description text; a falsy value (``None`` or ``""``) is
            rendered as ``[No description]``.
        location: Location text, inserted verbatim.
        availability: Iterable of strings, rendered as one ``- `` bullet per
            entry. ``None`` is treated like an empty list.
        spell_type: Spell category label, inserted verbatim.

    Returns:
        The complete prompt string to send to the chat model.
    """
    # Join the bullets outside the f-string: a backslash inside an f-string
    # replacement field is a SyntaxError on Python versions before 3.12.
    # `or []` keeps a missing/null availability from raising in join().
    availability_lines = "\n- ".join(availability or [])
    return f"""
You are a Dark Souls lore extraction assistant. Given a spell's name, type, description, and availability, extract a list of factual (subject, predicate, object) triples related to lore (NOT gameplay).

Focus on relationships such as:
- who created or used the spell (e.g., created_by, associated_with)
- where it originates or is found
- who sells or gives it
- its connection to people, locations, covenants, factions, dragons, etc.

Avoid gameplay mechanics, effects, stats, or anything not related to lore.

Output only factual triples in this format:
[
  (subject, predicate, object),
  ...
]

Remember that predicate should always have underscore between any two 2 words.
---

Spell Name: {spell_name}
Type: {spell_type}

Description:
{description or '[No description]'}

Location:
{location}

Availability:
- {availability_lines}
"""

# Function to query OpenAI and parse result
def extract_triplets(prompt):
    """Send a prompt to the chat model and parse the reply into triples.

    Args:
        prompt: The full user message to send.

    Returns:
        The list parsed from the model's reply. An empty list is returned
        (after printing the error) if the request fails, the reply is not a
        valid Python literal, or the literal is not a list/tuple.
    """
    try:
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.2
        )
        # The message content may be None; treat that as an empty reply.
        content = response.choices[0].message.content or ""
        # NOTE(security): the reply is untrusted text. It used to be passed
        # to eval(), which would execute any expression the model returned.
        # literal_eval accepts only Python literals (lists, tuples, strings,
        # numbers, ...) and raises on anything else.
        triples = ast.literal_eval(content.strip())
    except Exception as e:
        # Best effort: one failed spell should not abort the whole run.
        print("Error:", e)
        return []

    # Callers iterate over the result, so reject anything that is not a
    # sequence of triples (e.g. a bare string or number).
    if not isinstance(triples, (list, tuple)):
        print("Error: expected a list of triples, got", type(triples).__name__)
        return []
    return list(triples)

# Process every spell in a single JSON file
def process_spells(filepath, spell_type):
    """Extract lore triples for every spell stored in one JSON file.

    Args:
        filepath: Path to a JSON file that is iterated as a collection of
            spell objects. Each object must have a "name" key;
            "in_game_description", "location" and "availability" are
            optional.
        spell_type: Category label passed into the prompt and shown in the
            progress bar description.

    Returns:
        A flat list containing one list per extracted triple, across all
        spells in the file.
    """
    # JSON text is UTF-8 by specification; pin the encoding so decoding does
    # not depend on the platform's locale default.
    with open(filepath, "r", encoding="utf-8") as f:
        spells = json.load(f)

    all_triples = []
    for spell in tqdm(spells, desc=f"Processing {spell_type}"):
        prompt = create_prompt(
            spell["name"],
            spell.get("in_game_description", ""),
            spell.get("location", ""),
            spell.get("availability", []),
            spell_type
        )
        triples = extract_triplets(prompt)
        # Convert each triple to a list so the result is JSON-serializable
        # in a uniform shape.
        all_triples.extend([list(t) for t in triples])
    return all_triples

# Resolve the scraped-data directory: "Scrape_Pages" in the parent of the
# current working directory.
current_dir = os.getcwd()
scraping_dir = os.path.abspath(os.path.join(current_dir, '..', 'Scrape_Pages'))

# One JSON file per spell category.
pyromancy_path = os.path.join(scraping_dir, 'dark_souls_pyromancies_full.json')
sorcery_path = os.path.join(scraping_dir, 'dark_souls_sorceries_full.json')
miracle_path = os.path.join(scraping_dir, 'dark_souls_miracles_full.json')

# Extract triples category by category: pyromancies, then miracles, then
# sorceries, accumulating everything into a single list.
triples_all = []
for source_path, category in (
    (pyromancy_path, "Pyromancy"),
    (miracle_path, "Miracle"),
    (sorcery_path, "Sorcery"),
):
    triples_all.extend(process_spells(source_path, category))

# Write the combined triples to a JSON file in the working directory.
with open("spell_triplets.json", "w") as out_file:
    json.dump(triples_all, out_file, indent=2)

print("Lore triples extracted and saved to 'spell_triplets.json'")


Processing Pyromancy: 100%|██████████| 20/20 [01:58<00:00,  5.93s/it]
Processing Miracle: 100%|██████████| 20/20 [01:44<00:00,  5.22s/it]
Processing Sorcery: 100%|██████████| 20/20 [02:53<00:00,  8.69s/it]

✅ Lore triples extracted and saved to 'spell_triplets.json'



