In [1]:
import ast
import json
import os

import openai
from dotenv import load_dotenv
from tqdm import tqdm

# Load variables from a local .env file (if present) into the environment
load_dotenv()

# The API key is read from the OPEN_AI_KEY environment variable, which can be
# exported in the shell or supplied through the .env file loaded above
client = openai.OpenAI(api_key=os.getenv("OPEN_AI_KEY"))

# Load the weapons JSON from the sibling 'Scrape_Pages' directory, resolved
# relative to the current working directory
current_dir = os.getcwd()
scraping_dir = os.path.join(current_dir, '..', 'Scrape_Pages')
scraping_dir = os.path.abspath(scraping_dir)
weapons_path = os.path.join(scraping_dir, 'dark_souls_weapons_full.json')
# JSON text is UTF-8; be explicit so decoding does not depend on the
# platform's default encoding (e.g. cp1252 on Windows)
with open(weapons_path, "r", encoding="utf-8") as f:
    weapons_data = json.load(f)

# Prompt template for LLM
def create_prompt(weapon_name, description, availability, category):
    """Build the triple-extraction prompt for a single weapon.

    Args:
        weapon_name: Display name of the weapon.
        description: In-game description text; when empty or None the
            placeholder '[No description provided]' is used instead.
        availability: Iterable of availability notes, rendered as one
            '- ' bullet per item; when empty or None the placeholder
            '[No availability provided]' is used instead.
        category: Weapon category name.

    Returns:
        The complete prompt string to send to the model.
    """
    # Build the bullet list outside the f-string: a backslash inside an
    # f-string replacement field is a SyntaxError before Python 3.12.
    if availability:
        availability_text = "\n".join(f"- {item}" for item in availability)
    else:
        # Avoid emitting a dangling '- ' bullet when nothing is listed
        availability_text = "[No availability provided]"

    return f"""
You are a Dark Souls lore extraction assistant. Given a weapon's name, category, description, and availability, extract a list of factual (subject, predicate, object) triples. Focus on lore-relevant relationships only.

Ignore combat mechanics or damage types.

Extract:
- who the weapon is associated with (e.g. wielded_by, created_by, soul_of)
- where it can be found (e.g. found_in, dropped_by)
- any affiliations with characters, creatures, locations, or factions

Do NOT include attack types, effects, or stats.

Output format:
[
  (subject, predicate, object),
  ...
]

Remember that predicate should always have underscore between any two 2 words.
---

Weapon Name: {weapon_name}
Category: {category}

In-Game Description:
{description or '[No description provided]'}

Availability:
{availability_text}
"""

# Run extraction via OpenAI
def extract_triplets(prompt):
    """Send *prompt* to the chat model and parse the reply into triples.

    The reply is expected to be a Python-style list literal of
    (subject, predicate, object) tuples. It is parsed with
    ``ast.literal_eval`` rather than ``eval`` so that model output is never
    executed as code.

    Args:
        prompt: The full prompt text to send as a single user message.

    Returns:
        A list of 3-item sequences. Returns an empty list when the request
        fails or the reply cannot be parsed as a list literal; the error is
        printed so a single bad reply does not abort a long batch run.
    """
    try:
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.2,
        )
        output = response.choices[0].message.content.strip()

        # Models frequently wrap the answer in a Markdown code fence; drop
        # the opening and closing fence lines before parsing.
        if output.startswith("```"):
            fenced_lines = output.splitlines()[1:]
            if fenced_lines and fenced_lines[-1].strip().startswith("```"):
                fenced_lines = fenced_lines[:-1]
            output = "\n".join(fenced_lines)

        # literal_eval only accepts literals (strings, tuples, lists, ...),
        # so arbitrary expressions in the reply are rejected, not executed.
        parsed = ast.literal_eval(output)
    except Exception as e:
        # Deliberately best-effort: report the failure and move on
        print("Error:", e)
        return []

    if not isinstance(parsed, (list, tuple)):
        print("Error: expected a list of triples, got", type(parsed).__name__)
        return []

    # Keep only well-formed (subject, predicate, object) entries
    return [
        triple
        for triple in parsed
        if isinstance(triple, (list, tuple)) and len(triple) == 3
    ]

# Walk every category and each weapon inside it, collecting extracted triples
all_triples = []

for category_entry in tqdm(weapons_data):
    weapon_category = category_entry["category"]
    for weapon_record in category_entry["weapons"]:
        weapon_prompt = create_prompt(
            weapon_record["name"],
            weapon_record.get("in_game_description", ""),
            weapon_record.get("availability", []),
            weapon_category,
        )
        # Store each triple as a [subject, predicate, object] list
        all_triples.extend(
            list(item) for item in extract_triplets(weapon_prompt)
        )

# Write the collected triples to a JSON file in the working directory
with open("weapon_triplets.json", "w") as out_file:
    out_file.write(json.dumps(all_triples, indent=2))

print(" Triples saved to 'weapon_triplets.json'")

100%|██████████| 19/19 [04:28<00:00, 14.11s/it]

✅ Triples saved to 'weapon_triplets.json'



