In [1]:
%pip install --upgrade openai tqdm dotenv

Note: you may need to restart the kernel to use updated packages.


In [2]:
import ast
import json
import os

import openai
from dotenv import load_dotenv
from tqdm import tqdm

# Pull any variables defined in a local .env file into the process environment.
load_dotenv()

# The API key is read from the OPEN_AI_KEY environment variable
# (export it in your shell or define it in the .env file).
client = openai.OpenAI(api_key=os.environ.get("OPEN_AI_KEY"))


In [3]:
# Locate the scraped gifts JSON relative to the current working directory:
# the file is expected in a `Scrape_Pages` folder that sits next to the
# directory the notebook is run from.
current_dir = os.getcwd()
scraping_dir = os.path.join(current_dir, '..', 'Scrape_Pages')
scraping_dir = os.path.abspath(scraping_dir)
gifts_path = os.path.join(scraping_dir, 'dark_souls_gifts_detailed.json')
# Read explicitly as UTF-8 so non-ASCII characters in the item text decode the
# same way on every platform instead of depending on the locale default.
with open(gifts_path, "r", encoding="utf-8") as f:
    gifts = json.load(f)

In [4]:
# Prompt template function
def create_prompt(name, description, availability):
    """Build the triple-extraction prompt for a single item.

    Args:
        name: Item name, inserted after ``Item - ``.
        description: In-game description text, inserted verbatim.
        availability: Iterable of strings; each one becomes a ``- `` bullet
            under ``Availability:``. ``None`` is treated as an empty list.

    Returns:
        The full prompt string: instructions, one worked example, then the
        item's own details.
    """
    # Join outside the f-string: a backslash inside an f-string replacement
    # field is a SyntaxError on Python versions before 3.12.
    availability_lines = '\n- '.join(availability or [])
    return f"""
You are a Dark Souls lore extraction assistant. Given the name, in-game description, and availability of an item, extract a list of factual (subject, predicate, object) triples that describe only **relationships with characters, locations, groups, or items**.

Focus on:
- who the item is associated with
- who gives or sells the item
- where it is found
- what it is traded for or with
- any related characters, places, factions

Do NOT include triples about the item's effects, powers, or gameplay mechanics.

Output format:
[
  (subject, predicate, object),
  ...
]

Remember that predicate should always have underscore between any two 2 words.
---
For example:
Item Name: Divine Blessing

In-Game Description:
Holy water from Goddess Gwynevere. The Goddess of Sunlight, Gwynevere, daughter of the great Lord of Sunlight Gwyn, is cherished by all as the symbol of bounty and fertility.

Availability:
- Select Divine Blessing as a starting gift in a New Game
- Sold by Rhea of Thorolund in the Undead Parish after saving her from the Tomb of the Giants for 1,000 souls. Limited to 1 per playthrough
- Sold by Patches in Firelink Shrine for 20,000 souls. Limited to 1 per playthrough
- Gift from Alvina after getting 1 successful kill as a Forest Hunter using the Cat Covenant Ring
- Treasure (x2) from a corpse in the lava area near the second tower in Lost Izalith
- Treasure from a chest in Sen's Fortress, guarded by Undead Prince Ricard
- Treasure from a chest in the upper level of the Painting building in Anor Londo
- Trade Ring of the Sun Princess with Snuggly the Crow for 2 Divine Blessings
- Drop from Red Good Vagrants spawned from pendants

The output should be in the following format:
[
  ("Divine Blessing", "associated_with", "Gwynevere"),
  ("Gwynevere", "daughter_of", "Gwyn"),
  ("Divine Blessing", "sold_by", "Rhea of Thorolund"),
  ("Rhea of Thorolund", "located_in", "Undead Parish"),
  ("Rhea of Thorolund", "rescued_from", "Tomb of the Giants"),
  ("Divine Blessing", "sold_by", "Patches"),
  ("Patches", "located_in", "Firelink Shrine"),
  ("Divine Blessing", "gift_from", "Alvina"),
  ("Alvina", "requires_action", "1 successful kill as Forest Hunter"),
  ("Divine Blessing", "found_in", "Lost Izalith"),
  ("Divine Blessing", "found_in", "Sen's Fortress"),
  ("Divine Blessing", "guarded_by", "Undead Prince Ricard"),
  ("Divine Blessing", "found_in", "Anor Londo"),
  ("Divine Blessing", "traded_for", "Ring of the Sun Princess"),
  ("Divine Blessing", "traded_with", "Snuggly the Crow"),
  ("Divine Blessing", "dropped_by", "Red Good Vagrants")
]
---------------

Item - {name}
In Game Description - {description}
Availability:
- {availability_lines}
"""


In [5]:
def _strip_code_fence(text):
    """Return ``text`` without a surrounding Markdown ``` fence, if one is present."""
    text = text.strip()
    if not text.startswith("```"):
        return text
    # Drop the opening fence line (it may carry a language tag such as "python").
    lines = text.splitlines()[1:]
    if lines and lines[-1].strip() == "```":
        lines = lines[:-1]
    return "\n".join(lines).strip()


def _parse_triples(content):
    """Parse the model reply into a list, accepting Python literals only."""
    # SECURITY: the reply is untrusted text. `eval` would execute whatever the
    # model returned; `ast.literal_eval` only accepts literals (lists, tuples,
    # strings, numbers, ...), which is all the prompt's output format needs.
    parsed = ast.literal_eval(_strip_code_fence(content))
    if not isinstance(parsed, (list, tuple)):
        raise ValueError(f"expected a list of triples, got {type(parsed).__name__}")
    return list(parsed)


def extract_triplets(prompt, model="gpt-4", temperature=0.3):
    """Send ``prompt`` to the chat completions API and parse the reply into triples.

    Args:
        prompt: Full prompt text, sent as a single user message.
        model: Chat model name passed to the API.
        temperature: Sampling temperature passed to the API.

    Returns:
        The list parsed from the model's reply (the prompt asks for
        ``(subject, predicate, object)`` tuples). Returns ``[]`` if the request
        fails or the reply cannot be parsed as a literal list; the error is
        printed rather than raised so one bad item does not abort a batch.
    """
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            temperature=temperature,
        )
        content = response.choices[0].message.content
        return _parse_triples(content)
    except Exception as e:
        # Deliberate best-effort: report and continue with no triples.
        print(f"Error: {e}")
        return []
# Run every gift through the model and collect the triples it returns
triples_output = []

for gift in tqdm(gifts):
    prompt = create_prompt(
        gift["name"],
        gift.get("in_game_description", ""),
        gift.get("availability", []),
    )
    triples = extract_triplets(prompt)
    # Store each triple as a plain list rather than a tuple
    triples_output.extend(list(triple) for triple in triples)

100%|██████████| 8/8 [00:35<00:00,  4.40s/it]


In [6]:
# Write the collected triples to gift_triplets.json in the working directory,
# pretty-printed with a two-space indent.
with open("gift_triplets.json", "w") as out_file:
    json.dump(triples_output, out_file, indent=2)