In [1]:
!pip install faiss-cpu sentence-transformers



In [2]:
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from datetime import datetime

In [3]:
model = SentenceTransformer("all-MiniLM-L6-v2")
memory_texts = []
metadata_list = []

`d` is the dimension of embedding and `IndexFlatIP` is inner product or dot product. Note, embeddings must be normalized in order compute cosine similarity this way.

In [4]:
d = 384
index = faiss.IndexFlatIP(d)

In [5]:
def add_turn(text, players=[], npcs=[], tags=[], scene_id=None):
    embedding = model.encode([text], convert_to_numpy=True, normalize_embeddings=True)
    index.add(embedding)
    memory_texts.append(text)
    metadata = {
        "text": text,
        "timestamp": datetime.utcnow().isoformat(),
        "players": players,
        "npcs": npcs,
        "tags": tags,
        "scene_id": scene_id
    }
    metadata_list.append(metadata)

In [6]:
def query_memory(query_text, top_k=3):
    """
    Query the memory index using a text string and return the top_k most similar entries
    based on cosine similarity.
    
    Args:
        query_text (str): The text to search for in memory.
        top_k (int, optional): Number of top results to return. Defaults to 3.
    
    Returns:
        List[Dict[str, Any]]: A list of results with text, similarity score, and metadata.
    """
    query_embedding = model.encode(
        [query_text],
        convert_to_numpy=True,
        normalize_embeddings=True
    )
    
    similarity_scores, memory_indices = index.search(query_embedding, top_k)
    
    results = [
        {
            "text": memory_texts[idx],
            "similarity": float(score),
            "metadata": metadata_list[idx]
        }
        for idx, score in zip(memory_indices[0], similarity_scores[0])
    ]
    
    return results


In [7]:
add_turn("Player1 opened the treasure chest", 
         players=["Player1"], 
         npcs=[], 
         tags=["exploration"], 
         scene_id=1)
add_turn("The party killed the dragon",
         players=["Player1","Player2","Player3"],
         npcs=["Dragon"],
         tags=["combat"],
         scene_id=2)
add_turn("Wizard asked to kill the bandits.",
         players=["Player1","Player2","Player3"],
         npcs=["Wizard"],
         tags=["quest"],
         scene_id=3)
add_turn("Bandits attacked the village",
         players=[],
         npcs=["Bandits"],
         tags=["dialogue"],
         scene_id=4)
add_turn("Player2 stole the key.",
         players=["Player2"],
         npcs=["Guard"],
         tags=["exploration"],
         scene_id=5)

In [9]:
query_results = query_memory("Who stole the key?", top_k=2)

for r in query_results:
    print(f"Match: {r['text']} | Similarity={r['similarity']:.2f} | Metadata={r['metadata']}")


Match: Player2 stole the key. | Similarity=0.75 | Metadata={'text': 'Player2 stole the key.', 'timestamp': '2025-10-01T12:44:27.648091', 'players': ['Player2'], 'npcs': ['Guard'], 'tags': ['exploration'], 'scene_id': 5}
Match: Player1 opened the treasure chest | Similarity=0.39 | Metadata={'text': 'Player1 opened the treasure chest', 'timestamp': '2025-10-01T12:44:27.608449', 'players': ['Player1'], 'npcs': [], 'tags': ['exploration'], 'scene_id': 1}


In [None]:
def build_dm_prompt(player_action, retrieved_memories, world_state):
    memory_texts = "\n".join([m["text"] for m in retrieved_memories])
    world_facts = "\n".join([f"{k}: {v}" for k, v in world_state.items()])
    return f"""
You are a Dungeon Master running a fantasy campaign.
You must be consistent with past events and the current world state.
Respond to player's action like a Dungen Master would.
Tell them the consequences of their actions and make it like you are reading from fantasy book.

--- World Facts ---
{world_facts}

--- Relevant Past Memories ---
{memory_texts}

--- Player Action ---
{player_action}

--- Your Task ---
1. Narrate what happens next in a dramatic but concise way.
2. Roleplay any NPCs if they are involved.
3. Do NOT contradict the memories or world facts.
4. Keep tone consistent with a fantasy Dungeon Master.
"""

In [None]:
world_state = {
    "location": "Dark Cave",
    "npc": "",
    "quest": "Retrieve the treasure"
}

In [None]:
player_action = "I look inside the treasure chest to see what's in there!"
retrieved = query_memory(player_action, top_k=3)
prompt = build_dm_prompt(player_action, retrieved, world_state)

In [None]:
from transformers import pipeline
from huggingface_hub import snapshot_download

model_name = "mistralai/Mistral-7B-v0.1"

cache_dir = snapshot_download(
    repo_id=model_name,
    use_auth_token=True
)

print(f"Model cached at: {cache_dir}")

Fetching 14 files:   0%|          | 0/14 [00:00<?, ?it/s]

README.md:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

.gitattributes:   0%|          | 0.00/1.52k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/5.06G [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/996 [00:00<?, ?B/s]

In [23]:
pipe = pipeline("text-generation",model=cache_dir, device_map="auto")
outputs = pipe(prompt, max_new_tokens=100, do_sample=True, temperature=0.7)
print(outputs[0]["generated_text"])



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu and disk.
Device set to use cpu
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



You are a Dungeon Master running a fantasy campaign.
You must be consistent with past events and the current world state.
Respond to player's action like a Dungen Master would.
Tell them the consequences of their actions and make it like you are reading from fantasy book.

--- World Facts ---
location: Dark Cave
npc: 
quest: Retrieve the treasure

--- Relevant Past Memories ---
Player1 opened the treasure chest
Player2 stole the key.
Wizard asked to kill the bandits.

--- Player Action ---
I look inside the treasure chest to see what's in there!

--- Your Task ---
1. Narrate what happens next in a dramatic but concise way.
2. Roleplay any NPCs if they are involved.
3. Do NOT contradict the memories or world facts.
4. Keep tone consistent with a fantasy Dungeon Master.

--- Output ---
You open the chest to reveal a pile of coins and a key!
"Hey, that's my key!" Player2 shouts.
"Hey, I'm the wizard! Kill the bandits!" the wizard replies.

--- Answer Key ---
1. I open the chest to reveal