In [7]:
# Imports
import os
import json
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.schema import Document


In [None]:
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

In [8]:
# Initialize embedding model
embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

# Initialize or connect to Chroma database
stories_db = Chroma(
    collection_name="stories_db",
    embedding_function=embeddings,
    persist_directory="stories_db"
)


In [9]:
# Folder where your JSON stories are stored
json_folder = "./data"  # <-- change this to your folder

documents = []

for filename in os.listdir(json_folder):
    if filename.endswith(".json"):
        filepath = os.path.join(json_folder, filename)
        with open(filepath, "r", encoding="utf-8") as f:
            data = json.load(f)

            # Case 1: file contains a list of stories
            if isinstance(data, list):
                for item in data:
                    doc = Document(page_content=item.get("text", ""))
                    documents.append(doc)

            # Case 2: file contains a single story object
            elif isinstance(data, dict):
                doc = Document(page_content=data.get("text", ""))
                documents.append(doc)
print(f"✅ Loaded {len(documents)} stories from JSON files.")


✅ Loaded 159 stories from JSON files.


In [10]:
# 3. Add to Chroma
# ----------------------------
before_count = stories_db._collection.count()  # number of docs before adding
stories_db.add_documents(documents)
stories_db.persist()
after_count = stories_db._collection.count()

print(f"📂 Stories before: {before_count}")
print(f"📥 Added: {after_count - before_count}")
print(f"📊 Total now in DB: {after_count}")

📂 Stories before: 0
📥 Added: 159
📊 Total now in DB: 159


In [11]:
query = "Tell me a story about resilience"
results = stories_db.similarity_search(query, k=2)

for i, res in enumerate(results, 1):
    print(f"\nStory {i}: {res.page_content[:200]}...")



Story 1: ...

Story 2: ...
