In [None]:
# db_fill.ipynb

In [1]:
import json
import os
import re
from pathlib import Path

import weaviate
import yaml
from dotenv import load_dotenv
from weaviate.classes.config import (
    Configure,
    Property,
    DataType,
)
from weaviate.classes.init import Auth
from weaviate.util import generate_uuid5

In [2]:
# Load variables from a .env file (when one is present) into os.environ.
load_dotenv()

# Required settings: indexing os.environ raises KeyError immediately if any
# of them is missing, instead of failing later with an unclear error.
openai_api_key = os.environ["OPENAI_API_KEY"]
weaviate_url = os.environ["WEAVIATE_REST_ENDPOINT"]
weaviate_api_key = os.environ["WEAVIATE_API_KEY"]

# Extra header handed to the Weaviate client when connecting; it carries the
# OpenAI key alongside each request.
headers = {"X-OpenAI-Api-Key": openai_api_key}

In [3]:
# Open an authenticated connection to the Weaviate Cloud cluster.
# skip_init_checks=True bypasses the client's startup checks, so readiness is
# confirmed explicitly with is_ready() right after connecting.
# NOTE(review): no client.close() is visible in this part of the notebook —
# confirm the connection is closed once the work is done.
client = weaviate.connect_to_weaviate_cloud(
    cluster_url=weaviate_url,
    auth_credentials=Auth.api_key(weaviate_api_key),
    headers=headers,
    skip_init_checks=True,
)
print("Weaviate ready:", client.is_ready())

Weaviate ready: True


In [4]:
def parse_markdown_file(path: Path):
    """Split a Markdown file into its YAML front matter and its body.

    The file must begin with a front-matter block fenced by ``---`` lines.

    Args:
        path: Location of the Markdown file to read.

    Returns:
        ``None`` when the file does not start with a front-matter block.
        Otherwise a dict with three keys:

        * ``"metadata"`` – the parsed front matter; always a dict (empty when
          the front matter is blank or is not a YAML mapping), so callers can
          safely use ``.get``.
        * ``"content"`` – the Markdown body with surrounding whitespace
          stripped.
        * ``"file_path"`` – ``str(path)``.

    Raises:
        yaml.YAMLError: If the front matter is not valid YAML.
    """
    # "utf-8-sig" reads plain UTF-8 unchanged but also drops a leading BOM,
    # which would otherwise keep the anchored regex from matching the fence.
    text = path.read_text(encoding="utf-8-sig")
    match = re.match(r"---\n(.*?)\n---\n(.*)", text, re.DOTALL)
    if not match:
        return None
    yaml_str, content = match.groups()
    metadata = yaml.safe_load(yaml_str)
    # safe_load yields None for blank input and may yield a scalar or list;
    # normalise to a dict so downstream `.get` calls never fail.
    if not isinstance(metadata, dict):
        metadata = {}
    return {
        "metadata": metadata,
        "content": content.strip(),
        "file_path": str(path)
    }

In [5]:
collection_name = "ChessKnowledgeBase"

# collections.create() fails when the name is already taken, which breaks
# "Restart & Run All". Only create the collection when it is missing.
# An existing collection is reused as-is: its schema is NOT updated, and
# importing into it again adds objects again unless they carry deterministic
# UUIDs — delete the collection first for a clean rebuild.
if client.collections.exists(collection_name):
    print(f"Collection {collection_name!r} already exists; reusing it.")
else:
    client.collections.create(
        name=collection_name,
        properties=[
            Property(name="title", data_type=DataType.TEXT),
            Property(name="type", data_type=DataType.TEXT),
            Property(name="tags", data_type=DataType.TEXT_ARRAY),
            Property(name="content", data_type=DataType.TEXT),
        ],
        vector_config=[
            # One named vector embedding all four text properties with OpenAI.
            Configure.Vectors.text2vec_openai(
                name="knowledge_note_chess",
                source_properties=["title", "type", "tags", "content"],
                model="text-embedding-3-large",
                dimensions=3072
            )
        ],
    )
    print(f"Collection {collection_name!r} created.")

collection = client.collections.use(collection_name)

In [6]:
# Uncomment the next line to drop the collection before a clean re-import.
# client.collections.delete("ChessKnowledgeBase")

In [7]:
base_dir = Path("chess-data/chess-knowledge-base")

all_files = list(base_dir.rglob("*.md"))
print(f"Number of files found: {len(all_files)}")

queued = 0   # objects handed to the batch (not necessarily stored yet)
skipped = 0  # files without a front-matter block

with collection.batch.fixed_size(batch_size=100) as batch:
    for f in all_files:
        # Anything under a "games" directory is excluded from this import.
        if "games" in f.parts:
            continue
        doc = parse_markdown_file(f)
        if not doc:
            skipped += 1
            continue
        # Blank front matter may come back as None; fall back to an empty dict.
        meta = doc["metadata"] or {}

        # "tags" is a TEXT_ARRAY property: coerce a missing value, a single
        # scalar, or non-string items into a list of strings.
        tags = meta.get("tags") or []
        if not isinstance(tags, (list, tuple)):
            tags = [tags]
        tags = [str(tag) for tag in tags]

        batch.add_object(
            properties={
                "title": meta.get("title"),
                "type": meta.get("type"),
                "tags": tags,
                "content": doc["content"],
                # Other front-matter fields (source_url, scraped_at, meta) and
                # the file path are currently left out of the import.
            },
            # Deterministic ID derived from the file's relative path, so
            # re-running the import overwrites objects instead of duplicating.
            uuid=generate_uuid5(f.relative_to(base_dir).as_posix()),
        )
        queued += 1
        if batch.number_errors > 10:
            print("Batch stopped due to too many errors.")
            break

failed = collection.batch.failed_objects
if failed:
    print("Failed objects:", len(failed))
    print(failed[0])

print(f"Queued {queued} objects; skipped {skipped} files without front matter.")
print("Import Finished.")


Number of files found: 273
Import Finished.


In [8]:
# Semantic search smoke test: fetch the two notes closest to the query.
# The target vector is named explicitly so the query keeps working if more
# named vectors are added to the collection later.
response = collection.query.near_text(
    query="Provide some middlegame advice regarding pawn structures and piece activity.",
    target_vector="knowledge_note_chess",
    limit=2,
)

for obj in response.objects:
    print(json.dumps(obj.properties, indent=2))

{
  "content": "# Be Careful with Pawn Structure\n\n## Explanation\n\nPawn structure influences the game's direction. Tight structures suggest defensive play, while spaced-out pawns indicate aggressive strategies.",
  "type": "middlegame",
  "tags": [
    "Pawn Structure"
  ],
  "title": "Be Careful with Pawn Structure"
}
{
  "title": "General Middlegame Ideas",
  "type": "middlegame",
  "tags": [],
  "content": "# General Middlegame Ideas\n\n## Explanation\n\nStrategies to enhance your middlegame play include playing longer time controls, studying tactics, and understanding pawn structures and piece placement. It is also important to analyze your games without a chess engine to improve your understanding.\n## Examples\n\n- Stop playing blitz and bullet; play longer time controls.\n- Study tactics extensively.\n- Double-check your moves for safety.\n- Analyze your games without a chess engine.\n- Understand pawn structures and piece placement in openings."
}
