In [1]:
# %%
import os
from pathlib import Path
from langchain.document_loaders import DirectoryLoader, UnstructuredMarkdownLoader

# Configuration
PERSIST_DIR     = "chroma_db"
COLLECTION_NAME = "avax_docs"
BASE_URL        = "192.168.86.9"

# 1. Load raw MD/MDX files
docs_path = Path("embeddings/raw")                      
loader = DirectoryLoader(
    str(docs_path),
    glob="**/*.md*",                         
    loader_cls=UnstructuredMarkdownLoader,
    show_progress=True,
)
raw_docs = loader.load()


# %%
import re
# 2. Clean out images and front-matter
clean_docs = []
for d in raw_docs:
    text = re.sub(r"!\[.*?\]\(.*?\)", "", d.page_content)      # remove images
    text = re.sub(r"^---.*?---\s*", "", text, flags=re.S)     # drop YAML front-matter
    clean_docs.append(d.copy(update={"page_content": text}))


# %%
from langchain.text_splitter import RecursiveCharacterTextSplitter
# 3. Chunk into ~1,500-char pieces
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500, chunk_overlap=200,
    separators=["\n## ", "\n### ", "\n", " ", ""]
)
docs = splitter.split_documents(clean_docs)


# %%
from langchain_ollama.embeddings import OllamaEmbeddings
from langchain.vectorstores import Chroma

# 4. Prepare embeddings + vector store (idempotent)
embeddings = OllamaEmbeddings(
    model="nomic-embed-text:v1.5",
    base_url=BASE_URL, 
)

if os.path.exists(PERSIST_DIR):
    # Reload existing store
    vectordb = Chroma(
        collection_name=COLLECTION_NAME,
        embedding_function=embeddings,
        persist_directory=PERSIST_DIR,
    )
    print(f"[+] Loaded existing Chroma store from {PERSIST_DIR}")
else:
    # Create new store, ingest, and persist
    vectordb = Chroma(
        collection_name=COLLECTION_NAME,
        embedding_function=embeddings,
        persist_directory=PERSIST_DIR,
    )
    vectordb.add_documents(docs)
    vectordb.persist()
    print(f"[+] Created new Chroma store and saved to {PERSIST_DIR}")



# %%
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain_ollama.llms import OllamaLLM
from langchain.chains import RetrievalQA
import json
from langchain.prompts import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

# 5. Define response schema and parser
response_schemas = [
    ResponseSchema(name="reply", description="Conversational answer."),
    ResponseSchema(
        name="update",
        description="JSON merge-patch for the config (empty if no change).",
    ),
]
parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = parser.get_format_instructions()

json_structure = '''{
 "subnetId":"string,
 "vmId": "string",
  "evmChainId": "number",
  "gasLimit": "number", 
  "targetBlockRate": "number",
  "tokenAllocations": [
    {
      "address": "string",
      "amount": "string"
    }
  ],
  "feeConfig": {
    "minBaseFee": "string",
    "baseFeeChangeDenominator": "number",
    "minBlockGasCost": "string", 
    "maxBlockGasCost": "string",
    "blockGasCostStep": "string",
    "targetGas": "string"
  },
  "contractDeployerAllowListConfig": {
    "enabled": "boolean",
    "admins": ["string"],
    "members": ["string"],
    "enabledAddresses": ["string"]
  },
  "contractNativeMinterConfig": {
    "enabled": "boolean", 
    "admins": ["string"],
    "members": ["string"],
    "enabledAddresses": ["string"],
  },
  "txAllowListConfig": {
    "enabled": "boolean",
    "admins": ["string"], 
    "members": ["string"],
    "enabledAddresses": ["string"]
  },
  "feeManagerEnabled": "boolean",
  "feeManagerAdmins": ["string"],
  "rewardManagerEnabled": "boolean", 
  "rewardManagerAdmins": ["string"]
}'''

SYSTEM_TEMPLATE = (
    "You are Avalanche-GPT, an assistant that answers developer questions about Avalanche infrastructure "
    "and helps them edit their Avalanche network configuration.\n\n"
    "You output exactly two things in JSON: reply (a human-friendly answer) and update "
    "(a JSON merge-patch for the config; empty object if no change is needed).\n\n"
    "Allowed JSON fields/types:\n{json_structure}\n\n"
    "⚠️ OUTPUT FORMAT:\nRespond **only** with a JSON object containing reply and update.\n"
    "{format_instructions}"
)
system_msg = SystemMessagePromptTemplate.from_template(SYSTEM_TEMPLATE)

# 8. Human prompt with context, config, and question
HUMAN_TEMPLATE = (
    "Context from docs:\n{context}\n\n"
    "Question:\n{question}"    # <-- must be {query}, not {question}
)
human_msg = HumanMessagePromptTemplate.from_template(HUMAN_TEMPLATE)

# 9. Assemble chat prompt and inject static pieces
prompt = (
    ChatPromptTemplate.from_messages([system_msg, human_msg])
    .partial(
        format_instructions=format_instructions,
        json_structure=json_structure,
    )
)

# 10. LLM & RetrievalQA chain setup
llm = OllamaLLM(
    model="qwen2.5-coder:3b-instruct-q8_0",
    temperature=0.2,
    base_url=BASE_URL,
    num_ctx=16384,
)
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectordb.as_retriever(search_kwargs={"k": 4}),
    return_source_documents=False,
    chain_type_kwargs={"prompt": prompt},
)

payload = {
    "chat_history": "User: Hi\nBot: Hello! How can I help?\n",
    "user_config":  json.dumps({"gasLimit": "8000000"}),
    "question":     "How do I set up a subnet with a custom gas fee?"
}

# Combine the three into the single string RetrievalQA expects under 'query'
query_string = (
    f"Previous chat history:\n{payload['chat_history']}\n\n"
    f"Current user config JSON:\n{payload['user_config']}\n\n"
    f"Current question:\n{payload['question']}"
)

result_text = qa_chain.invoke({"query": query_string})["result"]
parsed      = parser.parse(result_text)

print("LLM reply:\n", parsed["reply"])
print("\nJSON updates:\n", json.dumps(parsed["update"], indent=2))

# %%




100%|██████████| 759/759 [00:18<00:00, 41.28it/s]
C:\Users\IDF\AppData\Local\Temp\ipykernel_23288\3532727086.py:29: PydanticDeprecatedSince20: The `copy` method is deprecated; use `model_copy` instead. See the docstring of `BaseModel.copy` for details about how to handle `include` and `exclude`. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  clean_docs.append(d.copy(update={"page_content": text}))
  vectordb = Chroma(


[+] Loaded existing Chroma store from chroma_db
LLM reply:
 To set up a subnet with a custom gas fee, you need to update the `gasLimit` parameter in your subnet configuration. This parameter determines the maximum amount of gas that can be used for transactions within the subnet. You can adjust this value based on your specific requirements and network conditions.

JSON updates:
 {
  "gasLimit": "10000000"
}
