In [None]:
import os

# Model identifiers and endpoint shared by the embeddings and chat clients below.
CHAT_MODEL = 'gpt-4.1-mini'
EMBEDDINGS_MODEL = 'text-embedding-3-large'
NEURO_URL = 'https://neuroapi.host/v1'

# API key for NEURO_URL. It is read from the environment so the secret never
# lives in the notebook; export NEURO_KEY before starting the kernel.
# NOTE(review): when the variable is unset this is None and the clients receive
# api_key=None - presumably they then fall back to their own defaults; confirm.
NEURO_KEY = os.environ.get('NEURO_KEY')

# Creating knowledge bases

In [None]:
from langchain_openai import OpenAIEmbeddings, ChatOpenAI

# Both clients are pointed at the same base URL and use the same API key,
# so the shared connection settings are spelled out once.
_neuro_connection = dict(base_url=NEURO_URL, api_key=NEURO_KEY)

# Embedding client (used by the vector store).
embeddings = OpenAIEmbeddings(model=EMBEDDINGS_MODEL, **_neuro_connection)

# Chat client (used by the graph transformer and the agents).
llm = ChatOpenAI(model=CHAT_MODEL, **_neuro_connection)

## Vector database

In [None]:
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import VectorParams, Distance

# Connection settings for the local Qdrant instance.
QDRANT_URL = "http://localhost:6333"
QDRANT_COLLECTION = "python_docs"
# Vector size of the collection; it must equal the output dimensionality of
# the embedding model (3072 for text-embedding-3-large).
EMBEDDING_DIM = 3072

client = QdrantClient(url=QDRANT_URL)

# To rebuild the index from scratch, uncomment the next line and re-run the cell.
# client.delete_collection(QDRANT_COLLECTION)

# Create the collection on first use so that "Restart & Run All" also works
# against a fresh Qdrant instance; an existing collection is left untouched.
if not client.collection_exists(QDRANT_COLLECTION):
    client.create_collection(
        QDRANT_COLLECTION,
        vectors_config=VectorParams(size=EMBEDDING_DIM, distance=Distance.COSINE),
    )

# Vector store handle used for ingestion and for similarity search.
qdrant = QdrantVectorStore.from_existing_collection(
    embedding=embeddings,
    collection_name=QDRANT_COLLECTION,
    url=QDRANT_URL,
)

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Chunking parameters for the splitter built below. No encoding_name/model_name
# is passed, so the library's default tokenizer is used.
CHUNK_SIZE = 800
CHUNK_OVERLAP = 120

# Splitter shared by the vector-store and graph ingestion loops.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

In [None]:
from tqdm import tqdm
from pathlib import Path
from langchain_community.document_loaders import TextLoader

import uuid

# Root folder of the plain-text Python documentation to index.
pydocs = Path('./knowledge_data/python_docs/')

# Load every .txt file, split it into chunks and push the chunks to Qdrant.
# Files are visited in sorted order so runs are reproducible.
for doc_path in tqdm(sorted(pydocs.glob('**/*.txt'))):
    chunks = TextLoader(doc_path).load_and_split(text_splitter=text_splitter)
    if not chunks:
        continue  # empty file: nothing to index

    # Deterministic point IDs derived from (file path, chunk index): re-running
    # this cell overwrites the same points instead of appending duplicates.
    # Caveat: if a file later yields fewer chunks, its old trailing chunks stay
    # in the collection until the collection is rebuilt.
    chunk_ids = [
        str(uuid.uuid5(uuid.NAMESPACE_URL, f"{doc_path.as_posix()}#{chunk_no}"))
        for chunk_no in range(len(chunks))
    ]
    qdrant.add_documents(chunks, ids=chunk_ids)

## Graph database

In [None]:
from langchain_neo4j import Neo4jGraph

import os

# Neo4j connection. Credentials are taken from the environment when present so
# that real secrets do not have to be committed with the notebook; the fallbacks
# are the original local-development values and keep existing setups working.
graph = Neo4jGraph(
    url=os.environ.get('NEO4J_URI', 'bolt://localhost:7687'),
    username=os.environ.get('NEO4J_USERNAME', 'neo4j'),
    password=os.environ.get('NEO4J_PASSWORD', 'complexpassword'))

In [None]:
from langchain_experimental.graph_transformers.llm import LLMGraphTransformer
# Graph transformer driven by the chat model; the graph ingestion loop uses it
# to turn text chunks into graph documents.
llm_transformer = LLMGraphTransformer(
    llm=llm,
)

In [None]:
# Same corpus as the vector store: split each file, let the LLM transformer turn
# the chunks into graph documents, and write those to Neo4j.
txt_files = list(pydocs.glob('**/*.txt'))

for txt_file in tqdm(txt_files):
    chunks = TextLoader(txt_file).load_and_split(text_splitter=text_splitter)
    graph_documents = llm_transformer.convert_to_graph_documents(chunks)
    graph.add_graph_documents(
        graph_documents,
        baseEntityLabel=True,
        include_source=True,
    )

## Creating tools and agents

In [None]:
from langchain.tools import tool

# For qdrant
@tool(response_format="content_and_artifact")
def retrieve_context_from_vector(query: str):
    """Retrieve information to help answer a query."""
    # The docstring above is the tool description the model sees; keep it stable.
    # Fetch the two nearest chunks from the Qdrant store.
    hits = qdrant.similarity_search(query, k=2)

    # One "Source / Content" section per hit, separated by blank lines.
    sections = []
    for hit in hits:
        sections.append(f"Source: {hit.metadata}\nContent: {hit.page_content}")
    context_text = "\n\n".join(sections)

    # (content, artifact): the text plus the raw documents.
    return context_text, hits

# For neo4j
@tool(response_format="content_and_artifact")
def retrieve_context_from_graph(query: str):
    """Retrieve information to help answer a query."""
    # The docstring above is the tool description the model sees; keep it stable.
    #
    # TODO: implement real retrieval against the Neo4j `graph`. Until then this
    # is a placeholder that reports that nothing was found.
    #
    # With response_format="content_and_artifact" the tool must return a
    # (content, artifact) two-tuple; the previous bare `[]` return made every
    # invocation of this tool fail.
    return "No graph context is available for this query.", []


# Tool list passed to the agent that retrieves from the Qdrant vector store.
qdrant_tools = [retrieve_context_from_vector]
# Tool list passed to the agent that retrieves from the Neo4j graph.
neo4j_tools = [retrieve_context_from_graph]

# Loading questions

In [None]:
from langchain.agents import create_agent

# System prompt shared by both retrieval agents.
prompt = (
    "You have access to a tool that retrieves context from python documentations. "
    + "Use this tool to make answers more accurate. "
)

# One agent per knowledge base; they differ only in the tool list they receive.
qdrant_agent, neo4j_agent = (
    create_agent(llm, toolset, system_prompt=prompt)
    for toolset in (qdrant_tools, neo4j_tools)
)

# Baseline without retrieval: the bare chat model.
usual_agent = llm

In [None]:
# from pprint import pprint
# res = qdrant_agent.invoke(
#     {"messages": [{"role": "user", "content": "How do python classes differ from other languages?"}]}
# )
# pprint(res)

# Q&A testing

In [None]:
from datasets import load_dataset

# Sampling parameters; a fixed seed makes the drawn question set reproducible.
N_QUESTIONS = 20
SAMPLE_SEED = 42
QAS_PATH = Path('qas.csv')

ds = load_dataset("Myashka/SO-Python_QA-filtered-2023-tanh_score")
df = ds['train'].to_pandas()

# Keep only rows flagged as accepted, then draw the evaluation sample.
accepted = df[df['is_accepted'] == True]  # noqa: E712 - explicit comparison kept on purpose
qas = accepted.sample(N_QUESTIONS, random_state=SAMPLE_SEED)
qas = qas[['Question', 'Title', 'Answer']]

# NOTE(review): the evaluation cell reads 'qas.csv', but nothing visible in the
# notebook writes it; presumably it is meant to hold this sample - confirm.
# The file is only written when missing, so an existing question set is never
# overwritten.
if not QAS_PATH.exists():
    qas.to_csv(QAS_PATH, index=False)

In [None]:
import pandas as pd

# Questions to evaluate, one per row of the CSV.
qas_df = pd.read_csv('qas.csv')

# Raw agent responses, in the same order as the questions. An explicit loop is
# used so that already-collected answers survive if a later call raises.
answers = []
for question in qas_df['Question']:
    request = {"messages": [{"role": "user", "content": question}]}
    response = qdrant_agent.invoke(request)
    answers.append(response)

In [None]:
from langchain.messages import HumanMessage, AIMessage, ToolMessage
from langchain_core.documents import Document

# Load previously saved agent output. The file is only read, so it is opened
# read-only ('r+' needlessly required write permission on the file).
#
# SECURITY: eval() executes whatever Python expression the file contains. Only
# run this on an out.txt you produced yourself; never on a file from an
# untrusted source.
# NOTE(review): the message/Document imports in this cell are presumably here
# so that the names inside the saved text resolve during eval - confirm.
with open('out.txt', 'r') as f:
    ans = eval(f.read())
print(ans)