In [None]:
import os

def read_code_snippets(directory_path):
    """Load the text of every file located directly inside ``directory_path``.

    Only entries for which ``os.path.isfile`` is true are read; sub-directories
    are skipped and never descended into. Files are decoded as UTF-8.

    Args:
        directory_path: Directory whose immediate entries are scanned.

    Returns:
        dict: Maps each file's name (not its full path) to its full contents.
    """
    snippets_by_name = {}
    for entry_name in os.listdir(directory_path):
        entry_path = os.path.join(directory_path, entry_name)
        # Anything that is not a file (e.g. a nested directory) is ignored.
        if not os.path.isfile(entry_path):
            continue
        with open(entry_path, 'r', encoding='utf-8') as handle:
            snippets_by_name[entry_name] = handle.read()

    return snippets_by_name

# Load every snippet file into a {filename: source text} mapping.
directory_path = './code_snippets/'
code_snippets_dict = read_code_snippets(directory_path)

from langchain_ollama import ChatOllama

# Chat model that writes one retrieval-oriented description per snippet.
llm = ChatOllama(
    model = "gemma2:2b",
    temperature = 0.8,
    num_predict = 256,
)

# Each description is stored as descriptions/<snippet filename>.txt
os.makedirs('descriptions', exist_ok=True)

descriptions = []

for filename, content in code_snippets_dict.items():
    messages = [
      ("system", "You are a helpful code description generator. I will give you a file name and its content, and you will generate a description for it so that I can use that description to retrieve the file later. For example if I give you a code snippet of middleware in node.js, you should generate a description like 'this is a way to write a middleware in node.js' so that when a user asks for 'How to write a middleware?', I can use this description to retrieve the file."),
      ("human", f"Please generate a description for the following code snippet: {content} \n filename: {filename}"),
    ]
    response = llm.invoke(messages)
    descriptions.append(response.content)

    # Persist each description as soon as it is generated, so a failure on a
    # later snippet does not throw away the descriptions already produced.
    with open(f'descriptions/{filename}.txt', 'w', encoding='utf-8') as file:
        file.write(response.content)


In [None]:
import os

# Document ids below are positional and os.listdir() returns entries in
# arbitrary order, so sort the listing to keep each file's id stable across runs.
files = sorted(os.listdir("descriptions"))
# Keep the "./descriptions/" prefix as-is: it is stored in the metadata and
# parsed back out when retrieval results are printed.
file_paths = ["./descriptions/" + file for file in files]

from langchain_core.documents import Document

docs = []
# The loop variable must not reuse the name ``file_paths``; doing so replaces
# the list with a single path string and breaks any re-run of this cell.
for index, file_path in enumerate(file_paths):
    # The description files are written as UTF-8, so read them back the same
    # way instead of relying on the platform default encoding.
    with open(file_path, "r", encoding="utf-8") as file:
        text = file.read()
    # Document ids are strings; convert the position explicitly.
    doc = Document(page_content=text, metadata={"file_path": file_path}, id=str(index))
    docs.append(doc)

from langchain_ollama.embeddings import OllamaEmbeddings

# Embedding client for the Ollama model "nomic-embed-text".
embeddings = OllamaEmbeddings(model="nomic-embed-text")

In [None]:
from langchain_chroma import Chroma

# Chroma collection "descriptions", persisted under ./chroma_langchain_db and
# embedding text through the `embeddings` object.
vector_store = Chroma(
    persist_directory="./chroma_langchain_db",
    collection_name="descriptions",
    embedding_function=embeddings,
)

In [None]:
# Add the description documents to the Chroma collection.
vector_store.add_documents(documents=docs)

In [None]:
# Similarity-search retriever configured with k=1.
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 1})

In [None]:
query_texts = ["prisma"]

for q in query_texts:
    results = retriever.invoke(q)
    # Print one line per hit actually returned. Indexing results[1] and
    # results[2] raises IndexError whenever the retriever returns fewer than
    # three documents (it is configured with k=1 in this notebook).
    for result in results:
        # Metadata holds "./descriptions/<snippet filename>.txt"; map it back
        # to the snippet path under code_snippets/.
        description_name = result.metadata["file_path"].split("./descriptions/")[1]
        # Strip only the trailing ".txt" so snippet names that themselves
        # contain ".txt" are not truncated.
        if description_name.endswith(".txt"):
            description_name = description_name[:-len(".txt")]
        print("code_snippets/" + description_name)

In [1]:
from langchain_chroma import Chroma
from langchain_ollama.embeddings import OllamaEmbeddings
# Embedding client for the Ollama model "nomic-embed-text".
embeddings = OllamaEmbeddings(
    model="nomic-embed-text",
)

# Open the "descriptions" collection persisted under ./vector_store.
# NOTE(review): this is not the "./chroma_langchain_db" directory used when the
# store is populated earlier in the notebook — confirm which one is intended.
vector_store = Chroma(
    persist_directory="./vector_store",
    collection_name="descriptions",
    embedding_function=embeddings,
)

In [16]:
# Show a single stored description: item 131 of the "documents" list returned
# by vector_store.get() (not the whole collection).
# NOTE(review): 131 is a hard-coded position; this raises IndexError when the
# collection holds fewer than 132 documents.

vector_store.get()["documents"][131]

"This test file uses the `tap` testing framework to verify the basic functionality of an Express.js application's routing system. It defines three test routes (`/`, `/index.html`, and `/docs`) that are used to check if the corresponding endpoints return a 200 status code with expected content type headers. Additionally, it checks if the `/favicon.ico` endpoint returns a successful response.  \n"

In [11]:
import uuid

# Name-based (version 5) UUID for a backslash-separated repository path; the
# same input string always yields the same UUID.
uuid.uuid5(uuid.NAMESPACE_DNS, r"repositories\Pop-out-Menu\docs\index.html")

UUID('1ef49e66-72af-5f17-be5d-d48afbbb1827')