In [1]:
from dotenv import load_dotenv
load_dotenv()

True

In [14]:
import os
import lancedb

from langchain.embeddings.ollama import OllamaEmbeddings
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.chat_models import ChatOllama
from langchain.chains import RetrievalQA
from langchain.vectorstores import LanceDB

In [15]:
aws_dir = "./eng-feed/src/aws/lib"
lambda_dir = "./eng-feed/src/aws/backend/lambda"
db_dir = "./eng-feed/src/aws/backend/db"
pages_dir = "./eng-feed/src/pages"
components_dir = "./eng-feed/src/components"

docs = []
for directory in [aws_dir, lambda_dir, db_dir, pages_dir, components_dir]:
    for dirpath, dirnames, filenames in os.walk(directory):
        for file in filenames:
            try:
                loader = TextLoader(os.path.join(dirpath, file), encoding="utf-8")
                docs.extend(loader.load_and_split())
            except Exception as e:
                print(e)
                pass

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
code = text_splitter.split_documents(docs)

In [None]:
embeddings = OllamaEmbeddings(model="mistral")

In [17]:
db = lancedb.connect('/tmp/lancedb')
table = db.create_table("code_files", data=[
    {"vector": embeddings.embed_query("Hello World"), "text": "Hello World", "id": "1"}
], mode="overwrite")
code_search = LanceDB.from_documents(code, embeddings, connection=table)

In [18]:
retriever = code_search.as_retriever()
retriever.search_kwargs["distance_metric"] = "cos"
retriever.search_kwargs["fetch_k"] = 100
retriever.search_kwargs["maximal_marginal_relevance"] = True
retriever.search_kwargs["k"] = 10

model = ChatOllama(model_name="mistral")
qa = RetrievalQA.from_llm(llm=model, retriever=retriever)

In [19]:
query = "How are users notified when there is a new post?"
qa.run(query)

'Users are notified when there is a new post through an AWS Lambda function called "NotifyUsers". This function is scheduled to run at a specific time using a Lambda schedule rule. When the function runs, it fetches the new posts and collects the IDs of the users who need to be notified. It then sends notifications to these users using a combination of technologies such as SQS (Simple Queue Service), SNS (Simple Notification Service), and email. The exact implementation details may vary depending on the specific codebase and architecture.'