In [None]:
from dotenv import load_dotenv
# Read settings from a local .env file into the process environment before any client is built.
# NOTE(review): presumably this is where the OpenAI credentials used further down come from — confirm.
load_dotenv()

In [None]:
import os
import lancedb

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.vectorstores import LanceDB

In [None]:
ROOT_DIR = "eng-feed"
# Names excluded from the walk. Every entry is matched exactly against directory
# and file names; dot-prefixed entries are additionally treated as file suffixes,
# so ".lock" also excludes files such as "yarn.lock".
ignore = (".git", "node_modules", ".lock", "public", "assets")


def collect_source_files(root, ignored=ignore):
    """Return the paths of all non-ignored files found under ``root``.

    Args:
        root: Directory to walk recursively.
        ignored: Names to exclude. A directory whose name is listed is not
            descended into. A file is skipped when its name is listed or when
            it ends with a dot-prefixed entry (e.g. ".lock").

    Returns:
        A list of file paths (each joined onto ``root``), in a deterministic
        order: directories are visited top-down with names sorted, and files
        within a directory are sorted. A missing ``root`` yields an empty list.
    """
    ignored_suffixes = tuple(name for name in ignored if name.startswith("."))
    paths = []
    for dirpath, dirnames, filenames in os.walk(root):
        # Slice assignment mutates the list os.walk holds, which both prunes
        # ignored directories and fixes the order of the remaining ones.
        dirnames[:] = sorted(d for d in dirnames if d not in ignored)
        for filename in sorted(filenames):
            if filename in ignored or filename.endswith(ignored_suffixes):
                continue
            paths.append(os.path.join(dirpath, filename))
    return paths


filtered = collect_source_files(ROOT_DIR)

In [None]:
# Load every collected file as a document, then chunk all documents once with
# the splitter configured below.
code_text = []
for file in filtered:
    try:
        loader = TextLoader(file, encoding="utf-8")
        # load() rather than load_and_split(): the latter already chunks with
        # LangChain's default splitter, so the explicit split below would then
        # re-split those chunks instead of the original documents.
        code_text.extend(loader.load())
    except Exception as e:
        # Best-effort ingestion: files that cannot be read as UTF-8 text are
        # reported and skipped so one bad file does not abort the run.
        print(f"Skipping {file}: {e}")

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
code = text_splitter.split_documents(code_text)

In [None]:
# Embedding client used both for the table's seed row and for indexing the code chunks below.
# NOTE(review): built with no arguments, so it presumably reads its API key from the
# environment populated by load_dotenv() — confirm.
embeddings = OpenAIEmbeddings()

In [None]:
# Connect to the on-disk LanceDB database and recreate the "code_files" table,
# seeded with a single embedded placeholder row, then index the code chunks into it.
db = lancedb.connect("./tmp/lancedb")

seed_text = "Hello World"
seed_row = {
    "vector": embeddings.embed_query(seed_text),
    "text": seed_text,
    "id": "1",
}
table = db.create_table("code_files", data=[seed_row], mode="overwrite")

# NOTE(review): the placeholder row is never removed, so it presumably remains
# searchable alongside the real chunks — confirm whether that is acceptable.
code_search = LanceDB.from_documents(code, embeddings, connection=table)

In [None]:
# Build a retriever over the indexed code and set its search options in one step.
retriever = code_search.as_retriever()
retriever.search_kwargs.update(
    {
        "distance_metric": "cos",
        "fetch_k": 100,
        "maximal_marginal_relevance": True,
        "k": 10,
    }
)
# NOTE(review): apart from "k", these option names look like they target a
# different vector store; verify that the LanceDB integration honours them.

# Chat model plus retriever combined into the question-answering chain used below.
OpenAIModel = ChatOpenAI(model_name="gpt-3.5-turbo-0613")
qa = RetrievalQA.from_llm(retriever=retriever, llm=OpenAIModel)

In [None]:
# Ask the QA chain about new-post notifications; the bare qa.run(...) expression
# leaves the returned answer as this cell's output.
query = "How are users notified when there is a new post?"
qa.run(query)

In [None]:
# Ask the QA chain when the notification lambda runs; the answer is the cell's output.
query = "When does the lambda to notify users run?"
qa.run(query)

In [None]:
# Ask the QA chain which frontend framework the repository uses; the answer is the cell's output.
query = "What framework is the frontend in?"
qa.run(query)

In [None]:
# Ask the QA chain how frontend styling is implemented; the answer is the cell's output.
query = "How is the styling done on the frontend?"
qa.run(query)

In [None]:
# Ask the QA chain for guidance on adding a Postgres RDS instance to the AWS stack;
# the answer is the cell's output.
query = "How can I add postgres rds to the aws stack?"
qa.run(query)