In [None]:
# Load variables from a local .env file into the process environment. This is
# the first cell, so it runs before any API client below is constructed.
# NOTE(review): presumably the .env supplies the OpenAI and Activeloop
# credentials used by later cells — confirm against the project's .env.
from dotenv import load_dotenv
load_dotenv()

In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings

# Embedding model shared by the indexing and querying cells below.
# NOTE(review): disallowed_special=() is understood to turn off the
# tokenizer's special-token check so that source files containing such token
# text do not raise during embedding — confirm against the OpenAIEmbeddings docs.
embeddings = OpenAIEmbeddings(disallowed_special=())

In [None]:
import os
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Root of the checked-out repository to index. The sub-directories are derived
# from it so the checkout location only has to be changed in one place.
root_dir = "./eng-feed"
aws_dir = f"{root_dir}/src/aws/lib"
lambda_dir = f"{root_dir}/src/aws/backend/lambda"
db_dir = f"{root_dir}/src/aws/backend/db"
pages_dir = f"{root_dir}/src/pages"
components_dir = f"{root_dir}/src/components"

# Read every file under the selected directories as one whole document each.
# load() is used rather than load_and_split(): the latter would pre-chunk each
# file with the loader's default splitter settings, and the result would then
# be chunked a second time by `text_splitter` below.
docs = []
for directory in [aws_dir, lambda_dir, db_dir, pages_dir, components_dir]:
    if not os.path.isdir(directory):
        # os.walk() yields nothing for a missing path, which would otherwise
        # produce an empty index without any hint as to why.
        print(f"Skipping missing directory: {directory}")
        continue
    for dirpath, dirnames, filenames in os.walk(directory):
        for file in filenames:
            path = os.path.join(dirpath, file)
            try:
                loader = TextLoader(path, encoding="utf-8")
                docs.extend(loader.load())
            except Exception as e:
                # Deliberately best-effort: a file that cannot be read as
                # UTF-8 text (e.g. a binary asset) is reported and skipped so
                # one bad file does not abort the whole indexing run.
                print(f"Skipping {path}: {e}")

# Single, explicit chunking step: pieces of at most 1000 characters with no
# overlap between neighbouring pieces.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
code = text_splitter.split_documents(docs)

    

In [None]:
from langchain.vectorstores import DeepLake

import os

# Account name and dataset name that together form the hub:// dataset path
# used by the cells below. Each can be overridden through the environment
# (for example via the .env file loaded in the first cell) so the notebook can
# be pointed at another account without editing code; the defaults keep the
# original values.
deeplake_username = os.environ.get("DEEPLAKE_USERNAME", "haffimazhar96")
deeplake_db = os.environ.get("DEEPLAKE_DB", "code-search")

In [None]:
# Connect to the dataset at hub://<username>/<db> with the shared embedding
# model, then add every code chunk to it.
# NOTE(review): re-running this cell appears to add the same chunks again —
# confirm whether that is intended before executing it twice.
db = DeepLake(dataset_path=f"hub://{deeplake_username}/{deeplake_db}", embedding=embeddings)
db.add_documents(code)

In [None]:
# Re-bind `db` to the same dataset, this time opened with read_only=True for
# the querying cells that follow.
db = DeepLake(
    embedding=embeddings,
    read_only=True,
    dataset_path=f"hub://{deeplake_username}/{deeplake_db}",
)

In [None]:
# Build a retriever over the vector store and set all of its search options in
# one place (same keys, values and insertion order as setting them one by one).
retriever = db.as_retriever()
retriever.search_kwargs.update(
    {
        "distance_metric": "cos",
        "fetch_k": 100,
        "maximal_marginal_relevance": True,
        "k": 10,
    }
)

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

# Chat model that answers questions over the retrieved code chunks.
# The rolling "gpt-3.5-turbo" alias is used instead of the dated
# "gpt-3.5-turbo-0613" snapshot, which OpenAI lists as retired on its
# deprecations page; requests pinned to a retired snapshot are rejected.
model = ChatOpenAI(model_name="gpt-3.5-turbo")  # switch to 'gpt-4'
# Question-answering chain: fetches chunks through `retriever` and passes them
# to `model` together with the question.
qa = RetrievalQA.from_llm(model, retriever=retriever)

In [None]:
# Ask the QA chain about the notification flow. The call is the cell's last
# expression, so its return value is what the notebook displays.
query = "How are users notified when there is a new post?"
qa.run(query)

In [None]:
# Ask the QA chain what the frontend shows. The call is the cell's last
# expression, so its return value is what the notebook displays.
query = "What does the frontend show?"
qa.run(query)

In [None]:
# Ask the QA chain where the frontend is hosted. The call is the cell's last
# expression, so its return value is what the notebook displays.
query = "Where is the frontend hosted?"
qa.run(query)

In [None]:
# Ask the QA chain how the frontend could be moved to Vercel. The question
# text is sent to the model as-is, so it is phrased as a complete sentence
# (the subject "I" was missing) with the product name capitalised.
query = "How could I host the frontend on Vercel?"
qa.run(query)