In [12]:
# Load key/value pairs from a local .env file into the process environment
# (python-dotenv). Presumably this supplies the API credentials the later
# cells depend on — TODO confirm which variables are required.
from dotenv import load_dotenv
load_dotenv()

True

In [13]:
from langchain.embeddings.openai import OpenAIEmbeddings

# Embedding model handed to the Deep Lake vector store in the cells below.
# NOTE(review): disallowed_special=() is assumed to stop the tokenizer from
# rejecting text that contains special-token markers — confirm against the
# installed langchain/tiktoken versions.
embeddings = OpenAIEmbeddings(disallowed_special=())

In [15]:
import os
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Locations inside the checked-out repository that should be indexed.
root_dir = "./eng-feed"
aws_dir = "./eng-feed/src/aws/lib"
lambda_dir = "./eng-feed/src/aws/backend/lambda"
db_dir = "./eng-feed/src/aws/backend/db"
pages_dir = "./eng-feed/src/pages"
components_dir = "./eng-feed/src/components"

# Read every file under the selected directories as one raw document each.
# Files that cannot be read as UTF-8 text are reported and skipped so a single
# bad file does not abort the whole ingestion.
docs = []
for directory in [aws_dir, lambda_dir, db_dir, pages_dir, components_dir]:
    if not os.path.isdir(directory):
        # os.walk yields nothing for a missing path, which would otherwise hide
        # a wrong path or a missing checkout.
        print(f"Skipping missing directory: {directory}")
        continue
    for dirpath, dirnames, filenames in os.walk(directory):
        for file in filenames:
            path = os.path.join(dirpath, file)
            try:
                loader = TextLoader(path, encoding="utf-8")
                # load() rather than load_and_split(): the latter pre-splits with
                # the loader's default splitter, so the explicit splitter below
                # would no longer be the only thing deciding chunk boundaries.
                docs.extend(loader.load())
            except Exception as e:
                print(f"Skipping {path}: {e}")

# Single, explicit chunking pass: at most 1000 characters per chunk, no overlap.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
code = text_splitter.split_documents(docs)

    

In [17]:
from langchain.vectorstores import DeepLake

# Account name and dataset name; the cells that open the vector store combine
# them into the dataset path f"hub://{deeplake_username}/{deeplake_db}".
deeplake_username = "haffimazhar96"
deeplake_db = "code-search"

In [18]:
# Create/open the dataset at hub://<username>/<db>, then embed and store every
# chunk in `code`.
# NOTE(review): this looks non-idempotent — re-running the cell presumably
# appends the same chunks again; confirm, and clear the dataset before
# re-ingesting if so.
# add_documents is the cell's last expression, so its return value (the list of
# ids visible in the recorded output) is echoed in full.
db = DeepLake(
    dataset_path=f"hub://{deeplake_username}/{deeplake_db}",
    embedding=embeddings,
)
db.add_documents(code)

Your Deep Lake dataset has been successfully created!


creating embeddings: 100%|██████████| 1/1 [00:08<00:00,  8.74s/it]

Dataset(path='hub://haffimazhar96/code-search', tensors=['text', 'metadata', 'embedding', 'id'])

  tensor      htype       shape      dtype  compression
  -------    -------     -------    -------  ------- 
   text       text      (125, 1)      str     None   
 metadata     json      (125, 1)      str     None   
 embedding  embedding  (125, 1536)  float32   None   
    id        text      (125, 1)      str     None   





['433535b4-7be6-11ee-befb-e24bee7314b9',
 '433537ee-7be6-11ee-befb-e24bee7314b9',
 '4335383e-7be6-11ee-befb-e24bee7314b9',
 '43353870-7be6-11ee-befb-e24bee7314b9',
 '433538ac-7be6-11ee-befb-e24bee7314b9',
 '433538de-7be6-11ee-befb-e24bee7314b9',
 '43353910-7be6-11ee-befb-e24bee7314b9',
 '433539a6-7be6-11ee-befb-e24bee7314b9',
 '433539e2-7be6-11ee-befb-e24bee7314b9',
 '43353a14-7be6-11ee-befb-e24bee7314b9',
 '43353a46-7be6-11ee-befb-e24bee7314b9',
 '43353a82-7be6-11ee-befb-e24bee7314b9',
 '43353aaa-7be6-11ee-befb-e24bee7314b9',
 '43353adc-7be6-11ee-befb-e24bee7314b9',
 '43353b04-7be6-11ee-befb-e24bee7314b9',
 '43353b36-7be6-11ee-befb-e24bee7314b9',
 '43353b5e-7be6-11ee-befb-e24bee7314b9',
 '43353b90-7be6-11ee-befb-e24bee7314b9',
 '43353bc2-7be6-11ee-befb-e24bee7314b9',
 '43353bf4-7be6-11ee-befb-e24bee7314b9',
 '43353c1c-7be6-11ee-befb-e24bee7314b9',
 '43353c4e-7be6-11ee-befb-e24bee7314b9',
 '43353c80-7be6-11ee-befb-e24bee7314b9',
 '43353ca8-7be6-11ee-befb-e24bee7314b9',
 '43353cd0-7be6-

In [19]:
# Rebind `db` to a read-only handle on the same dataset path for querying; the
# name no longer refers to the writable handle used for ingestion.
db = DeepLake(
    dataset_path=f"hub://{deeplake_username}/{deeplake_db}",
    read_only=True,
    embedding=embeddings,
)

Deep Lake Dataset in hub://haffimazhar96/code-search already exists, loading from the storage


In [20]:
# Build a retriever over the vector store and set all of its search options in
# one place.
# NOTE(review): presumably "cos" selects cosine distance and fetch_k / k are the
# candidate-pool size and final result count for MMR re-ranking — confirm
# against the Deep Lake vector store documentation.
retriever = db.as_retriever()
retriever.search_kwargs.update(
    {
        "distance_metric": "cos",
        "fetch_k": 100,
        "maximal_marginal_relevance": True,
        "k": 10,
    }
)

In [21]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

# Chat model plus the retriever configured earlier, combined into the
# question-answering chain that the query cells below call via qa.run(...).
model = ChatOpenAI(model_name="gpt-3.5-turbo-0613")  # switch to 'gpt-4'
qa = RetrievalQA.from_llm(model, retriever=retriever)

In [23]:
# Ask the chain about the notification flow; the answer string returned by
# qa.run is the cell's output.
query = "How are users notified when there is a new post?"
qa.run(query)

'Users are notified when there is a new post through the "NotifyUsers" Lambda function. This function is scheduled to run at a specific time using a cron expression. It fetches the new posts and checks if any users are subscribed to the corresponding blogs. If a user is subscribed, a user post entry is created and the user\'s unique identifier (userUuid) and the post\'s unique identifier (postUuid) are added to a Map called "userIdsToNotify". Finally, the function returns the Map of userUuids to postUuids, which can be used to send notifications to the users.'

In [24]:
# Ask the chain what the frontend renders; the answer string returned by
# qa.run is the cell's output.
query = "What does the frontend show?"
qa.run(query)

'The frontend shows a sidebar component, a layout component with a title, a main section with a theme button, a "Latest posts" heading, page navigation buttons, and a list of blog views. It also includes a sign in button or a profile image and a sign out button depending on whether the user is logged in or not. Additionally, there are buttons to toggle the visibility of the sidebar and to navigate to previous and next pages.'

In [25]:
# Ask about hosting. The recorded answer states that the retrieved context does
# not mention where the frontend is hosted.
query = "Where is the frontend hosted?"
qa.run(query)

"The provided pieces of context do not mention where the frontend is hosted. Therefore, I don't have enough information to answer your question."

In [26]:
# Follow-up question about deploying the frontend. The original prompt was
# missing its subject ("How could host ..."), so a malformed question was sent
# to the model.
# NOTE(review): the previous cell's answer says the indexed code does not cover
# hosting, so this answer presumably comes from the model's general knowledge
# rather than from the repository — confirm before relying on it.
query = "How could I host the frontend on vercel?"
qa.run(query)

"To host the frontend on Vercel, you can follow these steps:\n\n1. Set up a Vercel account: If you don't have a Vercel account, sign up for one at https://vercel.com/signup.\n\n2. Install the Vercel CLI: Open your terminal and run the following command to install the Vercel CLI globally:\n   ```\n   npm install -g vercel\n   ```\n\n3. Build your frontend: Make sure your frontend project is ready to be deployed. This typically involves running a build command to generate the production-ready files.\n\n4. Navigate to your project directory: Open your terminal and navigate to the root directory of your frontend project.\n\n5. Log in to Vercel: Run the following command and follow the prompts to log in to your Vercel account:\n   ```\n   vercel login\n   ```\n\n6. Deploy your frontend: Run the following command to deploy your frontend to Vercel:\n   ```\n   vercel\n   ```\n\n   The Vercel CLI will guide you through the deployment process. You may be asked to provide some project-specific s