In [26]:
import os
import subprocess

from dotenv import load_dotenv
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import DeepLake
# Runs before the os.environ[...] lookups in the cells below, which expect
# their keys to be present. As the last expression in the cell, the call's
# return value is what the notebook displays.
load_dotenv()  # Load environment variables from .env file

True

In [27]:
#Read OPENAI_API_KEY from environment variable
# Indexing os.environ (instead of .get) makes a missing key fail right here
# with a KeyError rather than later, inside the first embedding request.
openai_api_key = os.environ['OPENAI_API_KEY']
# Pass the key explicitly so the value read above is the one actually used,
# instead of relying on the client to look the environment up a second time.
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

In [29]:
#Login to ActiveLoop using API Key environment variable
active_loop_api_key = os.environ['ACTIVE_LOOP_API_KEY']
# Run the CLI with an argument list instead of an interpolated `!` shell line:
# the token is passed verbatim as one argv element, so characters such as
# spaces, `$`, `;` or quotes in it are never interpreted by a shell.
login_result = subprocess.run(
    ["activeloop", "login", "-t", active_loop_api_key],
    capture_output=True,
    text=True,
)
# Show the CLI's own message in the cell output.
print(login_result.stdout, end="")
if login_result.returncode != 0:
    # check=True is deliberately avoided: CalledProcessError includes the full
    # command line in its message, which would print the token in the notebook.
    raise RuntimeError(
        f"activeloop login failed (exit code {login_result.returncode}): "
        f"{login_result.stderr.strip()}"
    )

Successfully logged in to Activeloop.


In [31]:
import os
from langchain.document_loaders import TextLoader

# Walk every file under root_dir, load it as UTF-8 text and collect the
# resulting document chunks in `docs`.
root_dir = 'repo'
docs = []
# (path, exception) pairs for files that could not be loaded, kept so the
# best-effort skip is inspectable instead of silent.
skipped_files = []
for dirpath, _dirnames, filenames in os.walk(root_dir):
    for file in filenames:
        file_path = os.path.join(dirpath, file)
        try:
            loader = TextLoader(file_path, encoding='utf-8')
            docs.extend(loader.load_and_split())
        except Exception as exc:
            # Still best-effort: a file that fails to load is skipped. Catching
            # Exception rather than using a bare `except` lets KeyboardInterrupt
            # and SystemExit propagate, so a long walk can be interrupted.
            skipped_files.append((file_path, exc))

if skipped_files:
    # Only the count is printed to keep the cell output short; the details
    # stay available in `skipped_files`.
    print(f"Skipped {len(skipped_files)} file(s) that could not be loaded; see `skipped_files`.")

In [None]:
from langchain.text_splitter import CharacterTextSplitter

# Splitter settings, named so they are easy to find and tune.
CHUNK_SIZE = 2000
CHUNK_OVERLAP = 0

# Split the documents collected in `docs` into the pieces that get embedded.
text_splitter = CharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
texts = text_splitter.split_documents(docs)

In [None]:
# Account name that prefixes the dataset path, read from the environment.
# NOTE(review): the key is lowercase 'username'; on Windows os.environ is
# case-insensitive, so this may resolve to the OS login name rather than a
# value from .env -- confirm on the target platform.
username = os.environ['username']
dataset_path = f"hub://{username}/jockmkt"

# Open the DeepLake vector store at that path, embedding with `embeddings`.
db = DeepLake(dataset_path=dataset_path, embedding_function=embeddings)

# Add the split documents to the store; as the cell's last expression, the
# call's return value is displayed.
db.add_documents(texts)

In [51]:
# Build a retriever on top of the vector store and set all of its search
# options in one place (same keys, values and insertion order as before).
retriever = db.as_retriever()
retriever.search_kwargs.update(
    {
        "distance_metric": "cos",
        "fetch_k": 100,
        "maximal_marginal_relevance": True,
        "k": 10,
    }
)

In [52]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain

# Chat model that answers questions over the retrieved documents.
CHAT_MODEL_NAME = "gpt-3.5-turbo"
model = ChatOpenAI(model_name=CHAT_MODEL_NAME)

# Conversational question-answering chain combining the model and the retriever.
qa = ConversationalRetrievalChain.from_llm(llm=model, retriever=retriever)

In [None]:
# Questions put to the chain, asked in list order.
questions = ["What are these repos about?"]
# (question, answer) pairs from earlier turns, passed back in on every call.
chat_history = []

for question in questions:
    chain_inputs = {"question": question, "chat_history": chat_history}
    result = qa(chain_inputs)
    # Record the exchange before printing so the next question sees it.
    chat_history.append((question, result["answer"]))
    print(f"-> **Question**: {question} \n")
    print(f"**Answer**: {result['answer']} \n")