In [1]:
import os
import openai
# import deeplake 
from dotenv import load_dotenv
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import DeepLake
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import PyPDFLoader

In [5]:
# Read API credentials from the dotenv file in the current working directory
# so that no secret is hard-coded in the notebook.
load_dotenv(os.path.join(os.getcwd(), '.env.AIKEYS'))
activeloop_token = os.getenv('ACTIVELOOP_TOKEN')
deeplake_username = os.getenv('DEEPLAKE_USERNAME')
openai.api_key = os.getenv('OPENAI_API_KEY')

# Directory that is walked for the PDF files to load.
pdfdirname = os.path.join(os.getcwd(), 'LangChainQueryTexts')

In [7]:
def load_docs(root_dir):
    """Recursively load every PDF found under ``root_dir``.

    Only files whose name ends in ``.pdf`` (case-insensitive) are handed to
    ``PyPDFLoader``; other files are skipped rather than parsed and failing.
    Directories and files are visited in sorted order so that repeated runs
    yield the documents in the same order.

    Args:
        root_dir: Directory to walk.

    Returns:
        A list extended with the result of ``PyPDFLoader(...).load_and_split()``
        for each PDF that loaded without raising. The list is empty when no
        PDF is found, including when ``root_dir`` does not exist (``os.walk``
        yields nothing in that case).
    """
    docs = []
    for dirpath, dirnames, filenames in os.walk(root_dir):
        # Sorting in place makes os.walk descend into subdirectories in a
        # deterministic order.
        dirnames.sort()
        for file in sorted(filenames):
            if not file.lower().endswith('.pdf'):
                continue
            path = os.path.join(dirpath, file)
            print(file)
            try:
                docs.extend(PyPDFLoader(path).load_and_split())
            except Exception as e:
                # Best-effort: one unreadable PDF must not abort the whole
                # run, but report which file failed so it can be fixed.
                print(f"Skipping {path}: {e}")
    return docs


def split_docs(docs, chunk_size=1000, chunk_overlap=0):
    """Split documents into chunks with ``CharacterTextSplitter``.

    Args:
        docs: Documents to split, e.g. the list returned by ``load_docs``.
        chunk_size: Forwarded to ``CharacterTextSplitter`` as ``chunk_size``.
            Defaults to 1000.
        chunk_overlap: Forwarded to ``CharacterTextSplitter`` as
            ``chunk_overlap``. Defaults to 0.

    Returns:
        Whatever ``CharacterTextSplitter.split_documents(docs)`` returns.
    """
    text_splitter = CharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(docs)


def main(root_dir, deep_lake_path):
    """Load, split, embed and store the PDFs under ``root_dir`` in Deep Lake.

    Args:
        root_dir: Directory passed to ``load_docs``.
        deep_lake_path: Dataset path handed to the ``DeepLake`` vector store.

    Returns:
        The ``DeepLake`` vector store the chunks were added to, so the caller
        can use it after indexing.
    """
    docs = load_docs(root_dir)
    texts = split_docs(docs)
    embeddings = OpenAIEmbeddings()

    db = DeepLake(dataset_path=deep_lake_path, embedding_function=embeddings)
    db.add_documents(texts)
    return db

## Load, split, and embed text

In [9]:
# Load every PDF under pdfdirname, then split the loaded documents into chunks.
docs = load_docs(pdfdirname)
texts = split_docs(docs)

# Embedding wrapper; it is passed to the vector store in a later cell.
embeddings = OpenAIEmbeddings()
# Bare expression: the notebook shows the object's repr as the cell output.
embeddings

OpenAIEmbeddings(client=<class 'openai.api_resources.embedding.Embedding'>, model='text-embedding-ada-002', deployment='text-embedding-ada-002', openai_api_version='2022-12-01', openai_api_base=None, openai_api_type=None, embedding_ctx_length=8191, openai_api_key=None, openai_organization=None, allowed_special=set(), disallowed_special='all', chunk_size=1000, max_retries=6)

In [10]:
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

In [11]:
# Requires the optional `chromadb` package; without it this call raises
# ValueError ("Could not import chromadb python package").
docsearch = Chroma.from_documents(texts, embeddings)

ValueError: Could not import chromadb python package. Please install it with `pip install chromadb`.

In [12]:
# Retrieval-QA chain: a streaming GPT-4 chat model with the "stuff" chain
# type, reading its context from the docsearch retriever.
qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model_name="gpt-4", streaming=True),
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
)

In [13]:
# Question to put to the retrieval-QA chain.
query = "What are the design principles of biochemical oscillators?"
# Last expression of the cell, so the returned answer is shown as its output.
qa.run(query)

'The design principles of biochemical oscillators can be summarized as follows:\n\n1. Negative feedback: This is necessary to bring the reaction network back to the starting point of its oscillation. The negative feedback signal ensures that if a concentration of a component gets too large, it will eventually decrease, and if it gets too small, it will eventually increase.\n\n2. Time delay: The negative feedback signal must be sufficiently delayed in time so that the chemical reactions do not home in on a stable steady state. Time-delay can be created by a physical constraint, a long chain of reaction intermediates, or by dynamical hysteresis (overshoot and undershoot) as consequences of positive feedback in the reaction mechanism.\n\n3. Nonlinearity: The kinetic rate laws of the reaction mechanism must be sufficiently nonlinear to destabilize the steady state. In biochemical reaction kinetics, there are many sources of nonlinearity that are conducive to oscillations.\n\n4. Proper bala

In [None]:
from langchain import OpenAI, PromptTemplate, LLMChain
from langchain.chains.mapreduce import MapReduceChain
from langchain.prompts import PromptTemplate