In [5]:
import os
import openai
import deeplake 
from dotenv import load_dotenv
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import DeepLake
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import PyPDFLoader

In [2]:
# Machine-specific locations are kept as defaults but can be overridden through
# environment variables, so the notebook can run elsewhere without code edits.
env_file = os.getenv('AIKEYS_ENV_FILE', '/Users/jonathanfischer/Desktop/AI_CodingFun/Tests/.env.AIKEYS')
load_dotenv(env_file)

# Credentials are read after load_dotenv so values from the dotenv file are visible.
activeloop_token = os.getenv('ACTIVELOOP_TOKEN')
deeplake_username = os.getenv('DEEPLAKE_USERNAME')
openai.api_key = os.getenv('OPENAI_API_KEY')

# Directory that is scanned for the PDF papers.
dirname = os.getenv('THESIS_PAPERS_DIR', '/Users/jonathanfischer/Desktop/AI_CodingFun/LangChainQueryTexts/ThesisPapers')

In [3]:

# Build the path from `dirname` instead of repeating the absolute directory,
# so the papers folder is configured in exactly one place.
loader = PyPDFLoader(os.path.join(dirname, "DesignPrinciplesOscillators.pdf"))
pages = loader.load_and_split()

In [4]:
# Preview only the first few entries; dumping the full list floods the cell output.
pages[:3]

[Document(page_content='Design Principles of Biochemical Oscillators\nBéla Novak* and John J. Tyson#\n*Oxford Centre for Integrative Systems Biology, Department of Biochemistry, University of Oxford,\nSouth Parks Road, Oxford OX1 3QU, UK\n#Department of Biological Sciences, Virginia Polytechnic Institute & State University, Blacksburg\nVA 24061, USA\nAbstract\nCellular rhythms are generated by complex interactions among genes, proteins and metabolites.\nThey are used to control every aspect of cell physiology from signaling, motility and development\nto growth, division and death. By considering specific examples of oscillatory processes, we pick\nout three general requirements for biochemical oscillations: delayed negative feedback, sufficient\n‘nonlinearity’ of the reaction kinetics, and proper balancing of the time-scales of opposing\nchemical reactions. Positive feedback is one mechanism to delay the negative feedback signal.\nBiological oscillators can be classified according to t

In [3]:
def load_docs(root_dir):
    """Load and split every PDF found under ``root_dir``.

    Walks ``root_dir`` recursively. Each file whose name ends in ``.pdf``
    (case-insensitive) is passed to ``PyPDFLoader`` and the result of its
    ``load_and_split()`` call is appended to the returned list. Files with
    any other extension are ignored rather than handed to the PDF loader.

    Loading is best-effort: a file that raises while being loaded is
    reported on stdout and skipped, and the remaining files are still
    processed.

    Args:
        root_dir: Directory to search for PDF files.

    Returns:
        A flat list of everything returned by ``load_and_split()`` for the
        PDFs that loaded successfully, in a deterministic traversal order.
        Empty when no PDF is found.
    """
    docs = []
    for dirpath, dirnames, filenames in os.walk(root_dir):
        # Sorting in place makes os.walk descend into subdirectories in a
        # stable order instead of whatever the filesystem happens to return.
        dirnames.sort()
        for file in sorted(filenames):
            if not file.lower().endswith(".pdf"):
                continue
            print(file)
            path = os.path.join(dirpath, file)
            try:
                docs.extend(PyPDFLoader(path).load_and_split())
            except Exception as e:
                # One unreadable PDF must not abort the whole run, but name
                # the file so the gap in the corpus is visible.
                print(f"Skipping {path}: {e}")
    return docs


def split_docs(docs, chunk_size=1000, chunk_overlap=0):
    """Split documents into chunks with a ``CharacterTextSplitter``.

    Args:
        docs: Documents to split; passed unchanged to
            ``CharacterTextSplitter.split_documents``.
        chunk_size: Forwarded to the splitter's ``chunk_size`` argument.
            Defaults to 1000, the value previously hard-coded here.
        chunk_overlap: Forwarded to the splitter's ``chunk_overlap``
            argument. Defaults to 0, the value previously hard-coded here.

    Returns:
        Whatever ``split_documents`` returns for ``docs``.
    """
    text_splitter = CharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(docs)


def main(root_dir, deep_lake_path):
    """Load, split and index the PDFs under ``root_dir`` into a DeepLake store.

    Runs ``load_docs`` and ``split_docs`` on ``root_dir``, creates an
    ``OpenAIEmbeddings`` instance, opens a ``DeepLake`` vector store at
    ``deep_lake_path`` with it, and adds the split documents to the store.

    Args:
        root_dir: Directory passed to ``load_docs``.
        deep_lake_path: Passed to ``DeepLake`` as ``dataset_path``.

    Returns:
        The ``DeepLake`` store the documents were added to, so the caller
        can use it without constructing it a second time.
    """
    docs = load_docs(root_dir)
    texts = split_docs(docs)
    embeddings = OpenAIEmbeddings()

    db = DeepLake(dataset_path=deep_lake_path, embedding_function=embeddings)
    db.add_documents(texts)
    # Previously the store was dropped when the function returned.
    return db

In [6]:
docs = load_docs(dirname)
texts = split_docs(docs)
# load_docs reports per-file failures and carries on, so an empty corpus would
# otherwise pass silently; stop here with a clear message instead.
assert texts, f"No text chunks were produced from {dirname}"

embeddings = OpenAIEmbeddings()
# Last expression of the cell: shows the embeddings configuration as output.
embeddings

SelfAssemblyOscillations.pdf
Osman.pdf
Sensitivity_analysis_of_autonomous_oscillations_ap.pdf
DesignPrinciplesOscillators.pdf
TunableOscillator.pdf
GeneticAlgorithm.pdf


OpenAIEmbeddings(client=<class 'openai.api_resources.embedding.Embedding'>, model='text-embedding-ada-002', document_model_name='text-embedding-ada-002', query_model_name='text-embedding-ada-002', embedding_ctx_length=8191, openai_api_key=None, openai_organization=None, allowed_special=set(), disallowed_special='all', chunk_size=1000, max_retries=6)

In [7]:
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

In [8]:
# Embed the split chunks and index them in a Chroma vector store.
# No persist directory is passed here.
docsearch = Chroma.from_documents(documents=texts, embedding=embeddings)

Using embedded DuckDB without persistence: data will be transient


In [12]:
# Retrieval QA chain: chunks fetched from `docsearch` are passed to the chat
# model using the "stuff" chain type.
# NOTE(review): streaming=True is set, but no callback handler is passed in
# this call -- confirm whether streamed output was intended.
qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model_name="gpt-4", streaming=True),
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
)

In [13]:
# Question posed to the retrieval QA chain `qa`.
query = "What are the design principles of biochemical oscillators?"
# Last expression of the cell, so the value returned by run() is shown as the cell output.
qa.run(query)

'The design principles of biochemical oscillators can be summarized as follows:\n\n1. Negative feedback: This is necessary to bring the reaction network back to the starting point of its oscillation. The negative feedback signal ensures that if a concentration of a component gets too large, it will eventually decrease, and if it gets too small, it will eventually increase.\n\n2. Time delay: The negative feedback signal must be sufficiently delayed in time so that the chemical reactions do not home in on a stable steady state. Time-delay can be created by a physical constraint, a long chain of reaction intermediates, or by dynamical hysteresis (overshoot and undershoot) as consequences of positive feedback in the reaction mechanism.\n\n3. Nonlinearity: The kinetic rate laws of the reaction mechanism must be sufficiently nonlinear to destabilize the steady state. In biochemical reaction kinetics, there are many sources of nonlinearity that are conducive to oscillations.\n\n4. Proper bala

In [None]:
from langchain import OpenAI, PromptTemplate, LLMChain
from langchain.chains.mapreduce import MapReduceChain
from langchain.prompts import PromptTemplate