In [2]:
import os
import openai
# import deeplake 
from dotenv import load_dotenv

from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import DeepLake
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import PyPDFLoader

from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import ConversationalRetrievalChain
#! memory 
from langchain.memory import ConversationBufferMemory

## Get API keys and script path

In [3]:
# Load credentials from the keys.env file located in the current working directory.
load_dotenv(os.path.join(os.getcwd(), 'keys.env'))
activeloop_token = os.getenv('ACTIVELOOP_TOKEN')
deeplake_username = os.getenv('DEEPLAKE_USERNAME')
openai.api_key = os.environ.get('OPENAI_API_KEY')

# Root directory of the source files to index. Set the CODEPATH environment
# variable (e.g. in keys.env) to run the notebook on another machine; the
# original location is kept as the fallback.
codepath = os.environ.get(
    'CODEPATH',
    '/Users/jonathanfischer/Desktop/PhD_ThesisWork/Julia/OscillatorPaper/FigureGenerationScripts/',
)

## Loader and splitter functions

In [4]:
def load_docs(root_dir, skip_extensions=('png', 'jpeg', 'jpg')):
    """Recursively load the files under ``root_dir`` as documents.

    Every file whose extension is not in ``skip_extensions`` has its name
    printed and is passed to ``TextLoader(path).load_and_split()``. Loading is
    best-effort: a file that raises while loading is reported and skipped.

    Args:
        root_dir: Directory whose tree is walked with ``os.walk``.
        skip_extensions: Extensions (with or without a leading dot) to ignore.
            Matching is case-insensitive, so ``'png'`` also skips ``.PNG``.

    Returns:
        A flat list of everything returned by ``load_and_split()`` for the
        files that loaded successfully.
    """
    skipped = {ext.lower().lstrip('.') for ext in skip_extensions}
    docs = []
    for dirpath, _dirnames, filenames in os.walk(root_dir):
        for file in filenames:
            # Only treat the text after the last dot as an extension when a dot
            # is actually present; a file literally named "png" is not an image.
            _stem, dot, extension = file.rpartition('.')
            if dot and extension.lower() in skipped:
                continue
            print(file)
            path = os.path.join(dirpath, file)
            try:
                docs.extend(TextLoader(path).load_and_split())
            except Exception as e:
                # Keep going on unreadable files, but say which one failed.
                print(f"Skipping {path}: {e}")
    return docs


def split_docs(docs, chunk_size=1000, chunk_overlap=0):
    """Split documents into chunks with a ``CharacterTextSplitter``.

    Args:
        docs: Documents to split (as returned by ``load_docs``).
        chunk_size: Target chunk size passed to the splitter. The splitter may
            still emit longer chunks (it logs "Created a chunk of size ...,
            which is longer than the specified ..." when it does).
        chunk_overlap: Overlap between consecutive chunks passed to the splitter.

    Returns:
        The result of ``CharacterTextSplitter.split_documents(docs)``.
    """
    text_splitter = CharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(docs)

## Load and split raw code

In [5]:
rawcode = load_docs(codepath)
# Stop here with a clear message if nothing was loaded (wrong path, or every
# file failed to load); there would be nothing to embed in the later cells.
assert rawcode, f"No documents were loaded from {codepath!r}"
splitcode = split_docs(rawcode)

Created a chunk of size 1040, which is longer than the specified 1000
Created a chunk of size 1302, which is longer than the specified 1000
Created a chunk of size 1045, which is longer than the specified 1000
Created a chunk of size 1458, which is longer than the specified 1000
Created a chunk of size 2973, which is longer than the specified 1000


NERDSS_Tuning.jl
TEST_GA.jl
PhaseDiagrams.jl
TwoParamReachability.jl
evotest.jl
TESTPlots.jl
ReachabilityAnalysis.jl
OscillatoryRegions.jl


In [6]:
# Embedding client built with its default settings; the bare expression on the
# last line makes the notebook display the resulting configuration.
embeddings = OpenAIEmbeddings()
embeddings

OpenAIEmbeddings(client=<class 'openai.api_resources.embedding.Embedding'>, model='text-embedding-ada-002', deployment='text-embedding-ada-002', openai_api_version=None, openai_api_base=None, openai_api_type=None, openai_proxy=None, embedding_ctx_length=8191, openai_api_key=None, openai_organization=None, allowed_special=set(), disallowed_special='all', chunk_size=1000, max_retries=6, request_timeout=None, headers=None)

## Load text into vectorstore

In [7]:
code_retriever = Chroma.from_documents(splitcode, embeddings).as_retriever()  # embed the split chunks into a Chroma vector store (transient, per the cell output) and expose it as a retriever

Using embedded DuckDB without persistence: data will be transient


In [15]:
# Answering model: streaming is enabled and a stdout callback handler is
# attached to it.
chat = ChatOpenAI(
    model_name="gpt-3.5-turbo-16k-0613",
    streaming=True,
    callbacks=[StreamingStdOutCallbackHandler()],
)

In [16]:
# Conversation memory stored under the "chat_history" key and handed to the
# chain via memory=. NOTE(review): return_messages=True presumably yields the
# history as message objects rather than a single string — confirm in the
# LangChain docs.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

In [17]:
# Second chat model (temperature=0), used as the chain's condense_question_llm.
condenser = ChatOpenAI(temperature=0, model='gpt-4-0613')

In [18]:
# Conversational retrieval chain: `chat` is the main LLM over the code
# retriever, `condenser` is the condense-question LLM, and `memory` is attached
# to the chain.
qa = ConversationalRetrievalChain.from_llm(
    chat,
    retriever=code_retriever,
    verbose=True,
    memory=memory,
    chain_type="map_reduce",
    condense_question_llm=condenser,
)

In [19]:
def get_chat_history(inputs) -> str:
    """Render (human, ai) pairs as a newline-separated transcript.

    Each pair becomes two lines, ``Human:<human>`` then ``AI:<ai>``; an empty
    sequence yields an empty string.
    """
    return "\n".join(
        f"Human:{user_turn}\nAI:{bot_turn}" for user_turn, bot_turn in inputs
    )

In [20]:
query = "Improve one of the functions"
# Only the question is supplied: the chain was built with memory=, and that
# memory provides the "chat_history" input. (Previously the get_chat_history
# function object itself was passed as the history value.)
result = qa({"question": query})



[1m> Entering new MapReduceDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following portion of a long document to see if any of the text is relevant to answer the question. 
Return any relevant text verbatim.
______________________
fitness_function = make_fitness_function(eval_ic_fitness, fullprob) # Create a fitness function that includes your ODE problem as a constant

# using Debugger
#! Optimization block
function testGAfunc(evalfunc, fitness_function_factory, prob)
    population_size = 1000
    pop = generate_population(ic_values, population_size)

    myconstraints = BoxConstraints([ic_values[p]["min"] for p in keys(ic_values)], [ic_values[p]["max"] for p in keys(ic_values)])
    opts = Evolutionary.Options(abstol=1e-2, reltol=1.00, successive_f_tol = 5, iterations=5, store_trace = true, 
            show_trace=true, show_every=1, parallelization=:thread)
    common_range = 0.5; valrange = fill(comm