In [None]:
from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser
from langchain.prompts import StringPromptTemplate
from langchain import OpenAI, SerpAPIWrapper, LLMChain
from typing import List, Union
from langchain.schema import AgentAction, AgentFinish, HumanMessage
import re
import getpass
from langchain.document_loaders import TextLoader
import os
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

In [None]:
OPENAI_API_KEY = getpass.getpass('Key')
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

In [20]:
import chromadb
from chromadb.config import Settings

client = chromadb.Client(Settings(
    chroma_db_impl="duckdb+parquet",
    persist_directory=".chromadb" # Optional, defaults to .chromadb/ in the current directory
))

Using embedded DuckDB with persistence: data will be stored in: .chromadb


In [27]:
embeddings = OpenAIEmbeddings()

loader = TextLoader("12fa.txt")
documents = loader.load()
text_splitter = CharacterTextSplitter(
    separator = ".",
    chunk_size = 512,
    chunk_overlap  = 0,
    length_function = len,
)
texts = text_splitter.split_documents(documents)

embeddings = OpenAIEmbeddings()
docsearch = Chroma().from_documents(texts, embeddings)

Created a chunk of size 637, which is longer than the specified 512
Using embedded DuckDB without persistence: data will be transient
No embedding_function provided, using default embedding function: SentenceTransformerEmbeddingFunction
Using embedded DuckDB without persistence: data will be transient


In [None]:
from langchain.vectorstores import Chroma

from chromadb.config import Settings
client = chromadb.Client(Settings(
    chroma_db_impl="duckdb+parquet",
    persist_directory="/path/to/persist/directory" # Optional, defaults to .chromadb/ in the current directory
))

docsearch = Chroma.from_documents(texts, embeddings)

In [None]:
len(texts)

In [None]:
def format_chat(chat_history):
    formatted_history = ''
    for message in chat_history:
        formatted_history += f"{message['role']}: {message['content']} \n\n"
    formatted_history = formatted_history.strip().rstrip("\r\n")
    return formatted_history

In [None]:
from langchain.prompts import PromptTemplate
prompt_template = """
You are now Wyl, you can perform a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. 
You have the following personality traits: funny, always making jokes, responding with corporate but edgy insults. 
If a suggestion violates 12FA rules, make fun of the question and call the asker a noob.
Use the following pieces of context to respond to the user's question or statement at the end. 

{context}

If you don't know the answer from the context below, just say that you don't know, don't try to make up an answer.
Keep in mind the following chat history between the user and Wyl:

{chat_history}
ElfAi:
"""

question_prompt = PromptTemplate(
    template=prompt_template, input_variables=["context", "chat_history"]
)

In [None]:
llm = ChatOpenAI(temperature=0) #OpenAI(temperature=0, model_name="text-davinci-003")

llm_chain = LLMChain(prompt=question_prompt, llm=llm, verbose=False)
chat_history = []
question = ""

def respond(llm_chain, docsearch, question, chat_history):
    docs = docsearch.similarity_search_with_score(question, 3)
    context = "\n".join([doc[0].page_content.replace("\n", " ") for doc in docs])
    
    chat_history.append({"role": "User", "content": question})
    formatted_history = format_chat(chat_history)

    output = llm_chain.predict(context=context, chat_history=formatted_history)
    chat_history.append({"role": "Wyl", "content": output})
    return chat_history

In [None]:
chat_history = respond(llm_chain, docsearch, "Is hardcoding config in code ok?", chat_history)
print(format_chat(chat_history))

In [None]:
chat_history = respond(llm_chain, docsearch, "What are the benefits of using enviroment variables as opposed to hardcoding?", chat_history)
print(format_chat(chat_history))

In [None]:
chat_history = respond(llm_chain, docsearch, "I think we could just run with a stateful service, Wyl what do you think? ", chat_history)
print(format_chat(chat_history))

In [None]:
chat_history = respond(llm_chain, docsearch, "Write some example python code on how we can externalise config", chat_history)
print(format_chat(chat_history))