In [1]:
import os
from os.path import join
import json
import openai
from IPython.display import display, Markdown

# chatOpenAI within langchain
from langchain.chat_models import ChatOpenAI
from langchain.chat_models import AzureChatOpenAI

# prompt template refers to a reproducible way to generate a prompt
from langchain.prompts import ChatPromptTemplate

# LangChain for question answering over a list of documents
from langchain.chains import RetrievalQA

# Load data from a source as Documents. A Document is a piece of text and associated metadata.
from langchain.document_loaders import CSVLoader

# DocArrayInMemorySearch is a document index provided by Docarray that stores documents in memory
from langchain.vectorstores import DocArrayInMemorySearch


#takes care of storing embedded data and performing vector search for you
from langchain.indexes import VectorstoreIndexCreator

#pdf loader
from langchain.document_loaders import PyPDFLoader

# QA evaluator
from langchain.evaluation.qa import QAGenerateChain
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.text_splitter import CharacterTextSplitter

from langchain.chains import SequentialChain
from langchain.chains import SimpleSequentialChain

# environment variables

In [2]:
# Location of the JSON settings file, resolved against the current working directory.
CURRENT_PATH = os.getcwd()
CONFIG_FILE = "config.json"

# Every setting this cell reads from the config file.
REQUIRED_CONFIG_KEYS = (
    "AZURE_OPENAI_KEY",
    "AZURE_OPENAI_ENDPOINT",
    "AZURE_ENGINE_NAME",
    "AZURE_ADA_NAME",
    "AZURE_ADA_ENDPOINT",
    "AZURE_ADA_KEY",
)

# Load Config File
# Read as UTF-8 explicitly so parsing does not depend on the platform's default encoding.
with open(join(CURRENT_PATH, CONFIG_FILE), encoding="utf-8") as file:
    # Load the JSON data
    config = json.load(file)

# Fail fast with a single message naming every missing setting,
# rather than a bare KeyError for whichever key happens to be read first.
missing_keys = [key for key in REQUIRED_CONFIG_KEYS if key not in config]
if missing_keys:
    raise KeyError(f"{CONFIG_FILE} is missing required settings: {', '.join(missing_keys)}")

# Environmental Variables
AZURE_OPENAI_KEY = config['AZURE_OPENAI_KEY']
AZURE_OPENAI_ENDPOINT = config['AZURE_OPENAI_ENDPOINT']
AZURE_ENGINE_NAME = config['AZURE_ENGINE_NAME']
AZURE_ADA_NAME = config['AZURE_ADA_NAME']
AZURE_ADA_ENDPOINT = config['AZURE_ADA_ENDPOINT']
AZURE_ADA_KEY = config['AZURE_ADA_KEY']

# OpenAI ADA API embeddings
openai_ada_key = AZURE_ADA_KEY
openai_ada_base = AZURE_ADA_ENDPOINT
openai_ada_deployment = AZURE_ADA_NAME
openai_api_type = 'azure'
openai_api_key = AZURE_OPENAI_KEY

In [3]:
# Chat model handed to the question-answering chains below; endpoint, key and
# deployment name come from the values read out of the config file.
ai = AzureChatOpenAI(
    openai_api_base=AZURE_OPENAI_ENDPOINT,
    openai_api_key=AZURE_OPENAI_KEY,
    openai_api_type='azure',
    openai_api_version="2023-05-15",
    deployment_name=AZURE_ENGINE_NAME,
    model="gpt-3.5-turbo",
    temperature=0.1,
)

# load documents

In [39]:
# Folder and file name of the handbook PDF that the next cell loads.
PATH, FILE = 'onboarding docs', 'Valve_Handbook_LowRes-1-10.pdf'

In [40]:
# Splitter settings: break on newlines; chunk_size and chunk_overlap are
# measured with len, i.e. in characters.
text_splitter = CharacterTextSplitter(
    separator='\n',
    chunk_size=1000,
    chunk_overlap=150,
    length_function=len,
)

# load PDF File, then cut the loaded documents into chunks
loader = PyPDFLoader(join(PATH, FILE))
data_handbook = text_splitter.split_documents(loader.load())

In [41]:
# Folder and file name of the JSON chart that the next cell loads
# (this overwrites the PDF location stored in the same two names earlier).
PATH, FILE = 'onboarding docs', 'chart_valve.json'

In [42]:
# load JSON
# The context manager closes the file handle as soon as parsing finishes, and
# the explicit UTF-8 keeps decoding independent of the platform default.
# (json is already imported in the notebook's first cell.)
with open(join(PATH, FILE), encoding="utf-8") as chart_handle:
    chart_data = json.load(chart_handle)

# Re-serialise the chart and cut the string at every literal "key" substring;
# the resulting list of text fragments is what gets embedded into the chart store.
# NOTE(review): presumably every chart entry carries a "key" field, so this
# yields roughly one fragment per entry — confirm against chart_valve.json.
chart_file = json.dumps(chart_data).split("key")

# embedding & vectorstore

In [43]:
from langchain.embeddings.openai import OpenAIEmbeddings

# Embedding client for the Azure ADA deployment; all connection values are the
# lower-case aliases defined at the end of the config cell.
# NOTE(review): chunk_size=1 presumably sends one text per embeddings request — confirm against the OpenAIEmbeddings docs.
embedding = OpenAIEmbeddings(
    openai_api_key=openai_ada_key,
    openai_api_base=openai_ada_base,
    deployment=openai_ada_deployment,
    openai_api_type=openai_api_type,
    chunk_size=1,
)

In [44]:
from langchain.vectorstores import Chroma

# Assigned here but not passed to from_texts below.
persist_directory = 'docs/chroma'

# Embed every chart fragment and index the vectors in a Chroma store.
vectordb_chart = Chroma.from_texts(embedding=embedding, texts=chart_file)

Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised RateLimitError: Requests to the Embeddings_Create Operation under Azure OpenAI API version 2022-12-01 have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 7 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised RateLimitError: Requests to the Embeddings_Create Operation under Azure OpenAI API version 2022-12-01 have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 3 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised RateLimitError: Requests to the Embeddings_Cr

In [46]:
# Retriever over the chart vector store, created with default arguments.
retriever = vectordb_chart.as_retriever()

In [10]:
# Number of entries reported by the chart store's underlying collection.
chart_entry_count = vectordb_chart._collection.count()
print(chart_entry_count)

41


In [11]:
persist_directory = 'docs/chroma'

# Embed the handbook chunks and index them in a Chroma store that is given
# persist_directory as its storage location.
vectordb_handbook = Chroma.from_documents(
    persist_directory=persist_directory,
    embedding=embedding,
    documents=data_handbook,
)

Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised RateLimitError: Requests to the Embeddings_Create Operation under Azure OpenAI API version 2022-12-01 have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 7 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised RateLimitError: Requests to the Embeddings_Create Operation under Azure OpenAI API version 2022-12-01 have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 3 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..


# retriever

In [12]:
# Similarity-search retriever over the handbook chunks.
handbook_retriever = vectordb_handbook.as_retriever(
    search_type="similarity",
)

In [13]:
# Similarity-search retriever over the chart fragments.
chart_retriever = vectordb_chart.as_retriever(
    search_type="similarity",
)

# memory

In [14]:
from langchain.memory import ConversationSummaryMemory

# Conversation memory attached to the study-buddy chain.
# It reuses the shared `ai` chat model rather than constructing a second,
# identically configured AzureChatOpenAI, so the model settings live in one place.
# input_key="question" matches the "question" entry of the dict the chain is called with.
memory_buddy = ConversationSummaryMemory(
    llm=ai,
    memory_key="chat_history",
    input_key="question",
    return_messages=True,
)

In [15]:
from langchain.memory import ConversationSummaryMemory

# Conversation memory attached to the networking chain.
# It reuses the shared `ai` chat model rather than constructing another,
# identically configured AzureChatOpenAI, so the model settings live in one place.
# input_key="question" matches the "question" entry of the dict the chain is called with.
memory_network = ConversationSummaryMemory(
    llm=ai,
    memory_key="chat_history",
    input_key="question",
    return_messages=True,
)

# QA chain with chain_type="stuff" (best-performing approach)

## Study buddy

In [16]:
# Study-buddy prompt: answer strictly from the supplied context and cite the source.
prompt_template = (
    "Use the following pieces of context to answer the question at the end. "
    "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n"
    "Return the answer together with the source (document and pages).\n"
    "\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# "stuff" chain over the handbook; its answer is returned under the "topic" key.
chain_sb = load_qa_chain(
    ai,
    chain_type="stuff",
    prompt=PROMPT,
    memory=memory_buddy,
    output_key="topic",
)

In [19]:
# NOTE(review): `query` is not assigned in any cell above this one in the
# visible notebook (its first assignment is in the networking section), so this
# cell depends on leftover kernel state — TODO define the study-buddy question here.
handbook_hits = handbook_retriever.get_relevant_documents(query)
chain_sb(
    {"input_documents": handbook_hits, "question": query},
    return_only_outputs=True,
)

{'topic': "I'm sorry, but I don't have enough information to recap the document based on the given context."}

## Networking

In [36]:
# Networking prompt: recommend up to three colleagues for a topic, or nobody
# when the topic is unclear. The third line of the template is a single space.
prompt_template = (
    "Given the following information regarding company workers, give up to three recommendations about the best person to talk about:\n"
    "Only recommend someone if you are confident they are a relevant person for the topic and if the topic is clear. Otherwise, don't recommend anyone.\n"
    " \n"
    "Context about company workers:{context}\n"
    "\n"
    "Topic: {question}\n"
)
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# "stuff" chain over the people chart; its answer is returned under the "people" key.
chain_nw = load_qa_chain(
    ai,
    chain_type="stuff",
    prompt=PROMPT,
    memory=memory_network,
    output_key="people",
)

In [37]:
# Alternative question kept for reference: "Does Valve have data science models?"
query = "Who knows about sandwiches?"

In [38]:
# Chart fragments retrieved for the current `query`.
network_documents = chart_retriever.get_relevant_documents(query)
# Topic given to the prompt: content of the newest message in the study-buddy memory.
# NOTE(review): retrieval uses `query` while the topic comes from memory_buddy —
# confirm that this mismatch is intended.
network_topic = memory_buddy.chat_memory.messages[-1].content

chain_nw(
    {"input_documents": network_documents, "question": network_topic},
    return_only_outputs=True,
)

{'people': 'Based on the given information, I cannot recommend anyone to talk about the topic as there is not enough information provided to determine a relevant person.'}

In [None]:
# Content of the newest message recorded in the study-buddy memory.
latest_buddy_message = memory_buddy.chat_memory.messages[-1]
latest_buddy_message.content

In [None]:
# Content of the newest message recorded in the networking memory.
latest_network_message = memory_network.chat_memory.messages[-1]
latest_network_message.content

# alternatives that didn't work

In [64]:
# Per-document prompt: answer from the given context and cite the source.
question_prompt_template = (
    "Use the following pieces of context to answer the question at the end. "
    "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n"
    "Return the answer together with the source (document and pages).\n"
    "\n"
    "{context}\n"
    "\n"
    "Question: {question}"
)
QUESTION_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=question_prompt_template,
)

# Combining prompt: the per-document answers ({summaries}) are used as the
# topic and {chart} as the information about company workers.
combine_prompt_template = (
    "Given the following information regarding company workers, give three recommendations about the best person to talk about a given topic.\n"
    "\n"
    "{chart}\n"
    "\n"
    "Topic: {summaries}"
)
COMBINE_PROMPT = PromptTemplate(
    input_variables=["summaries", "chart"],
    template=combine_prompt_template,
)

In [65]:
# map_reduce variant using the two prompts above; return_map_steps=True makes
# the result include the "intermediate_steps" entry alongside "output_text".
chain = load_qa_chain(
    ai,
    chain_type="map_reduce",
    question_prompt=QUESTION_PROMPT,
    combine_prompt=COMBINE_PROMPT,
    return_map_steps=True,
)

In [68]:
# Three closest matches from each store for the current `query`; the chart
# matches go in under "chart", the variable name COMBINE_PROMPT declares.
handbook_matches = vectordb_handbook.similarity_search(query, k=3)
chart_matches = vectordb_chart.similarity_search(query, k=3)
result = chain(
    {"input_documents": handbook_matches, "question": query, "chart": chart_matches},
    return_only_outputs=True,
)

In [69]:
# Show the dictionary returned by the map_reduce chain call above.
result

{'intermediate_steps': ['According to the given context, it is not specified what exactly an employee should do on their first day at Valve.'],
 'output_text': "Based on the given information, here are three recommendations for the best person to talk to about the topic of what an employee should do on their first day at Valve:\n\n1. Linda Walker: As a Public Relations Specialist, Linda manages Valve's public image and coordinates communication strategies. She may have insights into the onboarding process and can provide information on what new employees typically do on their first day.\n\n2. Rachel Thompson: Being a System Administrator, Rachel is responsible for maintaining and troubleshooting Valve's server infrastructure. She may have knowledge of the technical aspects of employee onboarding, such as setting up accounts, accessing systems, and familiarizing new hires with the company's technology.\n\n3. Michelle Green: As the Art Director, Michelle oversees the art department and g

In [49]:
# Handbook retriever with k=3 passed through as a search keyword argument.
vectordb_handbook.as_retriever(
    search_kwargs={"k": 3},
)

VectorStoreRetriever(tags=None, metadata=None, vectorstore=<langchain.vectorstores.chroma.Chroma object at 0x0000023EBEF4D090>, search_type='similarity', search_kwargs={'k': 3})

In [36]:
# Attempt to run the study-buddy and networking chains back to back.
# Recorded outcome: construction fails with a ValidationError
# ("Missing required input keys: {'input_documents'}").
full_chain = SequentialChain(
    chains=[chain_sb, chain_nw],
    input_variables=["context", "question", "chart"],
    output_variables=["topic", "people"],
    verbose=True,
)

ValidationError: 1 validation error for SequentialChain
__root__
  Missing required input keys: {'input_documents'}, only had {'context', 'question', 'chart'} (type=value_error)

In [25]:
query = "What should I do on my first day?"
first_day_matches = vectordb_handbook.similarity_search(query, k=3)
# Recorded outcome: ValueError "Missing some input keys: {'input'}".
full_chain(
    {"input_documents": first_day_matches, "question": query},
    return_only_outputs=True,
)

ValueError: Missing some input keys: {'input'}

In [167]:
# Feed a handbook summary back in as the query.
query = "The handbook for new employees at Valve describes the company as having a flat structure, where there is no management and nobody reports to anybody else. Employees are encouraged to pick their own projects and are responsible for prioritizing their own work. The company is described as being focused on creating a place where talented individuals are empowered to put their best work into the hands of millions of people, with very little in their way. The handbook also emphasizes the importance of hiring and the responsibility of all employees to focus on the long-term goals of the company"
# NOTE(review): `vectordb` is not assigned in any visible cell — presumably a
# leftover from an earlier kernel session; confirm which store was meant.
summary_matches = vectordb.similarity_search(query, k=3)
chain(
    {"input_documents": summary_matches, "question": query},
    return_only_outputs=True,
)

{'output_text': "1. Gabe Newell - as the CEO and co-founder of Valve, he would have a deep understanding of the company's structure and values, as well as the long-term goals and vision for the company.\n\n2. Sarah Thompson - as the Human Resources Manager, she would have a thorough understanding of the company's culture and values, as well as the hiring process and how new employees are onboarded into the company.\n\n3. Michelle Clark - as a Project Manager, she would have experience working within the flat structure of Valve and could provide insight into how employees prioritize their work and collaborate on projects without traditional management structures."}

In [63]:
# NOTE(review): the `chain` built in the visible cells is created without a
# memory argument, so this presumably ran against an earlier `chain` object — confirm.
summary_buffer = chain.memory.buffer
print(summary_buffer)

The human asks the AI about the most important information in a document, and the AI responds that the unique structure and culture of Valve, as described in their handbook for new employees, is the most important information. The handbook emphasizes a flat organization where employees have freedom to choose their own projects and make decisions without traditional management. The company is focused on empowering talented individuals to put their best work into the hands of millions of people, with very little in their way. According to the AI, the first thing to do when joining Valve is to read the "Handbook for New Employees."


In [64]:
# Show the individual messages stored in the chain's memory.
chain.memory.chat_memory.messages

[HumanMessage(content='What is the most important information in the document?', additional_kwargs={}, example=False),
 AIMessage(content="The most important information in the document is the unique structure and culture of Valve as a flat organization where employees have the freedom to choose their own projects and make decisions without traditional management. This is emphasized throughout the handbook and is a key aspect of Valve's approach to work. (Source: Valve Handbook for New Employees, various pages)", additional_kwargs={}, example=False),
 HumanMessage(content='How is it to work at Valve?', additional_kwargs={}, example=False),
 AIMessage(content='Answer: The handbook for new employees at Valve describes the company as having a flat structure, where there is no management and nobody reports to anybody else. Employees are encouraged to pick their own projects and are responsible for prioritizing their own work. The company is described as being focused on creating a place wh