In [128]:
import os
from os.path import join
import json
import openai
from IPython.display import display, Markdown

# chatOpenAI within langchain
from langchain.chat_models import ChatOpenAI
from langchain.chat_models import AzureChatOpenAI

# prompt template refers to a reproducible way to generate a prompt
from langchain.prompts import ChatPromptTemplate

# LangChain for question answering over a list of documents
from langchain.chains import RetrievalQA

# Load data from a source as Documents. A Document is a piece of text and its associated metadata.
from langchain.document_loaders import CSVLoader

# DocArrayInMemorySearch is a document index provided by Docarray that stores documents in memory
from langchain.vectorstores import DocArrayInMemorySearch


#takes care of storing embedded data and performing vector search for you
from langchain.indexes import VectorstoreIndexCreator

#pdf loader
from langchain.document_loaders import PyPDFLoader

# QA evaluator
from langchain.evaluation.qa import QAGenerateChain
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain


# environment variables

In [2]:
# The config file is looked up in the notebook's current working directory.
CONFIG_FILE = "config.json"
CURRENT_PATH = os.getcwd()

# Parse config.json into a dict; the `with` block closes the handle afterwards.
with open(join(CURRENT_PATH, CONFIG_FILE)) as file:
    config = json.load(file)

# Chat-model settings. Every key is required: a missing one raises KeyError.
AZURE_OPENAI_KEY = config["AZURE_OPENAI_KEY"]
AZURE_OPENAI_ENDPOINT = config["AZURE_OPENAI_ENDPOINT"]
AZURE_ENGINE_NAME = config["AZURE_ENGINE_NAME"]

# Embedding-model (ADA) settings, also required.
AZURE_ADA_NAME = config["AZURE_ADA_NAME"]
AZURE_ADA_ENDPOINT = config["AZURE_ADA_ENDPOINT"]
AZURE_ADA_KEY = config["AZURE_ADA_KEY"]

# Lower-case aliases that the embedding cell passes to OpenAIEmbeddings.
openai_api_type = "azure"
openai_api_key = AZURE_OPENAI_KEY
openai_ada_deployment = AZURE_ADA_NAME
openai_ada_base = AZURE_ADA_ENDPOINT
openai_ada_key = AZURE_ADA_KEY

In [3]:
# Chat model handed to load_qa_chain further down; it targets the Azure
# deployment named in the config and runs at a low temperature (0.1).
ai = AzureChatOpenAI(
    openai_api_base=AZURE_OPENAI_ENDPOINT,
    openai_api_key=AZURE_OPENAI_KEY,
    openai_api_type="azure",
    openai_api_version="2023-05-15",
    deployment_name=AZURE_ENGINE_NAME,
    model="gpt-3.5-turbo",
    temperature=0.1,
)

# load documents

In [99]:
# File name and containing directory of the JSON document loaded in the next cell.
HANDBOOK_FILE = "chart_valve.json"
HANDBOOK_PATH = "onboarding docs"

In [100]:
# Load the handbook JSON into `file` (the parsed object, not a file handle).
# The `with` block guarantees the handle is closed once parsing finishes,
# even if json.load raises; a bare open() would leave it open.
# `json` and `join` come from the notebook's import cell.
with open(join(HANDBOOK_PATH, HANDBOOK_FILE)) as handbook_handle:
    file = json.load(handbook_handle)

In [133]:
# Cut `docum` into pieces at every occurrence of the substring "key".
# NOTE(review): `docum` is not assigned in any cell visible in this notebook,
# so this cell relies on leftover kernel state and fails on a fresh
# Restart & Run All. Confirm where it is built (presumably from `file`).
# NOTE(review): the split also fires on "key" inside ordinary text, not only
# on a JSON field name; verify that is intended.
delimiter = "key"
docs = docum.split(delimiter)

# embedding & vectorstore

In [104]:
from langchain.embeddings.openai import OpenAIEmbeddings

# Embedding client pointed at the Azure ADA deployment from the config cell.
# chunk_size=1 presumably limits each request to a single text; confirm
# against the OpenAIEmbeddings documentation.
embedding = OpenAIEmbeddings(
    openai_api_key=openai_ada_key,
    openai_api_base=openai_ada_base,
    deployment=openai_ada_deployment,
    openai_api_type=openai_api_type,
    chunk_size=1,
)

In [105]:
from langchain.vectorstores import Chroma

# Build a Chroma vector store from the text fragments in `docs`, embedding
# them with `embedding` and using docs/chroma as the persistence directory.
persist_directory = "docs/chroma"
vectordb = Chroma.from_texts(
    persist_directory=persist_directory,
    embedding=embedding,
    texts=docs,
)

Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised RateLimitError: Requests to the Embeddings_Create Operation under Azure OpenAI API version 2022-12-01 have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 7 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised RateLimitError: Requests to the Embeddings_Create Operation under Azure OpenAI API version 2022-12-01 have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 3 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised RateLimitError: Requests to the Embeddings_Cr

In [106]:
# Print how many entries the store's underlying collection holds
# (reads the private `_collection` attribute of the Chroma wrapper).
print(vectordb._collection.count())

41


In [135]:
# Echo the vector store object as the cell output.
vectordb

<langchain.vectorstores.chroma.Chroma at 0x21501bf9ad0>

# similarity

In [None]:
# similarity_search returns the k chunks most similar to the question (the single best match when k=1)

In [159]:
# Ask the vector store for the three fragments closest to the query text.
question = "Michelle"
docs1 = vectordb.similarity_search(question, k=3)

In [120]:
#len(docs)

In [121]:
# Show the Document objects returned by the similarity search.
docs1

[Document(page_content='": 11, "department": "Design", "title": "Art Director", "parent": 3, "name": "Michelle Green", "ask me about": "Michelle Green is the Art Director at Valve. She oversees the art department, guiding the visual style and artistic direction of Valve\'s games. Michelle has a strong background in concept art and visual design, contributing her expertise to games like Half-Life: Alyx and Team Fortress 2."}, "', metadata={}),
 Document(page_content='": 34, "department": "Administrative", "title": "Project Manager", "parent": 8, "name": "Michelle Clark", "ask me about": "Michelle Clark is a Project Manager at Valve. She oversees and coordinates the planning and execution of various projects within Valve. Michelle\'s expertise in project management methodologies, resource allocation, and risk assessment helps ensure the successful delivery of Valve\'s initiatives."}, "', metadata={}),
 Document(page_content='": 4, "department": "Design", "title": "Game Designer", "parent

In [122]:
# Show the raw text of the first result returned by the search.
docs1[0].page_content

'": 11, "department": "Design", "title": "Art Director", "parent": 3, "name": "Michelle Green", "ask me about": "Michelle Green is the Art Director at Valve. She oversees the art department, guiding the visual style and artistic direction of Valve\'s games. Michelle has a strong background in concept art and visual design, contributing her expertise to games like Half-Life: Alyx and Team Fortress 2."}, "'

# memory

In [123]:
from langchain.memory import ConversationSummaryMemory

# Summary-style conversation memory stored under "chat_history".
# It gets its own chat model with temperature=0 for writing the summary.
# Note: `memory` is rebound by the two memory cells that follow, so this
# instance is not the one the QA chain ends up using.
summary_llm = AzureChatOpenAI(
    openai_api_base=AZURE_OPENAI_ENDPOINT,
    openai_api_key=AZURE_OPENAI_KEY,
    openai_api_type="azure",
    openai_api_version="2023-05-15",
    deployment_name=AZURE_ENGINE_NAME,
    model="gpt-3.5-turbo",
    temperature=0,
)
memory = ConversationSummaryMemory(
    llm=summary_llm,
    memory_key="chat_history",
    return_messages=True,
)

# alternative - qa chain type stuff - BEST

In [124]:
from langchain.memory import ConversationBufferMemory

# Buffer-style conversation memory stored under "chat_history"; it reads the
# user's turn from the "question" input. Note: `memory` is rebound again by
# the next cell.
memory = ConversationBufferMemory(
    input_key="question",
    memory_key="chat_history",
)

In [125]:
from langchain.memory import ConversationSummaryMemory

# Summary memory attached to the QA chain built in the next cell.
# The summarising model is the shared `ai` chat model defined earlier: the
# settings previously repeated here were identical to `ai`, so reusing it
# keeps the model configuration in one place and prevents the copies from
# drifting apart.
# input_key="question" names the chain input holding the user's turn; the
# chain is called with both "input_documents" and "question", so the memory
# presumably cannot infer it on its own (confirm against the LangChain docs).
memory = ConversationSummaryMemory(
    llm=ai,
    memory_key="chat_history",
    input_key="question",
    return_messages=True,
)

In [165]:
# Prompt for the recommendation task: {context} receives the retrieved
# documents and {question} receives the topic supplied by the caller.
prompt_template = """Given the following information regarding company workers, give three recommendations about the best person to talk about a given topic.

{context}

Topic: {question}
"""

PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# "stuff" QA chain driven by the `ai` chat model, using the prompt above and
# the conversation memory defined in the preceding cell.
chain = load_qa_chain(
    ai,
    chain_type="stuff",
    prompt=PROMPT,
    memory=memory,
)

In [167]:
# Topic text sent to the chain; the same text drives the document retrieval.
query = "The handbook for new employees at Valve describes the company as having a flat structure, where there is no management and nobody reports to anybody else. Employees are encouraged to pick their own projects and are responsible for prioritizing their own work. The company is described as being focused on creating a place where talented individuals are empowered to put their best work into the hands of millions of people, with very little in their way. The handbook also emphasizes the importance of hiring and the responsibility of all employees to focus on the long-term goals of the company"

# Retrieve the three closest fragments, then run the chain on them. The call
# is the cell's last expression so its result is displayed as the output.
matching_docs = vectordb.similarity_search(query, k=3)
chain(
    {"input_documents": matching_docs, "question": query},
    return_only_outputs=True,
)

{'output_text': "1. Gabe Newell - as the CEO and co-founder of Valve, he would have a deep understanding of the company's structure and values, as well as the long-term goals and vision for the company.\n\n2. Sarah Thompson - as the Human Resources Manager, she would have a thorough understanding of the company's culture and values, as well as the hiring process and how new employees are onboarded into the company.\n\n3. Michelle Clark - as a Project Manager, she would have experience working within the flat structure of Valve and could provide insight into how employees prioritize their work and collaborate on projects without traditional management structures."}

In [63]:
# Print the memory's current buffer (the running conversation summary).
# NOTE(review): the saved output below refers to earlier questions, not the
# query in the cell above. It appears to come from a previous run (execution
# count 63); confirm after a fresh Restart & Run All.
print(chain.memory.buffer)

The human asks the AI about the most important information in a document, and the AI responds that the unique structure and culture of Valve, as described in their handbook for new employees, is the most important information. The handbook emphasizes a flat organization where employees have freedom to choose their own projects and make decisions without traditional management. The company is focused on empowering talented individuals to put their best work into the hands of millions of people, with very little in their way. According to the AI, the first thing to do when joining Valve is to read the "Handbook for New Employees."


In [64]:
# Show the raw human/AI messages recorded in the chain's memory.
chain.memory.chat_memory.messages

[HumanMessage(content='What is the most important information in the document?', additional_kwargs={}, example=False),
 AIMessage(content="The most important information in the document is the unique structure and culture of Valve as a flat organization where employees have the freedom to choose their own projects and make decisions without traditional management. This is emphasized throughout the handbook and is a key aspect of Valve's approach to work. (Source: Valve Handbook for New Employees, various pages)", additional_kwargs={}, example=False),
 HumanMessage(content='How is it to work at Valve?', additional_kwargs={}, example=False),
 AIMessage(content='Answer: The handbook for new employees at Valve describes the company as having a flat structure, where there is no management and nobody reports to anybody else. Employees are encouraged to pick their own projects and are responsible for prioritizing their own work. The company is described as being focused on creating a place wh