# Title
[]()

In [23]:
import os

# documents
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import TextLoader
from langchain.document_loaders.csv_loader import CSVLoader

from langchain.embeddings.openai import OpenAIEmbeddings
# from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.vectorstores import FAISS
from langchain.agents.agent_toolkits import create_retriever_tool

# Creating the Agent
from langchain.agents.agent_toolkits import create_conversational_retrieval_agent
from langchain.chat_models import ChatOpenAI

# Create memory 
from langchain.memory import ConversationBufferMemory

# from langchain.prompts import PromptTemplate
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
from langchain.schema.messages import SystemMessage
from langchain.prompts import MessagesPlaceholder

from langchain.agents import AgentExecutor
from langchain.agents.openai_functions_agent.agent_token_buffer_memory import AgentTokenBufferMemory

In [8]:
# UPDATE THESE PARAMETERS AS NEEDED
directory='../data/' # This is the directory containing the CSV/text files.

# Initialize Dictionaries
tool_dict = dict()
embeddings_dict = dict()
db_dict = dict()
retriever_dict = dict()
vector_dict = dict()
description_dict = dict()
answer_dict=dict()
conversation_dict = dict()
doc_dict = dict()
queries_dict = dict()

def create_documents(directory='../data/', glob='**/[!.]*', show_progress=True, loader_cls=CSVLoader):
    loader = DirectoryLoader(
        directory, glob=glob, show_progress=show_progress,
        loader_cls=loader_cls)

    documents = loader.load()
    print(f'Number of files: {len(documents)}')
    return documents

def create_retriever(documents, site_key, vector_dict=vector_dict, text_splitter=None):
    """
    Parameters:
        - text_splitter (optional): a text splitter object. If None, the documents are not split. 
    """
    embeddings_dict[site_key] = OpenAIEmbeddings()
    if text_splitter is None: # object type is the same (class 'langchain.schema.document.Document') whether or not the documents are split
        texts = documents
    else:
        texts = text_splitter.split_documents(documents)

    vector_dict[site_key] = FAISS.from_documents(texts, embeddings_dict[site_key])
    retriever_dict[site_key] = vector_dict[site_key].as_retriever()
    return retriever_dict

def create_tools_list(retriever_dict, description_dict):
    """
    https://api.python.langchain.com/en/latest/agents/langchain.agents.agent_toolkits.conversational_retrieval.tool.create_retriever_tool.html?highlight=create_retriever_tool#langchain.agents.agent_toolkits.conversational_retrieval.tool.create_retriever_tool
    """
    tools_list = []
    for site_key, retriever in retriever_dict.items():
        tool_name = f'search_{site_key}'
        tool = create_retriever_tool(retriever_dict[site_key], tool_name, description_dict[site_key])
        tools_list.append(tool)
    return tools_list

In [16]:
# Prepare the documents
doc_id = 1
doc_dict[doc_id] = create_documents(directory=directory, glob='*.csv')
retriever_dict = create_retriever(doc_dict[doc_id], 'recycle')
description_dict['recycle'] = """
From the Recycle BC website, this document provides the most specific information 
about whether or not an item is accepted for recycling and where to recycle it.
This should be the main resource for recycling information for residents of British Columbia.
"""
doc_dict[doc_id]

100%|██████████| 1/1 [00:00<?, ?it/s]




Number of files: 162


[Document(page_content='Conclusion: Accepted\nLocation: Curbside collection, Multi-family collection, Recycle BC depots\nCategory: Paper\nItem: Newspapers, inserts and flyers\nSource: https://recyclebc.ca/what-can-i-recycle-2/', metadata={'source': '..\\data\\Datajam_2023___Fine_Tuning_ChatBot_CSV_-_Recycle_BC_1.csv', 'row': 0}),
 Document(page_content='Conclusion: Accepted\nLocation: Curbside collection, Multi-family collection, Recycle BC depots\nCategory: Paper\nItem: Magazines and catalogues\nSource: https://recyclebc.ca/what-can-i-recycle-2/', metadata={'source': '..\\data\\Datajam_2023___Fine_Tuning_ChatBot_CSV_-_Recycle_BC_1.csv', 'row': 1}),
 Document(page_content='Conclusion: Accepted\nLocation: Curbside collection, Multi-family collection, Recycle BC depots\nCategory: Paper\nItem: Telephone books and other directories\nSource: https://recyclebc.ca/what-can-i-recycle-2/', metadata={'source': '..\\data\\Datajam_2023___Fine_Tuning_ChatBot_CSV_-_Recycle_BC_1.csv', 'row': 2}),
 Do

In [17]:

doc_id = 2
doc_dict[doc_id] = create_documents(directory=directory, glob='*.txt', loader_cls=TextLoader)
retriever_dict = create_retriever(doc_dict[doc_id], 'mattress')
description_dict['mattress'] = """
Information from the City of Vancouver website about how to recycle mattresses.
"""

100%|██████████| 1/1 [00:00<00:00, 476.73it/s]


Number of files: 1


In [19]:
retriever_dict = create_retriever(doc_dict[1], 'recycle')

tool_id = 1
tool_dict[tool_id] = create_tools_list(retriever_dict, description_dict)
tool_dict[tool_id]

[Tool(name='search_recycle', description='\nFrom the Recycle BC website, this document provides the most specific information \nabout whether or not an item is accepted for recycling and where to recycle it.\nThis should be the main resource for recycling information for residents of British Columbia.\n', args_schema=None, return_direct=False, verbose=False, callbacks=None, callback_manager=None, tags=None, metadata=None, handle_tool_error=False, func=<bound method BaseRetriever.get_relevant_documents of VectorStoreRetriever(tags=['FAISS'], metadata=None, vectorstore=<langchain.vectorstores.faiss.FAISS object at 0x00000223B1079FD0>, search_type='similarity', search_kwargs={})>, coroutine=<bound method BaseRetriever.aget_relevant_documents of VectorStoreRetriever(tags=['FAISS'], metadata=None, vectorstore=<langchain.vectorstores.faiss.FAISS object at 0x00000223B1079FD0>, search_type='similarity', search_kwargs={})>),
 Tool(name='search_mattress', description='\nInformation from the City

In [25]:

def create_chatbot(tools, verbose=True):

    llm = ChatOpenAI(
        temperature = 0,
        openai_organization=os.environ['openai_organization'],
        openai_api_key=os.environ['openai_api_key'],
        )

    memory = AgentTokenBufferMemory(memory_key='chat_history', llm=llm)
    system_message = SystemMessage(
        content=("""
            You are a helpful assistant who provides concise answers to residents in Metro Vancouver, Canada.
            You ask enough follow up questions as needed to provide the most relevant answer. 
            Where relevant, you retrieve the relevant information from your documents to answer the resident's question.
            Recycle BC is the main resource for recycling information. 
            Respond to the resident's query, which are delimited by triple backticks: ```{question}```
            """
        ),
        input_variables=['question']
    )
    
    prompt = OpenAIFunctionsAgent.create_prompt(
        system_message=system_message,
        extra_prompt_messages=[
            MessagesPlaceholder(variable_name='chat_history')
            ]
    )

    agent = OpenAIFunctionsAgent(llm=llm, tools=tools, prompt=prompt)
    agent_executor = AgentExecutor(
        agent=agent, tools=tools, memory=memory, verbose=verbose, return_intermediate_steps=True
        )
    agent_info = {
        'agent': agent,
        'agent_executor': agent_executor,
        'memory': memory,
        'chat_history': []
    }
    return agent_info

def chat_with_chatbot(user_input, agent_info):

    print(f'Chat history length: {len(agent_info["chat_history"])}')

    result = agent_info['agent_executor']({
        "input": user_input,
        "chat_history": agent_info['chat_history']
        })
    agent_info['chat_history'].append(result['chat_history'])
    
    return result

conversation_id = 1
input_id = 1
query1 = "What are my options for recycling coffee cups?"
query2 = "I live in an apartment."
query3 = "Thank you so much!"
queries_dict[conversation_id] = [query1, query2, query3]

conversation_dict[conversation_id] = create_chatbot(tool_dict[tool_id])
for query in queries_dict[conversation_id]:
    answer_dict[conversation_id] = chat_with_chatbot(query, conversation_dict[conversation_id])
for message in conversation_dict[conversation_id]['chat_history'][-1]:
    print(message)

Chat history length: 0


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `search_recycle` with `coffee cups`


[0m[36;1m[1;3m[Document(page_content='Conclusion: Accepted\nLocation: Curbside collection, Multi-family collection, Recycle BC depots\nCategory: Cartons and Paper Cups\nItem: Paper cups for hot and cold beverages\nSource: https://recyclebc.ca/what-can-i-recycle-2/', metadata={'source': '..\\data\\Datajam_2023___Fine_Tuning_ChatBot_CSV_-_Recycle_BC_1.csv', 'row': 71}), Document(page_content='Conclusion: Accepted\nLocation: Curbside collection, Multi-family collection, Recycle BC depots\nCategory: Plastic Containers\nItem: Empty single-use coffee and tea pods; remove lids and do not include lids with recycling\nSource: https://recyclebc.ca/what-can-i-recycle-2/', metadata={'source': '..\\data\\Datajam_2023___Fine_Tuning_ChatBot_CSV_-_Recycle_BC_1.csv', 'row': 42}), Document(page_content='Conclusion: Not Accepted\nLocation: \nCategory: Plastic Containers\

## Create a function to run queries 

In [12]:
conversation_id = 1
input_id = 1
query1 = "What are my options for recycling coffee cups?"
query2 = "I live in an apartment."
query3 = "Thank you so much!"
queries_dict[conversation_id] = [query1, query2, query3]

conversation_dict[conversation_id] = create_chatbot(tool_dict[tool_id])
for query in queries_dict[conversation_id]:
    answer_dict[conversation_id] = chat_with_chatbot(query, conversation_dict[conversation_id])
for message in conversation_dict[conversation_id]['chat_history'][-1]:
    print(message)

{}

# *End of Page*