### Importing packages and setting OpenAI as our LLM

In [142]:
# %pip install PyPDF2
# %pip install pdfminer.six

import getpass
import os

from langchain_community.document_loaders import WebBaseLoader, PyPDFLoader
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_openai import ChatOpenAI
from PyPDF2 import PdfFileReader
from pdfminer.high_level import extract_text

# Establish ChatOpenAI as our language model.
# The key is taken from the OPENAI_API_KEY environment variable so that no
# secret is stored in the notebook; if the variable is unset or empty, the
# user is prompted for the key interactively (input is not echoed).
openai_api_key = os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: ")
llm = ChatOpenAI(openai_api_key=openai_api_key)

### Setting up the corpus (~500 PDFs from the MA D.P.U. 20-80 docket) with vector embeddings

In [None]:
# No need to run this cell again unless the PDF files change; the saved vector store is loaded in a later cell.

# pdf_folder = "pdf_files"

# # Get a list of all PDF files in the folder
# pdf_files = [os.path.join(pdf_folder, file) for file in os.listdir(pdf_folder) if file.endswith(".pdf")]

# # Create a corpus with the filenames
# corpus = [os.path.splitext(os.path.basename(file))[0] for file in pdf_files]

# pdf1 = "pdf_files/pdf_1.pdf"

# # initialize the loader
# loader = PyPDFLoader(pdf1)

# # Initialize the list to store all documents
# docs = []

# # Iterate through the PDF files in the corpus and load each one
# for pdf_file in pdf_files:
#     loader = PyPDFLoader(pdf_file)
#     documents = loader.load()
#     # Append each document in the list of documents to docs
#     docs.extend(documents)

# # Split the documents into chunks of text
# splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
# documents = splitter.split_documents(docs)

In [None]:
# creating our vector embeddings: (don't run again unless needed)
# vector = FAISS.from_documents(documents, OpenAIEmbeddings(openai_api_key=openai_api_key))

# # storing the vector embeddings to my computer
# vector.save_local("vector_store")

In [115]:
# Load the FAISS index saved under "vector_store" and expose it as the
# retriever used by the RAG chain and the general RAG tool.
# NOTE(review): allow_dangerous_deserialization=True opts in to a load path
# that the library itself labels as dangerous -- only load stores that were
# created locally / come from a trusted source.
general_embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
x = FAISS.load_local(
    "vector_store",
    general_embeddings,
    allow_dangerous_deserialization=True,
)

# k=10 is forwarded to the vector search (presumably the number of chunks
# returned per query -- confirm against the retriever documentation).
retriever = x.as_retriever(search_kwargs={"k": 10})


### Creating our first Q&A system!

In [10]:
# Prompt for the RAG chain. The model is told to answer strictly from the
# supplied context and to decline when the context is insufficient.
# The continuation line after the backslash starts at column 0 on purpose:
# inside a triple-quoted string any leading indentation would become part of
# the prompt text sent to the model.
prompt = ChatPromptTemplate.from_template(
    """Answer the following question based only on the provided context. \
If there is not enough provided context, do not answer the question.

<context>
{context}
</context>

Question: {input}"""
)

# Chain that formats the prompt (with its {context} and {input} variables)
# and sends it to the LLM.
document_chain = create_stuff_documents_chain(llm, prompt)

In [11]:
# Combine the retriever with the document chain to get the full Q&A pipeline.
retrieval_chain = create_retrieval_chain(retriever, document_chain)

# Ask a sample question and print only the generated answer.
question = "What do the United Steelworkers of America have to say?"
response = retrieval_chain.invoke({"input": question})
print(response["answer"])


The United Steelworkers of America represent a significant portion of workers at Eversource, EGMA, National Grid, and Berkshire Gas in Massachusetts. They are concerned about the models and processes related to labor unions in these companies and believe that a just transition is necessary, with the voices of labor being included in the dialogue. They emphasize the importance of decent pay, good benefits, working conditions, and safety for employees.


In [17]:
# Display the whole response dict from the retrieval chain (not just the answer).
response

{'input': 'Are United Steelworkers of America stakeholders in the proceeding?',
 'context': [Document(page_content='NEGWA  (Labor  discussion)  \n\uf0b7 Introductions and overview of Unions in Massachusetts and New England  \n\uf0b7 Nikki Horberg Decter, Segal Roitman  representing NEGWA and other unions  \n\uf0b7 Kathy Laflash, NEGWA , 1204 United Steelworkers employee at Eversource  \n\uf0b7 Jon Buonopane , NEGWA, United St eel W orkers  \n\uf0b7 Adam Lupino , Laborers Union  International, LiUNA!  \n\uf0b7 Nikki H D: Each of the LDCs in Massachusetts has a workforce that is represented in \nwhole or in part by labor unions. The Steelworkers represent a significant portion of \nworkers at Eversource, EGMA , National Grid, and Berkshire Gas. There are other \nunions, notably utility workers who represent other portions of the internal gas workforce. \nAs Adam described , there is a w hole world of outsid e contractors who support LDCs – \nconstruction pipeline, di stribution lines, an

### Using LangChain's Tools and Agent framework to create a chatbot

#### Making our previous RAG a tool

In [67]:
#RAG Tool retriever

from langchain.tools.retriever import create_retriever_tool


# Wrap the docket-wide retriever as a tool: the name and description below
# are passed to create_retriever_tool alongside the retriever itself.
general_tool_name = "Massachusetts_2080_General_RAG"
general_tool_description = "This tool must be used to answer any specific questions about the Massachusetts 2080 proceeding a.k.a. the Future of Gas in Massachusetts, or the DPU's gas distribution proceeding."

retriever_tool = create_retriever_tool(retriever, general_tool_name, general_tool_description)

#### Including web search as a tool for our chatbot

In [69]:
#TAVILY
from langchain_community.tools.tavily_search import TavilySearchResults

# Load the Tavily API key from the environment instead of hardcoding it.
# If TAVILY_API_KEY is unset or empty, prompt for it (input is not echoed) and
# store it in the process environment for the rest of the session.
# NOTE(review): a real-looking key was previously committed in this cell; it
# should be treated as leaked and revoked.
if not os.environ.get("TAVILY_API_KEY"):
    os.environ["TAVILY_API_KEY"] = getpass.getpass("Tavily API key: ")

tavily_api_key = os.environ["TAVILY_API_KEY"]

# Create the web-search tool object.
search_tool = TavilySearchResults(tavily_api_key=tavily_api_key)


#### Creating another RAG for our chatbot with just the 140-page summary document of the D.P.U. 20-80 case

In [144]:
# Prepare the text chunks for one more RAG, dedicated to summary questions
# and built from PDF #16 only.
pdf16 = "pdf_files/pdf_16.pdf"

# Chunking setup: 1000-character chunks with a 100-character overlap.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

# Read the PDF, then cut the loaded documents into chunks.
loader = PyPDFLoader(pdf16)
doc_summary = loader.load()
documents_summary = splitter.split_documents(doc_summary)


In [120]:
## creating our vector embeddings for pdf16 summary document: (don't run again unless needed)
# vector_summary = FAISS.from_documents(documents_summary, OpenAIEmbeddings(openai_api_key=openai_api_key))

# vector_summary.save_local("pdf16_vector_store")

In [121]:
# Load the FAISS index saved under "pdf16_vector_store" and expose it as the
# retriever behind the summary RAG tool.
# NOTE(review): allow_dangerous_deserialization=True opts in to a load path
# that the library itself labels as dangerous -- only load trusted stores.
summary_embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
y = FAISS.load_local(
    "pdf16_vector_store",
    summary_embeddings,
    allow_dangerous_deserialization=True,
)

# k=10 is forwarded to the vector search, matching the general retriever.
retriever_summary = y.as_retriever(search_kwargs={"k": 10})

In [123]:
# Summary RAG tool: wraps the PDF #16 retriever for questions about the
# conclusion / final order of the proceeding.
# The description is assembled from adjacent string literals rather than a
# backslash continuation, so no source-code indentation ends up inside the
# description text.
summary_retriever_tool = create_retriever_tool(
    retriever_summary,
    "Massachusetts_2080_Summary_RAG",
    "This tool must be used to answer any summary questions about the conclusion "
    "(aka anything related to the final order) of the Massachusetts 2080 proceeding "
    "a.k.a. the Future of Gas in Massachusetts, or the DPU's gas distribution proceeding.",
)

#### Initializing the tools

In [124]:
# Everything the agent may call: the general docket RAG, web search, and the
# summary-document RAG.
tools = [
    retriever_tool,
    search_tool,
    summary_retriever_tool,
]

#### Initializing the agent

In [133]:
from langchain import hub
from langchain.agents import create_openai_tools_agent
from langchain.agents import AgentExecutor
# MessagesPlaceholder is used in the prompt below; without this import the
# cell raises NameError on a fresh kernel.
from langchain_core.prompts import MessagesPlaceholder

# Agent prompt - you can modify this!
# It consists of a system message, the running chat history, the user's
# input, and a scratchpad slot for the agent's intermediate messages.
# NOTE: this rebinds `prompt` (and `llm` below), replacing the objects created
# earlier for the plain RAG chain. Re-running the earlier RAG cells after this
# one will pick up these values instead.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. You may not need to use tools for every query - the user may just want to chat!",
        ),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

# The OpenAI key is passed explicitly here; temperature=0 is requested for the
# agent's model.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0, openai_api_key=openai_api_key)

# Build the tool-calling agent and the executor that runs it (verbose=True
# prints the agent's steps as it goes).
agent = create_openai_tools_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

#### Making the chatbot conversational by using chat history

In [134]:
# conversational agent
from langchain.memory import ChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

# One history object for the whole demo.
demo_ephemeral_chat_history_for_chain = ChatMessageHistory()


def _shared_demo_history(session_id):
    """Return the single demo history, whatever session id is supplied."""
    return demo_ephemeral_chat_history_for_chain


# Wrap the agent executor so that each call reads from and appends to the
# history: "input" is the user message key, "output" the agent reply key, and
# "chat_history" the prompt variable that receives past messages.
conversational_agent_executor = RunnableWithMessageHistory(
    agent_executor,
    _shared_demo_history,
    input_messages_key="input",
    output_messages_key="output",
    history_messages_key="chat_history",
)

### Interacting with the chatbot!

In [149]:
# Ask the conversational agent a two-part question. A session id has to be
# supplied in the config, but its value is not used to pick a history here.
user_question = "Who was the secretary of the MA Department of Public Utilities during the 2080 proceeding? What do you think his favorite pasta sauce might be based on his last name?"
session_config = {"configurable": {"session_id": "unused"}}

response = conversational_agent_executor.invoke({"input": user_question}, session_config)

Parent run 4adbdcda-6c63-496c-9e4c-ffb9d1fefc7c not found for run 2f76d791-e4da-490a-b0e3-91eb15350bd6. Treating as a root run.




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `Massachusetts_2080_General_RAG` with `{'query': 'Secretary of the MA Department of Public Utilities during the 2080 proceeding'}`


[0m[36;1m[1;3mThe Commonwealth of Massachusetts  
 ——  
DEPARTMENT OF PUBLIC UTILITIES  
  
 
 
D.P.U. 20 -80 June 29, 2021  
Investigation by the Department of Public Utilities on its own Motion into the role of gas 
local distribution companies as the Commonwealth achieves its target 2050 climate goals.  
________________________________ ________________________________ ____________   
ORDER ON ATTORNEY GENERAL’S REVISED NOTICE  OF RETENTION  
OF EXPERTS AND CONSULTANTS  
 
 APPEARANCE S: Maura Healey , Attorney General  
Commonwealth of Massachusetts  
By: Rebecca Tepper   
 Donald Boecke  
 JoAnn Bodemer  
 Jessica Freedman  
 Assistant Attorneys General  
Office of Ratepayer Advocacy  
One Ashburton Place  
Boston, Massachusetts  02108  
  Intervenor

COMMONWEALTH OF MASSACHUS

In [148]:
# Display the whole response dict from the conversational agent, including the chat history.
response

{'input': 'Who was the secretary of the MA Department of Public Utilities during the 2080 proceeding? What do you think his favorite pasta sauce might be?',
 'chat_history': [HumanMessage(content='What did the final order of Massachusetts 20-80 declare? Give me a summary with 5 bullet points.'),
  AIMessage(content="The final order of the Massachusetts 20-80 proceeding declared the following:\n\n1. The Massachusetts gas local distribution companies are directed to comply with the directives outlined in the order.\n2. A regulatory strategy has been set forth to move the Commonwealth beyond gas towards its climate objectives, including new reporting and analysis requirements.\n3. The Department will facilitate a safe, orderly, and equitable transition for the LDCs and their customers while pursuing the Commonwealth's 2050 GHG emissions reductions mandate.\n4. The order emphasizes the need to protect customers, particularly low-income and environmental justice populations, from rate shock