In [1]:
from langchain.document_loaders import PyPDFLoader

import os
from dotenv import load_dotenv


In [2]:
# Pull variables from a local .env file into the process environment.
load_dotenv()

# Copy the project-specific variable names onto the names assigned below
# (presumably the ones the Azure OpenAI clients read -- confirm against the
# langchain_openai documentation). os.getenv() returns None for an unset
# variable and assigning None into os.environ raises an opaque TypeError, so
# check each value first and report exactly which variable is missing.
for _target_var, _source_var in (
    ("AZURE_OPENAI_API_KEY", "AZURE_OAI_KEY"),
    ("AZURE_OPENAI_ENDPOINT", "AZURE_OAI_ENDPOINT"),
):
    _value = os.getenv(_source_var)
    if not _value:
        raise EnvironmentError(
            f"Environment variable {_source_var!r} is not set; "
            "define it in the shell or in the .env file."
        )
    os.environ[_target_var] = _value

In [3]:
from langchain_openai import AzureOpenAIEmbeddings

# Embedding client bound to the "text-embedding" Azure deployment,
# pinned to API version 2023-05-15.
embeddings = AzureOpenAIEmbeddings(
    openai_api_version="2023-05-15",
    azure_deployment="text-embedding",
)

# text1 = "apple"
# text2 = "banana"
# text3 = "rain"

# query_result = embeddings.embed_query(text1)
# doc_result = embeddings.embed_documents([text1])

# print(doc_result[0][:5])

In [4]:
# Point a PDF loader at the report; nothing is read until the loader is used.
file_path = "data/bp-report.pdf"
loader = PyPDFLoader(file_path)

In [5]:
# Load the PDF and split it into chunks
pages = loader.load_and_split()

# An empty result would otherwise surface as a bare IndexError here (and as an
# equally obscure failure when the vector store is built), so fail with an
# explicit message instead.
if not pages:
    raise ValueError(
        "The PDF loader returned no chunks; check that the file exists "
        "and contains extractable text."
    )

# Show the first chunk as a quick sanity check
print(pages[0])

page_content="Performing while transforming\nFinancial summary Third Second Third Nine Nine\nquarter quarter quarter months months\n$ million 2023 2023 2022 2023 2022\nProfit (loss) for the period attributable to bp shareholders  4,858  1,792  (2,163)  14,868  (13,290) \nInventory holding (gains) losses*, net of tax  (1,212)  549  2,186  (211)  (2,085) \nReplacement cost (RC) profit (loss)*  3,646  2,341  23  14,657  (15,375) \nNet (favourable) adverse impact of adjusting items*, net of tax  (353)  248  8,127  (3,812)  38,221 \nUnderlying RC profit*  3,293  2,589  8,150  10,845  22,846 \nOperating cash flow*  8,747  6,293  8,288  22,662  27,361 \nCapital expenditure*  (3,603)  (4,314)  (3,194)  (11,542)  (8,961) \nDivestment and other proceeds (a) 655  88  606  1,543  2,509 \nSurplus cash flow*  3,107  (269)  3,496  5,121  14,080 \nNet issue (repurchase) of shares  (2,047)  (2,073)  (2,876)  (6,568)  (6,756) \nNet debt* (b) 22,324  23,660  22,002  22,324  22,002 \nAdjusted EBITDA*  10,

In [6]:
# Embed every chunk into a FAISS index, then persist the index to disk
from langchain.vectorstores import FAISS

db = FAISS.from_documents(pages, embeddings)
db.save_local(folder_path="data/faiss_index")


In [8]:
from langchain_openai import AzureChatOpenAI

# Chat model client: the API type and version are fixed here, while the
# deployment and model names are read from the environment.
llm = AzureChatOpenAI(
    openai_api_type="azure",
    openai_api_version="2023-05-15",
    deployment_name=os.getenv("AZURE_OAI_DEPLOYMENT"),
    model_name=os.getenv("AZURE_OAI_MODEL"),
)

In [11]:
from langchain.chains import ConversationalRetrievalChain
from langchain.chains.question_answering import load_qa_chain

# Load FAISS vector store saved locally
# NOTE(review): newer langchain-community releases are reported to require
# allow_dangerous_deserialization=True on load_local -- confirm against the
# installed version before upgrading.
vectorStore = FAISS.load_local("data/faiss_index", embeddings)

# Use the vector store to search the local document; each query returns the
# 2 most similar chunks.
retriever = vectorStore.as_retriever(search_type="similarity", search_kwargs={"k": 2})

# Build the conversational retrieval chain. The original cell passed an empty
# `condense_question_prompt=,` argument, which is a syntax error; the keyword
# is omitted here so the library's default condense-question prompt is used.
# Pass a prompt template explicitly if a custom one is wanted.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
    verbose=False
)

<class 'langchain.chains.conversational_retrieval.base.ConversationalRetrievalChain'>


In [21]:
import streamlit as st

# Run a single question through the chain and record the exchange
def ask_question_with_context(qa, question, chat_history):
    """Ask ``question`` via ``qa`` and log the turn in ``chat_history``.

    ``qa`` is called with a dict holding the ``"question"`` and the current
    ``"chat_history"``. The ``"answer"`` entry of its result is paired with
    the question and appended to ``chat_history`` in place.

    Returns the same ``chat_history`` list that was passed in.
    """
    payload = {"question": question, "chat_history": chat_history}
    outcome = qa(payload)
    turn = (question, outcome["answer"])
    chat_history.append(turn)
    return chat_history

user_query = st.text_input("Ask a question:")

# Streamlit re-executes the whole script on every widget interaction, so a
# plain `chat_history = []` would wipe the conversation before each question
# and the chain would never see earlier turns. Keep the list in session state
# instead; setdefault creates it on the first run and returns the stored list
# on later runs, so in-place appends persist.
chat_history = st.session_state.setdefault("chat_history", [])

# The button is evaluated first so it is always rendered; an empty query is
# ignored.
if st.button("Submit") and user_query:
    st.write("User Query:", user_query)
    chat_history = ask_question_with_context(qa, user_query, chat_history)
    # The last history entry is the (question, answer) pair just added.
    response = chat_history[-1][1] if chat_history else "No response"
    st.write("Answer:", response)

2024-02-06 17:28:55.895 
  command:

    streamlit run /Users/z.gurney/miniconda3/envs/ml-test/lib/python3.10/site-packages/ipykernel_launcher.py [ARGUMENTS]
