In [1]:
import os
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import AzureOpenAIEmbeddings
from langchain.embeddings import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFDirectoryLoader

In [2]:
# !python -m pip install langchain-openai==0.0.6 openai==1.12.0 langchain python-dotenv faiss-cpu pypdf

In [3]:
from dotenv import load_dotenv
load_dotenv(".env")

True

In [5]:
from langchain.chat_models import AzureChatOpenAI
llm = AzureChatOpenAI(
    azure_deployment=os.getenv('AZURE_OPENAI_CHAT_DEPLOYMENT'),
    azure_endpoint=os.getenv('AZURE_OPENAI_CHAT_ENDPOINT'),
    openai_api_key=os.getenv('AZURE_OPENAI_CHAT_API_KEY'),
    api_version=os.getenv('AZURE_OPENAI_CHAT_VERSION'),
    verbose=False,
    temperature=0.3,
)

  warn_deprecated(


In [6]:
from langchain.schema import (
    SystemMessage,
    HumanMessage,
    AIMessage
)
prompt = "Provide a detailed company overview for a pitch book presentation."
messages = [
    SystemMessage(content=f"You are an assistant helping with investment banking slides.  {prompt}" )                
]

In [7]:
response = llm(
        messages= messages,
        max_tokens=100,
        temperature=0.2,
    )
response.content

  warn_deprecated(


"Company Overview: XYZ Corporation\n\nIntroduction:\nXYZ Corporation is a leading global technology company that specializes in the development and distribution of innovative software and hardware solutions. The company was founded in 1995 and has since grown to become one of the most respected and recognized brands in the technology industry.\n\nProducts and Services:\nXYZ Corporation offers a wide range of products and services that cater to the needs of businesses and individuals alike. The company's flagship product is its operating system, which is used by millions of people around"

In [9]:
embeddings = AzureOpenAIEmbeddings(
        azure_deployment=os.getenv('AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT'),
        azure_endpoint=os.getenv('AZURE_OPENAI_EMBEDDINGS_ENDPOINT'),
        api_key=os.getenv('AZURE_OPENAI_EMBEDDINGS_API_KEY'),
        api_version=os.getenv('AZURE_OPENAI_EMBEDDINGS_VERSION')
    )

In [10]:
def create_vector_database(txt_path):
    loader=PyPDFDirectoryLoader(txt_path)
    docs=loader.load()

    documents = RecursiveCharacterTextSplitter(
        chunk_size=1000, separators=["\n","\n\n"], chunk_overlap=200
    ).split_documents(docs)

    embeddings = AzureOpenAIEmbeddings(
        azure_deployment=os.getenv('AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT'),
        azure_endpoint=os.getenv('AZURE_OPENAI_EMBEDDINGS_ENDPOINT'),
        api_key=os.getenv('AZURE_OPENAI_EMBEDDINGS_API_KEY'),
        api_version=os.getenv('AZURE_OPENAI_EMBEDDINGS_VERSION')
    )
    db = FAISS.from_documents(
        documents=documents,
        embedding=embeddings
    )
    db.save_local("./faiss-db")

In [11]:
create_vector_database('10k')

In [12]:
from langchain.prompts import PromptTemplate

In [13]:
prompt_template = """
Human: Use the following pieces of context to provide a 
concise answer to the question at the end but use atleast summarize with 
250 words with detailed explantions. If you don't know the answer, 
just say that you don't know, don't try to make up an answer.
<context>
{context}
</context

Question: {question}

Assistant:"""

In [14]:
PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

In [15]:
vectorstore = FAISS.load_local("./faiss-db", embeddings, allow_dangerous_deserialization=True)

In [16]:
retriever = vectorstore.as_retriever()

In [17]:
question = "Get the company performance"

In [18]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [19]:
rag_chain = (
    {"context": vectorstore.as_retriever(search_kwargs={"k": 1}), "question": RunnablePassthrough()}
    | PROMPT
    | llm
    | StrOutputParser()
)

In [20]:
print(rag_chain.invoke(question))

The provided context shows a graph comparing the five-year cumulative total shareholder return of Apple Inc., the S&P 500 Index, and the Dow Jones U.S. Technology Supersector Index. The graph assumes $100 was invested in each of the three options as of September 28, 2018. The past stock price performance is not necessarily indicative of future stock price performance. According to the graph, as of September 30, 2023, Apple Inc. had a cumulative total return of $317, while the S&P 500 Index had a cumulative total return of $160. Therefore, based on this information, Apple Inc. has outperformed the S&P 500 Index in terms of cumulative total shareholder return over the past five years.


In [21]:
len(retriever.get_relevant_documents(question)[0].page_content.split(" "))

  warn_deprecated(


150