In [1]:
from langchain.schema import HumanMessage, SystemMessage
from langchain.chains import ConversationChain, HypotheticalDocumentEmbedder, LLMChain, RetrievalQA

from langchain import PromptTemplate
from langchain.memory import ConversationSummaryMemory
from langchain_cohere import CohereRerank
from langchain_community.chat_models import ChatMlflow
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_community.embeddings import MlflowEmbeddings
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor, EmbeddingsFilter, DocumentCompressorPipeline
from langchain_community.document_transformers import EmbeddingsRedundantFilter
from domino_data.vectordb import domino_pinecone3x_init_params, domino_pinecone3x_index_params
from langchain_pinecone import PineconeVectorStore
import os
from pinecone import Pinecone
import sys
from mlflow.deployments import get_deploy_client

import warnings
# Suppress every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides library deprecation warnings;
# consider narrowing it to specific categories/modules.
warnings.filterwarnings('ignore')

* 'schema_extra' has been renamed to 'json_schema_extra'


In [2]:
# Required settings are read from the environment; a missing variable raises
# KeyError right here instead of failing later in the notebook.
# NOTE(review): PINECONE_ENV is not referenced anywhere else in the visible
# notebook — confirm it is still needed.
PINECONE_ENV = os.environ['PINECONE_ENV']
COHERE_API_KEY = os.environ['COHERE_API_KEY']

In [3]:
# Helper function for printing docs

def pretty_print_docs(docs):
    """Print the text of each document under a numbered heading.

    Args:
        docs: iterable of objects exposing a string ``page_content`` attribute.

    Each entry is rendered as ``Document <n>:`` (1-based), a blank line, then
    the document text; consecutive entries are separated by a line of 100
    dashes. Nothing is returned.
    """
    divider = "\n" + "-" * 100 + "\n"
    sections = []
    for position, doc in enumerate(docs, start=1):
        sections.append(f"Document {position}:\n\n" + doc.page_content)
    print(divider.join(sections))

In [4]:
# Embedding model, reached through the MLflow Deployments target configured in
# the DOMINO_MLFLOW_DEPLOYMENTS environment variable.
embeddings = MlflowEmbeddings(
    endpoint="embedding-ada-002ja2",
    target_uri=os.environ["DOMINO_MLFLOW_DEPLOYMENTS"],
)

In [5]:
# Domino vector data source name; used below to look up both the Pinecone
# client parameters and the index parameters.
datasource_name = "mrag-fin-docs-ja"
# Build the Pinecone client from the data source's init parameters.
pc = Pinecone(**domino_pinecone3x_init_params(datasource_name))


# Open the Pinecone index.
# NOTE(review): the index name ("mrag-fin-docs") differs from the data source
# name ("mrag-fin-docs-ja") — confirm this is intentional.
index_name = "mrag-fin-docs"
index = pc.Index(**domino_pinecone3x_index_params(datasource_name, index_name))
text_field = "text"  # third positional argument to the vector store; presumably the metadata key holding each chunk's text — confirm
vectorstore = PineconeVectorStore(  
    index, embeddings, text_field   # uses the `embeddings` model defined above (Domino AI Gateway endpoint)
)

In [6]:
# Base retriever shared by the examples below. k=20 is forwarded as a search
# kwarg (in LangChain vector-store retrievers: the number of chunks requested per query).
retriever = vectorstore.as_retriever(search_kwargs={"k": 20})

In [7]:
# Sanity check: confirm the index is reachable and inspect its dimension and vector count.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 361}},
 'total_vector_count': 361}

In [8]:
# Chat model used by the LLM-based extractor and by the QA chain further down.
chatLLM = ChatMlflow(
    endpoint="chat-gpt4-ja",
    target_uri=os.environ["DOMINO_MLFLOW_DEPLOYMENTS"],
    temperature=0.0,
)

In [9]:

# Baseline: query the vector-store retriever directly, with no compression or reranking.
# invoke() replaces get_relevant_documents(), which LangChain has deprecated;
# it takes the query string and returns the same list of Documents.
# Alternative query to try: "Were there any product announcements by Apple in FY23?"
docs = retriever.invoke("How did the Americas do in net sales in FY23?")
pretty_print_docs(docs)

Document 1:

Segment Operating Performance
The following table shows net sales by reportable segment for 2023, 2022 and 2021 (dollars in millions):
2023 Change 2022 Change 2021
Net sales by reportable segment:
Americas $ 162,560 (4)%$ 169,658 11 %$ 153,306 
Europe 94,294 (1)% 95,118 7 % 89,307 
Greater China 72,559 (2)% 74,200 9 % 68,366 
Japan 24,257 (7)% 25,977 (9)% 28,482 
Rest of Asia Pacific 29,615 1 % 29,375 11 % 26,356 
Total net sales $ 383,285 (3)%$ 394,328 8 %$ 365,817 
Americas
Americas net sales decreased 4% or $7.1 billion  during 2023 compared to 2022 due to lower net sales of iPhone and Mac, partially offset by higher net sales of
Services.
Europe
Europe net sales decreased 1% or $824 million during 2023 compared to 2022. The weakness in foreign currencies relative to the U.S. dollar accounted for
more than the entire year-over-year decrease in Europe net sales, which consisted primarily of lower net sales of Mac and Wearables, Home and Accessories,
---------------------

In [12]:
# Built-in compressors: LLM extractor.
# LLMChainExtractor uses chatLLM to cut each retrieved chunk down to the
# passages relevant to the query (it is an extractor, not a filter).
compressor = LLMChainExtractor.from_llm(chatLLM)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    # Reuse the shared k=20 retriever instead of building an identical one.
    base_retriever=retriever,
)

# invoke() replaces the deprecated get_relevant_documents(); same input, same output.
compressed_docs = compression_retriever.invoke(
    "How did the Americas do in net sales in FY23?"
)

pretty_print_docs(compressed_docs)

Document 1:

Americas $ 50,430 $ 49,278 2 %
Americas net sales increased 2% or $1.2 billion during the first quarter of 2024 compared to the same quarter in 2023 due primarily to higher net sales of
Services and iPhone, partially offset by lower net sales of iPad. The strength in foreign currenci es relative to the U.S. dollar had a net favorable year-over-year
impact on Americas net sales during the first quarter of 2024.
----------------------------------------------------------------------------------------------------
Document 2:

Americas $ 50,430 $ 49,278 2 %
Americas net sales increased 2% or $1.2 billion during the first quarter of 2024 compared to the same quarter in 2023 due primarily to higher net sales of
Services and iPhone, partially offset by lower net sales of iPad. The strength in foreign currenci es relative to the U.S. dollar had a net favorable year-over-year
impact on Americas net sales during the first quarter of 2024.
---------------------------------------------

In [10]:
# Built-in compressors: rerank.
# Cohere re-scores the retrieved candidates against the query and the
# compression retriever returns the reranked documents.
cohere_rerank = CohereRerank(cohere_api_key=COHERE_API_KEY)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=cohere_rerank,
    # Reuse the shared k=20 retriever instead of building an identical one.
    base_retriever=retriever,
)

# invoke() replaces the deprecated get_relevant_documents(); same input, same output.
compressed_docs = compression_retriever.invoke(
    "How did the Americas do in net sales in FY23?"
)

pretty_print_docs(compressed_docs)

Document 1:

Segment Operating Performance
The following table shows net sales by reportable segment for 2023, 2022 and 2021 (dollars in millions):
2023 Change 2022 Change 2021
Net sales by reportable segment:
Americas $ 162,560 (4)%$ 169,658 11 %$ 153,306 
Europe 94,294 (1)% 95,118 7 % 89,307 
Greater China 72,559 (2)% 74,200 9 % 68,366 
Japan 24,257 (7)% 25,977 (9)% 28,482 
Rest of Asia Pacific 29,615 1 % 29,375 11 % 26,356 
Total net sales $ 383,285 (3)%$ 394,328 8 %$ 365,817 
Americas
Americas net sales decreased 4% or $7.1 billion  during 2023 compared to 2022 due to lower net sales of iPhone and Mac, partially offset by higher net sales of
Services.
Europe
Europe net sales decreased 1% or $824 million during 2023 compared to 2022. The weakness in foreign currencies relative to the U.S. dollar accounted for
more than the entire year-over-year decrease in Europe net sales, which consisted primarily of lower net sales of Mac and Wearables, Home and Accessories,
---------------------

In [11]:
# Built-in compressors: embeddings filter.
# Keeps only retrieved chunks whose embedding similarity to the query clears
# the 0.76 threshold. `relevant_filter` is reused by the pipeline cells below.
relevant_filter = EmbeddingsFilter(embeddings=embeddings, similarity_threshold=0.76)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=relevant_filter, base_retriever=retriever
)

# invoke() replaces the deprecated get_relevant_documents(); same input, same output.
# Alternative query to try: "Was there any pending litigation?"
compressed_docs = compression_retriever.invoke(
    "How much revenue did the Americas generate in FY23?"
)
pretty_print_docs(compressed_docs)

Document 1:

Segment Operating Performance
The following table shows net sales by reportable segment for 2023, 2022 and 2021 (dollars in millions):
2023 Change 2022 Change 2021
Net sales by reportable segment:
Americas $ 162,560 (4)%$ 169,658 11 %$ 153,306 
Europe 94,294 (1)% 95,118 7 % 89,307 
Greater China 72,559 (2)% 74,200 9 % 68,366 
Japan 24,257 (7)% 25,977 (9)% 28,482 
Rest of Asia Pacific 29,615 1 % 29,375 11 % 26,356 
Total net sales $ 383,285 (3)%$ 394,328 8 %$ 365,817 
Americas
Americas net sales decreased 4% or $7.1 billion  during 2023 compared to 2022 due to lower net sales of iPhone and Mac, partially offset by higher net sales of
Services.
Europe
Europe net sales decreased 1% or $824 million during 2023 compared to 2022. The weakness in foreign currencies relative to the U.S. dollar accounted for
more than the entire year-over-year decrease in Europe net sales, which consisted primarily of lower net sales of Mac and Wearables, Home and Accessories,
---------------------

In [13]:
# Compressor pipeline: drop redundant chunks first, then apply the relevance
# filter. `relevant_filter` comes from the embeddings-filter cell above.
redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)
pipeline_compressor = DocumentCompressorPipeline(
    transformers=[redundant_filter, relevant_filter]
)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=pipeline_compressor, base_retriever=retriever
)

# invoke() replaces the deprecated get_relevant_documents(); same input, same output.
# Alternative query to try: "Was there any pending litigation?"
compressed_docs = compression_retriever.invoke(
    "How much revenue did the Americas generate in FY23?"
)
pretty_print_docs(compressed_docs)

Document 1:

impact on Americas net sales during the first quarter of 2024.
Europe
Europe net sales increased 10% or $2.7 billion during the first quarter of 2024 compared to the same quarter in 2023 due primarily to higher net sales of iPhone.
The strength in foreign currencies relative to the U.S. dollar had a net favorable year-over-year impact on Europe net sales during the first quarter of 2024.
Greater China
Greater China net sales decreased 13% or $3.1 billion during the first quarter of 2024 compared to the same quarter in 2023 due primarily to lower net sales of
iPhone, iPad and Wearables, Home and Accessories. The weakness in the renminbi relative to the U.S. dollar had an unfavorable year-over-year impact on
Greater China net sales during the first quarter of 2024.
Japan
Japan net sales increase d 15% or $1.0 billion during the first quarter of 2024 compared to the same quarter in 2023 due primarily to higher net sales of iPhone.
---------------------------------------------

In [15]:
# NOTE(review): this cell builds the same redundant-filter + relevance-filter
# pipeline and runs the same query as the preceding cell; one of the two can
# likely be removed.
redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)
pipeline_compressor = DocumentCompressorPipeline(
    transformers=[redundant_filter, relevant_filter]
)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=pipeline_compressor, base_retriever=retriever
)

# invoke() replaces the deprecated get_relevant_documents(); same input, same output.
# Alternative query to try: "Was there any pending litigation?"
compressed_docs = compression_retriever.invoke(
    "How much revenue did the Americas generate in FY23?"
)
pretty_print_docs(compressed_docs)

Document 1:

impact on Americas net sales during the first quarter of 2024.
Europe
Europe net sales increased 10% or $2.7 billion during the first quarter of 2024 compared to the same quarter in 2023 due primarily to higher net sales of iPhone.
The strength in foreign currencies relative to the U.S. dollar had a net favorable year-over-year impact on Europe net sales during the first quarter of 2024.
Greater China
Greater China net sales decreased 13% or $3.1 billion during the first quarter of 2024 compared to the same quarter in 2023 due primarily to lower net sales of
iPhone, iPad and Wearables, Home and Accessories. The weakness in the renminbi relative to the U.S. dollar had an unfavorable year-over-year impact on
Greater China net sales during the first quarter of 2024.
Japan
Japan net sales increase d 15% or $1.0 billion during the first quarter of 2024 compared to the same quarter in 2023 due primarily to higher net sales of iPhone.
---------------------------------------------

In [15]:
# Retriever handed to the QA chain below: drop redundant chunks first, then
# keep only chunks that clear the 0.76 similarity threshold.
redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)
relevant_filter = EmbeddingsFilter(similarity_threshold=0.76, embeddings=embeddings)
pipeline_compressor = DocumentCompressorPipeline(transformers=[redundant_filter, relevant_filter])
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=pipeline_compressor,
)

In [14]:
# Detailed prompt for the QA chain. The template has two placeholders,
# {question} and {context}, which are declared as input variables below.
# NOTE(review): the next cell reassigns prompt_template, PROMPT and
# chain_type_kwargs, so this version is overridden when the notebook is run
# top to bottom — keep only the variant that is actually wanted.
prompt_template = """As an advanced Retrieve-and-Generate (RAG) Chatbot with expertise in financial analysis, your task is to dissect corporate filings (e.g., 10-K, 10-Q, 8-K reports) of publicly traded companies and provide detailed, accurate responses to user queries about the company's financial health, market position, and future prospects. When interacting, adhere to the following guidelines:
- Context Understanding: Thoroughly comprehend the context provided, which includes excerpts or summaries from the company’s latest corporate filings. This context is your foundational source for analysis.
- Question Analysis: Analyze the user’s specific question to understand which aspect of the corporate filings it relates to, such as financial performance, risk factors, management discussion, market position, future outlook, or investment considerations.
- Structured Response: Base your response on the appropriate section(s) of the corporate filings pertinent to the question, ensuring your answer is data-driven.
- Detailed Inquiry Response: Address financial performance, risk factors, management discussion, market position, future outlook, or investment considerations with focused, evidence-backed answers.
- Evidence-Based Justification: Support your responses with direct evidence from the provided context, offering insights derived from the corporate filings.
- Clarity and Precision: Maintain clarity and precision in your responses, using accessible language and avoiding or explaining necessary financial jargon.
- Handling Unknown Answers: If the information needed to answer the question is not available in the provided context or exceeds the chatbot's analysis capabilities, respond with, "I don't have enough information to answer that question accurately. Could you provide more details or ask about another aspect?"
- Addressing Irrelevant Questions: If the question is not related to the context of corporate filings, politely respond with, "I'm here to help analyze financial documents and related inquiries. Could you ask a question related to the company's corporate filings?"
- Primary Objective is to deliver insightful, accurate, and helpful answers that enable users to make informed decisions based on corporate filings analysis. Each response should be tailored to the user's question, enhancing understanding of the company's financial status and strategic direction.
You are given the following question and extracted parts as context. 
Question: {question}
=========
{context}
=========
Answer in Markdown:
"""
PROMPT = PromptTemplate(template=prompt_template, input_variables=["question", "context"])
# Keyword arguments intended for the chain constructor (carries the prompt).
chain_type_kwargs = {"prompt": PROMPT}

In [36]:
# Shorter prompt variant. Running this cell replaces the prompt_template,
# PROMPT and chain_type_kwargs defined in the previous cell.
# Earlier, longer wording kept for reference (not used):
#   You are an AI assistant with expertise in financial analysis, your task is to dissect corporate filings (e.g., 10-K, 10-Q, 8-K reports) of publicly traded companies and provide detailed, accurate responses to the following user question about the company's financial health, market position, and future prospects.
#   When interacting, adhere to the following guidelines:
prompt_template = """You are an AI assistant with expertise in financial analysis. You are given the following extracted parts and a question. 
Question: {question}
=========
{context}
=========
Answer in Markdown:
"""
PROMPT = PromptTemplate(template=prompt_template, input_variables=["question", "context"])
# Keyword arguments intended for the chain constructor (carries the prompt).
chain_type_kwargs = {"prompt": PROMPT}

In [16]:
# Assemble the "stuff" RetrievalQA chain on top of the compression retriever,
# returning the source documents alongside the answer.
# (Plain alternative retriever: vectorstore.as_retriever(search_kwargs={"k": 5}).)
qa_chain = RetrievalQA.from_chain_type(
    llm=chatLLM,
    chain_type="stuff",
    retriever=compression_retriever,
    chain_type_kwargs={"prompt": PROMPT},
    return_source_documents=True,
)

In [17]:
# Ask interactively, then run the QA chain.
user_question = input("Please ask your question:")
# Calling the chain object directly (Chain.__call__) is deprecated in LangChain;
# invoke() takes the chain's input mapping — RetrievalQA reads the question
# from the "query" key — and returns the same result dict.
result = qa_chain.invoke({"query": user_question})

Please ask your question: How much revenue did the Americas generate in FY23?


In [18]:
# Show only the answer text from the chain's result dict.
result["result"]

'The Americas generated $162.56 billion in revenue in FY23. This was a decrease of 4% or $7.1 billion compared to the previous year (FY22), primarily due to lower net sales of iPhone and Mac, which was partially offset by higher net sales of Services.'

In [31]:
# NOTE(review): the saved output of this cell discusses operating income, not
# the Americas-revenue question shown above — it appears to come from a later
# re-run with a different question. Re-run (or remove) to refresh.
result["result"]

"To compare Apple's operating income in FY23 to FY22, we can look at the Consolidated Statements of Operations provided in the 2023 Form 10-K report.\n\nIn FY23 (year ended September 30, 2023), Apple reported an operating income of $114,301 million. Comparatively, in FY22 (year ended September 24, 2022), the operating income was $119,437 million. This indicates a decrease in operating income from FY22 to FY23.\n\nThe decrease in operating income can be attributed to various factors mentioned in the report, such as lower net sales of Mac and iPhone, partially offset by higher net sales of Services. Additionally, the operating expenses for FY23 increased by 7% compared to FY22, with research and development expenses growing by 14% and selling, general, and administrative expenses remaining relatively flat.\n\nOverall, the decrease in operating income from FY22 to FY23 reflects the impact of changes in net sales and operating expenses on Apple's financial performance during the respective

In [27]:
# NOTE(review): repeated display of result["result"]; the saved output appears
# to come from a different run/question than the one shown above.
result["result"]

"To compare Apple's operating income in FY23 to FY22, we can see that the operating income for FY23 was $114,301 million, while the operating income for FY22 was $119,437 million. Therefore, Apple's operating income decreased by $5,136 million or approximately 4.3% in FY23 compared to FY22."

In [22]:
# NOTE(review): repeated display of result["result"]; the saved output appears
# to come from a different run/question than the one shown above.
result["result"]

"To compare Apple's operating income in FY23 to FY22, we can look at the Consolidated Statements of Operations from the 10-K filings. In FY23, Apple's operating income was $114,301 million, while in FY22, it was $119,437 million. This indicates a decrease in operating income from FY22 to FY23. Specifically, the operating income decreased by $5,136 million or approximately 4.3%. This decrease in operating income can be attributed to various factors such as lower net sales of Mac and iPhone, partially offset by higher net sales of Services. It's important to note that the decrease in operating income may impact the overall financial health and performance of Apple in the respective fiscal years."

In [None]:
# Dump the whole result dict returned by the QA chain (built with return_source_documents=True).
print(result)

In [None]:
# Retrieval only (no LLM answer): show what the compression retriever returns
# for an interactively entered question.
user_question = input("Please ask your financial question:")
# invoke() replaces the deprecated get_relevant_documents(); same input, same output.
compressed_docs = compression_retriever.invoke(user_question)
# Print the raw Document objects that survived the compression pipeline.
print(compressed_docs)

In [None]:
# NOTE(review): `result` is not reassigned by the retrieval-only cell above, so
# this prints the QA result from the earlier chain call again.
print(result)

In [None]:
# Re-check the index statistics (same call as near the top of the notebook).
index.describe_index_stats()


### Test harness for the Domino Job 

In [None]:
# Execute the ingestion script through IPython's %run magic.
# NOTE(review): absolute path — assumes the project code is mounted at /mnt/code.
%run /mnt/code/services/data_ingestion.py
