In [1]:
# Install the Ollama runtime with the official install script.
# The pip install of the `ollama` Python package is left disabled; the notebook
# talks to the server through LangChain's Ollama wrappers instead.
#!pip install ollama
!curl -fsSL https://ollama.com/install.sh | sh

>>> Cleaning up old version at /usr/local/lib/ollama
>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
############################################################################################# 100.0%
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.


In [2]:
# Start the Ollama server locally in the background and wait until it accepts
# connections, instead of sleeping for a fixed time and hoping it is ready.
import socket
import subprocess
import time

OLLAMA_ADDRESS = ("127.0.0.1", 11434)  # address reported by the installer output
OLLAMA_STARTUP_TIMEOUT = 60  # seconds to wait for the server before giving up

# An argument list (no shell=True) avoids shell parsing, and a missing `ollama`
# binary raises FileNotFoundError immediately instead of failing silently.
process = subprocess.Popen(["ollama", "serve"])

_deadline = time.monotonic() + OLLAMA_STARTUP_TIMEOUT
while True:
    try:
        # A successful TCP connect means the API port is being served, either by
        # the process started above or by an instance that was already running.
        with socket.create_connection(OLLAMA_ADDRESS, timeout=1):
            break
    except OSError:
        if time.monotonic() >= _deadline:
            raise TimeoutError(
                f"Ollama did not start listening on {OLLAMA_ADDRESS[0]}:{OLLAMA_ADDRESS[1]} "
                f"within {OLLAMA_STARTUP_TIMEOUT} seconds"
            )
        time.sleep(0.5)

In [3]:
# Pull the Llama 3.1 8B model; the tag matches the MODEL value set in the configuration cell.
!ollama pull llama3.1:8b

[?25lpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠸ [?25h[?25l[2K[1Gpulling manifest ⠸ [?25h[?25l[2K[1Gpulling manifest 
pulling 667b0c1932bc... 100% ▕▏ 4.9 GB                         
pulling 948af2743fc7... 100% ▕▏ 1.5 KB                         
pulling 0ba8f0e314b4... 100% ▕▏  12 KB                         
pulling 56bb8bd477a5... 100% ▕▏   96 B                         
pulling 455f34728c9b... 100% ▕▏  487 B                         
verifying sha256 digest 
writing manifest 
success [?25h


In [4]:
# Verify that the pulled model is available locally
!ollama list

NAME                       ID              SIZE      MODIFIED               
llama3.1:8b                46e0c10c039e    4.9 GB    Less than a second ago    
nomic-embed-text:latest    0a109f422b47    274 MB    5 minutes ago             


In [5]:
# Pull the embedding model that the model-loading cell passes to OllamaEmbeddings
!ollama pull nomic-embed-text

[?25lpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulling manifest 
pulling 970aa74c0a90... 100% ▕▏ 274 MB                         
pulling c71d239df917... 100% ▕▏  11 KB                         
pulling ce4a164fc046... 100% ▕▏   17 B                         
pulling 31df23ea7daa... 100% ▕▏  420 B                         
verifying sha256 digest 
writing manifest 
success [?25h


In [6]:
!pip install langchain_community langchain langchain-openai langchain_pinecone langchain[docarray] docarray pydantic==1.10.8 pytube python-dotenv tiktoken pinecone-client scikit-learn ruff pypdf faiss-cpu sentence-transformers flask-ngrok flask flask-ngrok




In [7]:
import os
from langchain_community.vectorstores import FAISS
from langchain_community.llms import Ollama
from langchain_openai.chat_models import ChatOpenAI
from langchain_community.embeddings import OllamaEmbeddings
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import DocArrayInMemorySearch
from operator import itemgetter
from langchain.embeddings import SentenceTransformerEmbeddings

In [8]:

# Runtime configuration: which LLM and which vector store the notebook uses.
# An OpenAI API key is only needed when MODEL starts with "gpt"; in that case
# enable the dotenv lines and the OPENAI_API_KEY line below.
#from dotenv import load_dotenv

#load_dotenv()

#OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# MODEL: names starting with "gpt" select ChatOpenAI in the model-loading cell;
# any other name is passed to the Ollama wrapper.
#MODEL = "gpt-3.5-turbo"
#MODEL = "mixtral:8x7b"
MODEL = "llama3.1:8b"
#MODEL = "llama3.1:70b"

# VECTOR_STORE: "FAISS" builds a FAISS index; any other value (including the
# alternatives listed here) falls through to DocArrayInMemorySearch.
#VECTOR_STORE = "default"
VECTOR_STORE = "FAISS"
#VECTOR_STORE = "pinecone"

In [9]:
# Load the chat model and a matching embedding model based on MODEL.
if MODEL.startswith("gpt"):
    # Read the key from the environment rather than from a notebook variable:
    # OPENAI_API_KEY is only assigned in a commented-out line of the config cell,
    # so referencing it directly would raise NameError.
    openai_api_key = os.environ.get("OPENAI_API_KEY")
    if not openai_api_key:
        raise RuntimeError(
            "OPENAI_API_KEY must be set in the environment to use an OpenAI model"
        )
    model = ChatOpenAI(openai_api_key=openai_api_key, model=MODEL)
    embeddings = OpenAIEmbeddings()
else:
    # Low temperature keeps the financial answers close to deterministic.
    model = Ollama(model=MODEL, temperature=0.2, top_p=1)
    # Alternative local embedding model:
    #embeddings = SentenceTransformerEmbeddings(model_name="all-mpnet-base-v2")
    embeddings = OllamaEmbeddings(model="nomic-embed-text")

# Smoke test (uncomment to run):
#model.invoke("what is ML")

In [10]:
# Output parser applied after the model in the pipelines built in this notebook.
parser = StrOutputParser()

# Minimal model -> parser pipeline, useful for smoke-testing the model on its own.
# NOTE: `chain` is reassigned to the full retrieval pipeline in a later cell.
chain = model | parser

# Smoke test (uncomment to run):
#chain.invoke("what is ML")

In [11]:
# Load the company PDF report.
import copy

loader = PyPDFLoader("/content/gxocompany.pdf")
# Use load() rather than load_and_split(): load_and_split() pre-splits the text with a
# default splitter that adds overlap between chunks. Since the pages are concatenated
# and re-split later in the notebook, that overlap would end up duplicated in the
# combined text.
pages = loader.load()
# Independent copy, so later edits to `pages` cannot affect the documents used for chunking.
pages_original = copy.deepcopy(pages)
#pages_original[35]

In [12]:
# Merge the text of all loaded PDF documents into a single string.
# A blank line is inserted between documents so the last word of one page is not
# fused with the first word of the next; "\n\n" is also the splitter's highest-priority
# separator, which makes page boundaries natural split points.
# str.join avoids the repeated string copying of += in a loop.
combined_content = "\n\n".join(page.page_content for page in pages_original)

# Print or use the combined content
#print(combined_content)

In [13]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter settings, named so they can be tuned in one place.
CHUNK_SIZE = 1000  # target chunk length; adjust based on document size
CHUNK_OVERLAP = 200  # text shared by neighbouring chunks so context isn't lost
SPLIT_SEPARATORS = ["\n\n", "\n", " "]  # try paragraph breaks first, then lines, then spaces

text_splitter = RecursiveCharacterTextSplitter(
    separators=SPLIT_SEPARATORS,
    chunk_overlap=CHUNK_OVERLAP,
    chunk_size=CHUNK_SIZE,
)

# Chunk the combined PDF text as one document (alternative: split page by page).
#chunks = text_splitter.split_documents(pages)
pdf_texts = [combined_content]
chunks = text_splitter.create_documents(pdf_texts)

In [14]:
import pandas as pd
from langchain.schema import Document

# Load the financial statement workbook into a DataFrame.
df = pd.read_excel('/content/gxo_statement.xlsx')

separator = "\n\n"  # blank line between rows; also the splitter's first separator

# Flatten each spreadsheet row into one line of text.
# - Blank cells (NaN/NaT) are skipped so the literal string "nan" is not embedded.
# - itertuples() keeps each column's own dtype, whereas iterrows() builds a Series
#   per row and may upcast values (e.g. ints rendered as floats).
# - Rows with no values at all are dropped.
row_texts = []
for row in df.itertuples(index=False, name=None):
    cells = [str(cell) for cell in row if pd.notna(cell)]
    if cells:
        row_texts.append(" ".join(cells))
all_content = separator.join(row_texts)

# Tag the source on the parent document; split_documents() copies the parent's
# metadata onto every chunk, so no post-split patching is needed.
document = Document(page_content=all_content, metadata={"source": "financial data"})
chunks_excel = text_splitter.split_documents([document])

#print(chunks_excel)

# Excel chunks first, followed by the PDF chunks.
all_chunks = chunks_excel + chunks
#print(all_chunks)

In [15]:
# Build the vector store selected by VECTOR_STORE from all chunks (Excel + PDF).
if VECTOR_STORE == "FAISS":
    #vectorstore = FAISS.from_documents(pages, embeddings)
    vectorstore = FAISS.from_documents(all_chunks, embeddings)
else:
    # Index the same combined chunk list as the FAISS branch. Indexing only `chunks`
    # would leave the Excel financial data out of the store for every non-FAISS setting.
    vectorstore = DocArrayInMemorySearch.from_documents(all_chunks, embedding=embeddings, verbose=True)

In [16]:
def keyword_aware_retriever(question, k=18, max_results=20):
    """Retrieve context chunks for `question`, prioritising the Excel financial data.

    When the question mentions "financial data" (case-insensitive), a metadata-filtered
    search for chunks tagged ``source == "financial data"`` runs first. If that search
    yields no such chunk, all of ``chunks_excel`` is used instead. A regular similarity
    search is then appended, skipping documents already selected.

    Args:
        question: The user question (string).
        k: Number of chunks requested from the regular similarity search.
        max_results: Maximum number of documents returned.

    Returns:
        A list of at most `max_results` documents, keyword matches first.
    """
    financial_filter = {"source": "financial data"}
    keyword_results = []

    if "financial data" in question.lower():
        # Pass k/filter as direct keyword arguments. Wrapping them in `search_kwargs=`
        # (the as_retriever() convention) is silently ignored by the vector store's
        # search methods, so the default k would be used and no filter applied.
        keyword_results = vectorstore.similarity_search(
            question, k=1, filter=financial_filter
        )

        # Stores that do not support metadata filters may return untagged documents,
        # so confirm the tag before trusting the result.
        has_financial_data = any(
            doc.metadata.get("source") == "financial data" for doc in keyword_results
        )

        # Fall back to the full set of Excel chunks when nothing tagged came back.
        if not has_financial_data:
            keyword_results = chunks_excel + keyword_results

    # Regular semantic search over the whole store.
    regular_results = vectorstore.similarity_search(question, k=k)

    # Keyword matches go first; drop regular hits that are already included.
    combined_results = keyword_results + [
        doc for doc in regular_results if doc not in keyword_results
    ]

    return combined_results[:max_results]

In [17]:
# Plain similarity retriever over the vector store, requesting the top 20 chunks.
# NOTE(review): the chain cell calls keyword_aware_retriever() instead; this retriever
# is only referenced from commented-out code there, so it appears to be unused.
retriever = vectorstore.as_retriever(search_kwargs={"k": 20})

# Smoke test (uncomment to run):
#retriever.invoke("balance sheets")

In [18]:
def print_context(inputs):
    """Debug pass-through step: print the retrieved context, then hand `inputs` on.

    Args:
        inputs: Mapping that contains a "context" entry.

    Returns:
        The same `inputs` object, unmodified, so the step can sit inside a chain.
    """
    print("Context:")
    retrieved_context = inputs["context"]
    print(retrieved_context)
    return inputs

In [19]:
# Prompt template for the financial-analysis chain.
# It has two placeholders: {context} (the retrieved financial data) and
# {question} (the user's request); both are filled in when the chain runs.
template = """
Analyze the provided financial data, focusing on the specified financial metric and its impact.

When conducting your analysis, consider the following:

1.  **Key Figures:** State the key financial figures for the current and prior periods relevant to the specified metric.
2.  **Change Analysis:** Calculate and state the year-over-year (y/y) change in the metric, including both absolute values and percentages.
3.  **Explanation of Changes:** Provide concise and specific reasons for the observed changes in the financial metric.

In addition, incorporate the following into your analysis as relevant:

-   **Related Ratios or Metrics:** Include any relevant financial ratios or metrics that provide further context or insights into the changes in the primary metric. Analyze their trends (stable, increased, decreased).
-   **Impact on Profitability:** If applicable, discuss the impact of the changes in the financial metric on profitability (e.g., net operating profit, gross profit).

Ensure the following:
-   Use specific numerical values, including dollar amounts and percentages, where applicable.
-   Provide concise and specific reasons for changes.
-   Avoid unnecessary details.
-   Present your analysis in a clear and concise manner.

Financial Data: {context}

Question: {question}
"""

# Build the prompt object from the template string above.
prompt = PromptTemplate.from_template(template)

# Smoke test (uncomment to render the prompt with sample values):
#prompt.format(context="Here is some context", question="Here is a question")

In [20]:
# Assemble the full pipeline: retrieve context -> fill prompt -> model -> string output.
from langchain_core.runnables import RunnableLambda


def _format_context(docs):
    """Join the text of the retrieved documents, separated by blank lines."""
    return "\n\n".join(doc.page_content for doc in docs)


def _build_prompt_inputs(inputs):
    """Map {"question": ...} to the {"context", "question"} dict the prompt expects."""
    question = inputs["question"]
    return {
        # Pass plain text to the prompt. Interpolating the raw list of Document objects
        # would insert their repr (metadata, escaped newlines) into the prompt.
        "context": _format_context(keyword_aware_retriever(question)),
        "question": question,
    }


chain = (
    RunnableLambda(_build_prompt_inputs)
    #| RunnableLambda(print_context)  # uncomment to print the retrieved context
    | prompt
    | model
    | parser
)

In [21]:
# Analysis questions sent to the chain, one per list entry. Each entry is a multi-line
# prompt describing the metric to analyse and the structure expected in the answer.
# The two commented-out entries are earlier, shorter questions.
questions = [
    #"provide finanacial analysis as underwriter in financial company by going to complete context that has in company annual reports",
     #"how is the revenue growth of company in 2023",
   """
  Analyze the financial data and provide a structured summary of revenue growth for the current and prior periods, using the keyword "revenue" to pick up relevant figures.

Your response should include:

1. Total revenue for the current and prior periods, expressed in the appropriate units (e.g., millions, billions).
2. Year-over-year (y/y) change in revenue, including:
    - Absolute change in dollars ($).
    - Percentage change (%).
3. Key factors contributing to the revenue change, with specific examples and data points from the financial statements to support your findings. If possible, quantify the impact of each factor on revenue in dollar terms.
""",
"""
Analyze the 'transactions and integration costs' from the provided financial data.

Your analysis should include the following:

1. **Key Figures:** State the total 'transactions and integration costs' for the current and prior periods.
2. **Change Analysis:** Calculate and state the year-over-year (y/y) change in 'transactions and integration costs,' including both the absolute value and percentage change.
3. **Explanation of Changes:** Provide a detailed explanation for the observed change in 'transactions and integration costs,' including specific reasons and any contributing factors.
Present your analysis in a clear and concise manner, using specific numerical values, dollar amounts, and percentages wherever applicable.
""",
"""
Analyze the 'total operating expenses' from the provided financial data, focusing on the operating expense ratio, net operating profit, and revenue.

Your analysis should include the following:

1. Key Figures:
    - State the total 'operating expenses' for the current period and prior period in millions.
    - Include a breakdown of specific contributing factors and their impact on net operating profit, if available.
    - What is the operating expense ratio for the current period and prior period, expressed as a percentage? How does the trend of operating expenses compare to revenue growth?
2. Change Analysis:
    - Calculate and state the year-over-year (y/y) change in 'operating expenses,' including both the absolute value (in $MM) and percentage change (%).
    - What is the year-over-year change in net operating profit, expressed in both dollar amount ($MM) and percentage (%)?
3. Explanation of Changes:
    - Provide a detailed explanation for the observed change in 'operating expenses,' including specific reasons and any contributing factors.
    - If possible, mention factors like lease and bad debt expenses and their impact in dollar terms, if they are relevant to the changes.

Present your analysis in a clear and concise manner, using specific numerical values, dollar amounts, and percentages wherever applicable. Ensure you extract values directly from the 'Structured Income Statement Summary' if available in the financial data.
""",
"""
Analyze the financial data provided and summarize the key financial performance for the recent fiscal year, focusing on revenue, cost of goods sold (COGS), gross profit, and gross profit margin.

Your analysis should include:

* **Key Figures:** Present the key financial figures for the current and prior periods for each of the metrics mentioned, including specific amounts and percentages. Show your calculations to derive these figures where possible, using the data provided in the context.
* **Change Analysis:** Calculate and state the year-over-year (y/y) change for each metric, including both absolute values ($) and percentages (%).
* **Explanation of Changes:** Provide concise and specific reasons for the observed changes in each metric, referencing the context and highlighting key factors driving the performance.
* **Related Ratios and Metrics:** Include relevant financial ratios or metrics that provide further context (e.g., gross profit margin). Analyze their trends (stable, increased, decreased) and their impact on profitability.
* **Impact on Profitability:** Discuss the overall impact of the changes on the company's profitability, highlighting key drivers and trends.

Present your analysis in a clear and concise manner, using bullet points or numbered lists for clarity. Support your analysis with specific numerical values, dollar amounts, and percentages wherever applicable.
"""
]

# Ask each question in turn: echo it, run the chain, then print the answer
# followed by a blank line.
for question in questions:
    print(f"Question: {question}")
    answer = chain.invoke({'question': question})
    print(f"Answer: {answer}")
    print()

Question: 
  Analyze the financial data and provide a structured summary of revenue growth for the current and prior periods, using the keyword "revenue" to pick up relevant figures.

Your response should include:

1. Total revenue for the current and prior periods, expressed in the appropriate units (e.g., millions, billions).
2. Year-over-year (y/y) change in revenue, including:
    - Absolute change in dollars ($).
    - Percentage change (%).
3. Key factors contributing to the revenue change, with specific examples and data points from the financial statements to support your findings. If possible, quantify the impact of each factor on revenue in dollar terms.

Answer: **Revenue Growth Analysis**

Based on the provided financial data, we will analyze the revenue growth for the current period (2023) compared to the prior period (2022).

**Key Figures:**

*   Total Revenue:
    *   2023: $9.778 billion
    *   2022: $8.993 billion

**Change Analysis:**

*   Year-over-year (y/y) chang

In [22]:
!pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.2.3-py3-none-any.whl.metadata (8.7 kB)
Downloading pyngrok-7.2.3-py3-none-any.whl (23 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.3


In [None]:
import os

from flask import Flask, request, jsonify
from pyngrok import ngrok

app = Flask(__name__)

@app.route('/api/invoke', methods=['POST'])
def invoke_chain():
    """Run the chain on the question posted as JSON and return the answer.

    Expects a JSON object body of the form {"question": "<text>"}.

    Returns:
        200 with {"result": <answer>} on success,
        400 with {"error": ...} when the body is not a JSON object or has no
        non-empty string "question",
        500 with {"error": ...} when the chain itself fails.
    """
    # silent=True yields None for malformed JSON instead of raising, so a bad
    # request is answered with 400 rather than being reported as a server error.
    data = request.get_json(force=True, silent=True)
    question = data.get('question') if isinstance(data, dict) else None
    if not isinstance(question, str) or not question.strip():
        return jsonify({'error': 'Missing "question" in request body'}), 400

    print(question)
    try:
        result = chain.invoke({'question': question})
    except Exception:
        # Log the full traceback server-side; do not send exception details to
        # clients, since the endpoint is exposed through a public tunnel.
        app.logger.exception("chain.invoke failed")
        return jsonify({'error': 'Internal error while processing the question'}), 500
    return jsonify({'result': result}), 200

if __name__ == '__main__':
    # Open a ngrok tunnel to the HTTP server.
    # The auth token is read from the NGROK_AUTH_TOKEN environment variable so it is
    # never stored in the notebook. Sign up and get a token from
    # https://dashboard.ngrok.com/get-started/your-authtoken
    # If the variable is unset, no token is configured here.
    auth_token = os.environ.get("NGROK_AUTH_TOKEN")
    if auth_token:
        ngrok.set_auth_token(auth_token)
    public_url = ngrok.connect(5000).public_url
    print(' * ngrok tunnel "{}" -> "http://127.0.0.1:5000"'.format(public_url))
    app.run(port=5000)

# Example call from a local machine (replace <public_url> with the tunnel URL printed above):
# curl -X POST <public_url>/api/invoke -H "Content-Type: application/json" --data-binary @"data.json"

 * ngrok tunnel "https://7170-34-16-137-64.ngrok-free.app" -> "http://127.0.0.1:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m


What is the revenue growth of the company in 2023?


INFO:werkzeug:127.0.0.1 - - [16/Feb/2025 17:18:39] "POST /api/invoke HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [16/Feb/2025 17:19:58] "[35m[1mPOST /api/invoke HTTP/1.1[0m" 500 -


Analyze the financial data and provide a structured summary of revenue growth for the current and prior periods, using the keyword "revenue" to pick up relevant figures.

Your response should include:

1. Total revenue for the current and prior periods, expressed in the appropriate units (e.g., millions, billions).
2. Year-over-year (y/y) change in revenue, including:
    - Absolute change in dollars ($).
    - Percentage change (%).
3. Key factors contributing to the revenue change, with specific examples and data points from the financial statements to support your findings. If possible, quantify the impact of each factor on revenue in dollar terms.


INFO:werkzeug:127.0.0.1 - - [16/Feb/2025 17:22:02] "POST /api/invoke HTTP/1.1" 200 -
