In [None]:
# Install necessary libraries
!pip install pytesseract langchain_community langchain_core pytesseract langchain PIL 

In [17]:
import pytesseract
from PIL import Image
from langchain.docstore.document import Document
from langchain_community.chat_models import ChatOllama
from langchain.prompts import (
    ChatPromptTemplate,
    MessagesPlaceholder
)
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.messages import AIMessage, HumanMessage


In [18]:
# Load the image of the income statement
image_path = './data/test/is.jpg'
output_text_file = './data/test/extracted_text.txt'

# Use Tesseract to extract text from the image. The context manager closes the
# underlying image file handle as soon as OCR has finished.
with Image.open(image_path) as image:
    extracted_text = pytesseract.image_to_string(image)

# Save the extracted text to a text file. The encoding is set explicitly to
# UTF-8 because the file is read back later with encoding='utf-8'; relying on
# the platform default encoding could make that read fail or garble
# non-ASCII characters.
with open(output_text_file, 'w', encoding='utf-8') as file:
    file.write(extracted_text)

print(f"Extracted text has been saved to {output_text_file}")

Extracted text has been saved to ./data/test/extracted_text.txt


In [19]:
# Name of the model handed to ChatOllama. Alternatives that were tried and can
# be swapped in here: 'gemma', 'llama3.1', 'mistral'.
local_llm = 'llama3'

# Chat model used by the question-answering chain (temperature fixed at 0).
llm = ChatOllama(
    temperature=0,
    model=local_llm,
)

In [20]:
# Read the saved OCR output back from disk as UTF-8 text
with open(output_text_file, encoding='utf-8') as text_file:
    txt_content = text_file.read()


In [21]:
# Wrap the extracted text in a Document so it can be passed to the chain as context
context_document = Document(
    page_content=txt_content,
)


In [22]:
# Define the QA prompt template.
# The system message holds only the instructions and the document context:
# - no literal "system" / "assistant" role words, because the roles are already
#   expressed by the message tuples below;
# - no "Question: {input}" line, because the question is sent once as the human
#   turn after the chat history and would otherwise appear twice in every request.
# `{context}` is filled with the documents passed to the chain under "context".
qa_system_prompt = """You are an assistant for question-answering tasks. Use the following context to answer the question. Avoid phrases like "Based on the provided context". Explain the answer in the end and make a heading with a paragraph.
Context: {context}"""
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

# Create the question-answer chain
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

In [23]:
# Chat history: the conversation so far. It starts empty, is passed to the chain
# under the "chat_history" key, and is extended after each question is answered.
chat_history: list = []


In [27]:
# Ask the question
question = "can you analyze this data and explain"
ai_msg_1 = question_answer_chain.invoke(
    {"input": question, "chat_history": chat_history, "context": [context_document]}
)

# The chain returns the answer as plain text (it prints as bare text below), so
# wrap it in an AIMessage before storing it. Appending the raw string would make
# the history placeholder treat the assistant's answer as another human message
# on the next turn.
# Note: re-running this cell appends the same exchange to chat_history again.
chat_history.extend([HumanMessage(content=question), AIMessage(content=ai_msg_1)])

print(ai_msg_1)

**Financial Analysis of a Company**

The provided financial data represents the company's performance for the year ended September 28, 2019. Here's an analysis of the key metrics:

**Revenue and Gross Profit**: The company reported net sales of $4,358,100, with a gross profit of $1,619,386, indicating a gross margin of approximately 37%. This suggests that the company has a relatively stable pricing strategy and is able to maintain its profitability despite fluctuations in costs.

**Operating Expenses**: Total operating expenses stood at $854,159, which is roughly 19.6% of net sales. The majority of these expenses ($560,430) were attributed to selling and operating expenses, while general and administrative expenses accounted for $293,729. This suggests that the company has a significant focus on marketing and sales efforts.

**Operating Income**: The company reported an operating income of $765,227, which is approximately 17.5% of net sales. This indicates that the company's operation