## Convert JSON to PDF

In [2]:
import json
from fpdf import FPDF
import os

In [44]:
# Show the entries of the current working directory.
os.listdir(".")

['app.py',
 'bankstatement',
 'FinGeniusAssistant.py',
 'pdf_chat.ipynb',
 'test.ipynb']

In [3]:
# Relative directory used below for the JSON input and the generated PDF.
path = 'bankstatement'

In [4]:
# Identifier used to build both the input JSON name and the output PDF name.
userId = "0001"
jsonPath = os.path.join(path, f"{userId}.json")

# Open the file in a context manager so the handle is closed as soon as the
# JSON has been parsed; the previous `json.load(open(...))` never closed it.
# JSON text is UTF-8 by specification, so do not rely on the platform default.
with open(jsonPath, encoding="utf-8") as statement_file:
    data = json.load(statement_file)

In [5]:
def draw_table_headers(pdf, widths=None):
    """Draw the shaded header row of the statement table on ``pdf``.

    Args:
        pdf: Object exposing ``set_fill_color`` and ``cell`` (an ``FPDF``
            instance in this notebook).
        widths: Sequence of five column widths, in the order Date,
            Description, Debit, Credit, Balance. When omitted, the
            notebook-level ``col_widths`` is used, which keeps existing
            ``draw_table_headers(pdf)`` calls working.

    Raises:
        NameError: If ``widths`` is omitted and ``col_widths`` has not been
            defined yet.
        IndexError: If fewer than five widths are supplied.
    """
    if widths is None:
        # Backward-compatible fallback to the global defined in a later cell.
        widths = col_widths

    titles = ("Date", "Description", "Debit", "Credit", "Balance")
    final_index = len(titles) - 1

    pdf.set_fill_color(200, 220, 255)
    for index, title in enumerate(titles):
        # Fifth positional argument is 1 only for the last column, 0 otherwise.
        line_break = 1 if index == final_index else 0
        pdf.cell(widths[index], 10, title, 1, line_break, "C", True)

In [6]:
pdf = FPDF()
col_widths = [30, 60, 30, 30, 30]

# First page: centred title, a blank gap, then the table header row.
pdf.add_page()
pdf.set_font("Arial", size=12)
pdf.cell(200, 10, txt="Bank Statement", ln=True, align="C")
pdf.ln(10)
draw_table_headers(pdf)

# One entry per column: (transaction key, alignment, convert with str()?).
# Date and description are passed through as-is; the amounts are stringified.
row_layout = (
    ("date", "C", False),
    ("description", "L", False),
    ("debit", "C", True),
    ("credit", "C", True),
    ("balance", "C", True),
)
last_column = len(row_layout) - 1

for transaction in data:
    # Once the cursor is past y=200, continue on a new page with the header
    # row repeated.
    if pdf.get_y() > 200:
        pdf.add_page()
        pdf.set_font("Arial", size=12)
        draw_table_headers(pdf)

    for column, (key, align, stringify) in enumerate(row_layout):
        value = transaction[key]
        text = str(value) if stringify else value
        # Only the last column passes 1 as the fifth positional argument.
        line_break = 1 if column == last_column else 0
        pdf.cell(col_widths[column], 10, text, 1, line_break, align)

outputPath = path
pdf.output(os.path.join(path, f"{userId}.pdf"))

''

## PDF Loader

In [7]:
from langchain_community.document_loaders import PyPDFLoader

# Load the PDF written by the previous section and split it into documents.
pdf_filename = f"{userId}.pdf"
pdfPath = os.path.join(path, pdf_filename)
loader = PyPDFLoader(pdfPath)
pages = loader.load_and_split()

In [8]:
# Display the documents returned by loader.load_and_split().
pages

[Document(page_content='Bank Statement\nDate Description Debit Credit Balance\n2020-01-01 groceries 450.0 0.0 1000.0\n2020-01-02 salary 0.0 2000.0 3000.0\n2020-01-03 dinner 1000.0 0.0 2000.0\n2020-01-04 friend debt 0.0 6000.0 8000.0\n2020-01-01 groceries 450.0 0.0 1000.0\n2020-01-02 salary 0.0 2000.0 3000.0\n2020-01-03 dinner 1000.0 0.0 2000.0\n2020-01-04 friend debt 0.0 6000.0 8000.0\n2020-01-05 utilities 300.0 0.0 7700.0\n2020-01-06 rent 1200.0 0.0 6500.0\n2020-01-07 movie tickets 50.0 0.0 6450.0\n2020-01-08 online shopping 500.0 0.0 5950.0\n2020-01-09 car service 200.0 0.0 5750.0\n2020-01-10 medical expenses 800.0 0.0 4950.0\n2020-01-11 groceries 450.0 0.0 4500.0\n2020-01-12 salary 0.0 2000.0 6500.0', metadata={'source': 'bankstatement\\0001.pdf', 'page': 0}),
 Document(page_content='Date Description Debit Credit Balance\n2020-01-13 dinner 1000.0 0.0 5500.0\n2020-01-14 friend debt 0.0 6000.0 11500.0\n2020-01-15 utilities 300.0 0.0 11200.0\n2020-01-16 rent 1200.0 0.0 10000.0\n2020-01

In [18]:
# Collect the text of every loaded page, in page order, and print the list.
page_content = list(map(lambda page: page.page_content, pages))
print(page_content)

['Bank Statement\nDate Description Debit Credit Balance\n2020-01-01 groceries 450.0 0.0 1000.0\n2020-01-02 salary 0.0 2000.0 3000.0\n2020-01-03 dinner 1000.0 0.0 2000.0\n2020-01-04 friend debt 0.0 6000.0 8000.0\n2020-01-01 groceries 450.0 0.0 1000.0\n2020-01-02 salary 0.0 2000.0 3000.0\n2020-01-03 dinner 1000.0 0.0 2000.0\n2020-01-04 friend debt 0.0 6000.0 8000.0\n2020-01-05 utilities 300.0 0.0 7700.0\n2020-01-06 rent 1200.0 0.0 6500.0\n2020-01-07 movie tickets 50.0 0.0 6450.0\n2020-01-08 online shopping 500.0 0.0 5950.0\n2020-01-09 car service 200.0 0.0 5750.0\n2020-01-10 medical expenses 800.0 0.0 4950.0\n2020-01-11 groceries 450.0 0.0 4500.0\n2020-01-12 salary 0.0 2000.0 6500.0', 'Date Description Debit Credit Balance\n2020-01-13 dinner 1000.0 0.0 5500.0\n2020-01-14 friend debt 0.0 6000.0 11500.0\n2020-01-15 utilities 300.0 0.0 11200.0\n2020-01-16 rent 1200.0 0.0 10000.0\n2020-01-17 movie tickets 50.0 0.0 9950.0\n2020-01-18 online shopping 500.0 0.0 9450.0\n2020-01-19 car service 20

## Retrieval QnA with Groq

In [42]:
# Join the per-page text into one plain-text block before interpolation.
# Formatting the list itself would embed its Python repr in the prompt
# (brackets, quotes and escaped newline sequences) instead of the actual
# statement rows, one per line.
context = "\n".join(page_content)
sys_prompt = f"""
Instructions:
- You are a finance guru that will help user analyse their bank statement.
- You will be given a bank statement in the form of a table with headings Date, Description, Debit, Credit, Balance.
- Debit is the amount of money taken out of the account.
- Credit is the amount of money put into the account.
- Balance is the remaining amount in the account.
- Help the user understand their spending habits and suggest ways to save money.
- Help the user understand their income and suggest ways to increase it.
- Dont show the table or from where you get the data only output the analysis.
- Do not give out of scope answer only answer what is asked.
Context: {context}
"""

In [36]:
from groq import Groq

In [37]:
import os
from dotenv import load_dotenv
# Load variables from a .env file (python-dotenv) into the environment.
load_dotenv()

# The API key is read from the environment rather than hard-coded here.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

In [40]:
# User question sent to the model together with the system prompt.
query = "is friend debt credited into my account or debited"

In [43]:
# Conversation sent to the model: system instructions first, then the
# user's question.
messages = [
    {"role": "system", "content": f"{sys_prompt}"},
    {"role": "user", "content": f"{query}"},
]

# Single, non-streaming completion request.
chat_completion = client.chat.completions.create(
    messages=messages,
    model="mixtral-8x7b-32768",
    temperature=0.5,
    max_tokens=2048,
    top_p=1,
    stop=None,
    stream=False,
)

# Print the text of the first returned choice.
answer = chat_completion.choices[0].message.content
print(answer)

Based on the bank statements provided, "friend debt" is credited into your account, meaning you received a total of 12,000.0 (6,000.0 on 2020-01-04 and another 6,000.0 on 2020-01-24) from your friend.


## CohereEmbedding + ChromaDB

In [10]:
# from langchain_cohere import CohereEmbeddings
# from langchain_chroma import Chroma

In [11]:
# from dotenv import load_dotenv
# load_dotenv()

In [12]:
# dbPath = os.path.join(path)
# dbPath

In [13]:
# cohere_api_key = os.getenv("COHERE_API_KEY")
# db = Chroma.from_documents(pages, CohereEmbeddings(cohere_api_key=cohere_api_key), persist_directory=os.path.join(dbPath, "chroma"))

In [14]:
# db.delete_collection()

## Trial Only

In [15]:
# query = CohereEmbeddings().embed_query("how much i spend on movie tickets")
# print(query)

In [16]:
# result = db.similarity_search_by_vector(query)

In [17]:
# print(result[0].page_content)