In [1]:
import os
import sys

# root path
ROOT = os.path.abspath(os.path.join(os.getcwd(), '..'))

# Add the project root to the Python path
if ROOT not in sys.path:
    sys.path.append(ROOT)

import warnings
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import re
from datetime import datetime
from fpdf import FPDF

# Import the functions from Task 2
from src.data.data_functions import (
    earnings_and_expenses,
    expenses_summary,
    cash_flow_summary,
)

In [3]:
import polars as pl

relevant_columns = ['client_id', 'date', 'amount', 'mcc']
df = (pl.read_csv("../data/raw/transactions_data.csv")
      .filter(pl.col('client_id') == 126)
      .select(relevant_columns)
      .to_pandas()
)

In [9]:
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
from src.agent.tools import extract_dates

def run_agent(df: pd.DataFrame, client_id: int, input: str) -> dict:
    """
    Returns
    -------
    variables_dict : dict
        Dictionary of the variables of the query.
            {
                "start_date": "YYYY-MM-DD",
                "end_date" : "YYYY-MM-DD",
                "client_id": int,
                "create_report" : bool
            }

    """
    tools = []
    
    model = ChatOllama(
        model="llama3.2:1b",
        temperature=0,
    )

    # Create the prompt template with specific instructions
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                (
                    "You are a helpful assistant that extracts start and end dates from the given text."
                    "For each date found, output it in the format START_DATE and END_DATE."
                    "Dates should be formatted as 'YYYY-MM-DD'."
                    "If no day is provided in each date, assume the day is 01."
                    "If there is reference to only one month, assume the start day is 01 and the end day is the last day of the month."
                    "Return only 2 dates. Do not include any additional text."
                ),
            ),
            ("human", "{input_prompt}"),
        ]
    )

    # Create the chain
    chain = prompt | model

    # Extract dates from the input
    START, END = extract_dates(chain, input)
    print(START, END)

    # Extract results dataframes
    df = df.copy()
    df['date'] = pd.to_datetime(df['date'])
    earnings_and_expenses_df = earnings_and_expenses(df, client_id, START, END)
    expenses_df = expenses_summary(df, client_id, START, END)
    cash_flow_df = cash_flow_summary(df, client_id, START, END)
    print(earnings_and_expenses_df)
    print(expenses_df)
    print(cash_flow_df)

    pdf_output_folder = "reports/"

    variables_dict = {
        "start_date": "YYYY-MM-DD",
        "end_date": "YYYY-MM-DD",
        "client_id": 0,
        "create_report": False,
    }

    return variables_dict


client_id = 666666666
input = "Create a pdf report from 2013-01-01 to 2020-01-31"
run_agent(df, client_id, input)

2013-01-01 2020-01-31
Earnings and expenses plot saved at /home/ezemriv/other_projects/hackathon-caixabank-data-ai-report/reports/figures/earnings_and_expenses.png
   Earnings  Expenses
0       0.0       0.0
Empty DataFrame
Columns: [Expenses Type, Total Amount, Average, Max, Min, Num. Transactions]
Index: []
Empty DataFrame
Columns: [Date, Inflows, Outflows, Net Cash Flow, % Savings]
Index: []


{'start_date': 'YYYY-MM-DD',
 'end_date': 'YYYY-MM-DD',
 'client_id': 0,
 'create_report': False}

In [7]:
client_id = 126

result = expenses_summary(df, client_id, "2013-01-01", "2020-01-31")
result

Expenses summary plot saved at /home/ezemriv/other_projects/hackathon-caixabank-data-ai-report/reports/figures/expenses_summary.png


Unnamed: 0,Expenses Type,Total Amount,Average,Max,Min,Num. Transactions
0,"Electroplating, Plating, Polishing Services",426.0,426.0,426.0,426.0,1
1,Gardening Supplies,446.0,446.0,446.0,446.0,1
2,Heat Treating Metal Services,457.0,457.0,457.0,457.0,1
3,"Lighting, Fixtures, Electrical Supplies",477.0,477.0,477.0,477.0,1
4,Miscellaneous Food Stores,6871.0,75.51,50.0,99.0,91
5,Miscellaneous Machinery and Parts Manufacturing,156.0,156.0,156.0,156.0,1
6,Non-Ferrous Metal Foundries,179.0,179.0,179.0,179.0,1
7,Railroad Passenger Transport,437.0,437.0,437.0,437.0,1
8,Service Stations,8241.0,73.58,51.0,100.0,112
9,Ship Chandlers,556.0,278.0,225.0,331.0,2


In [9]:
from fpdf import FPDF

class PDF(FPDF):
    def header(self):
        self.set_font("Arial", "B", 12)
        self.cell(0, 10, "Client Report", align="C", ln=True)
        self.ln(10)

    def add_table(self, data_frame, title):
        # Table title
        self.set_font("Arial", "B", 12)
        self.cell(0, 10, title, ln=True)
        self.ln(5)
        
        # Column headers
        self.set_font("Arial", "B", 10)
        col_width = self.epw / len(data_frame.columns)  # Column width is evenly distributed
        for col_name in data_frame.columns:
            self.cell(col_width, 10, col_name, border=1, align="C")
        self.ln()
        
        # Table rows
        self.set_font("Arial", "", 8)
        for _, row in data_frame.iterrows():
            for item in row:
                self.cell(col_width, 10, str(item), border=1, align="C")
            self.ln()
        self.ln(10)  # Space after table

# Example usage:
pdf = PDF()
pdf.add_page()

# Assuming `earnings_and_expenses_df`, `expenses_df`, and `cash_flow_df` are your DataFrames
pdf.add_table(result, "Earnings and Expenses Summary")
pdf.add_table(expenses_df, "Expenses Summary by Merchant Category")
pdf.add_table(cash_flow_df, "Cash Flow Summary")

# Save PDF
pdf.output("client_report.pdf")
