In [1]:
import pandas as pd
import os
import psycopg2
from dotenv import load_dotenv, find_dotenv
import urllib.parse
from langchain_core.tools import tool
from thefuzz import fuzz
import re
import numpy as np

In [2]:
# Load connection settings from the nearest .env file into the process environment.
load_dotenv(find_dotenv())
DB_NAME=os.getenv('DB_NAME')
USERNAME=os.getenv('USERNAME')
# Keep the raw secret for the driver. Percent-encoding is only meaningful inside a
# connection URI; libpq does not decode it in keyword/value form, so a quoted
# password containing special characters would be sent verbatim and rejected.
_RAW_PASSWORD = os.getenv('PASSWORD')
# PASSWORD stays URL-quoted for any URI-style consumer; None when the variable is
# unset (urllib.parse.quote(None) would raise TypeError).
PASSWORD = urllib.parse.quote(_RAW_PASSWORD) if _RAW_PASSWORD is not None else None
# NOTE(review): HOSTNAME and PORT are read but not passed to connect() - confirm
# whether the connection is meant to use them.
HOSTNAME=os.getenv('HOSTNAME')
PORT=os.getenv('PORT')
# Read as a string (or None when unset); consumers convert it with int().
FUZZ_RATIO_THRESHOLD = os.getenv('FUZZ_RATIO_THRESHOLD')
# Creating connection to database.
# Keyword arguments let psycopg2 build and escape the DSN itself, and parameters
# that are None are left out instead of being interpolated as the text "None".
conn = psycopg2.connect(dbname=DB_NAME, user=USERNAME, password=_RAW_PASSWORD)

In [5]:

def invoice_number_match(dataframe, reference_component, threshold):
    """Find the first row whose invoice number fuzzily matches a reference token.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame with an ``invoice_number`` column.
    reference_component : str
        Token taken from a payment reference.
    threshold : int or float
        A row matches when ``fuzz.ratio`` is strictly greater than this value.

    Returns
    -------
    tuple
        ``(index_label, True)`` for the first matching row in frame order,
        otherwise ``(None, False)`` so callers can always unpack the result.
    """
    # Iterate the column itself rather than iterrows(): iterrows() builds one Series
    # per row and may upcast ints to floats ("1" -> "1.0") when every column is
    # numeric, which would break the string comparison below.
    for idx, invoice_number in dataframe['invoice_number'].items():
        if fuzz.ratio(str(invoice_number), reference_component) > threshold:
            print("Invoice Number Match!")
            return idx, True
    # No row cleared the threshold: report that explicitly instead of returning None.
    return None, False

def customer_number_match(dataframe, reference_component, threshold):
    """Find the first row whose customer number fuzzily matches a reference token.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame with a ``customer_number`` column.
    reference_component : str
        Token taken from a payment reference.
    threshold : int or float
        A row matches when ``fuzz.ratio`` is strictly greater than this value.

    Returns
    -------
    tuple
        ``(index_label, True)`` for the first matching row in frame order,
        otherwise ``(None, False)`` so callers can always unpack the result.
    """
    # Iterate the column itself rather than iterrows(): iterrows() builds one Series
    # per row and may upcast ints to floats ("12038" -> "12038.0") when every column
    # is numeric, which would break the string comparison below.
    for idx, customer_number in dataframe['customer_number'].items():
        if fuzz.ratio(str(customer_number), reference_component) > threshold:
            print("Customer Match!")
            return idx, True
    # No row cleared the threshold: report that explicitly instead of returning None.
    return None, False

def amount_number_match(dataframe, reference_component, threshold):
    """Find the first row whose amount fuzzily matches a reference token.

    The amount is compared in its ``str()`` form (for example ``"50000.0"`` for a
    float), so the token has to resemble that spelling to score highly.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame with an ``amount`` column.
    reference_component : str
        Token taken from a payment reference.
    threshold : int or float
        A row matches when ``fuzz.ratio`` is strictly greater than this value.

    Returns
    -------
    tuple
        ``(index_label, 1)`` for the first matching row in frame order, otherwise
        ``(None, 0)``. The flag is an int, unlike the boolean used by the invoice
        and customer matchers.
    """
    for idx, amount in dataframe['amount'].items():
        if fuzz.ratio(str(amount), reference_component) > threshold:
            print("Amount number match!")
            return idx, 1
    # No row cleared the threshold: report that explicitly instead of returning None.
    return None, 0

def fill_details(in_idx,index,payments_dataframe, accounts_receivables_dataframe):
    """Book one payment onto one receivable row, modifying the receivables frame in place.

    Parameters
    ----------
    in_idx : index label
        Row of ``accounts_receivables_dataframe`` to update.
    index : index label
        Row of ``payments_dataframe`` holding the payment.
    payments_dataframe : pandas.DataFrame
        Needs ``payment_amount``, ``payment_date`` and ``transaction_id`` columns.
    accounts_receivables_dataframe : pandas.DataFrame
        Needs ``payment``, ``payment_date`` and ``payment_id`` columns.

    Returns
    -------
    pandas.DataFrame
        The same ``accounts_receivables_dataframe`` object that was passed in.
    """
    payment_amount = payments_dataframe.loc[index, 'payment_amount']
    booked_so_far = accounts_receivables_dataframe.loc[in_idx, 'payment']

    # pd.isna treats None (object column) and NaN (numeric column) alike. Comparing
    # with `== None` misses NaN, and NaN + amount stays NaN forever.
    if pd.isna(booked_so_far):
        accounts_receivables_dataframe.loc[in_idx, 'payment'] = payment_amount
    else:
        accounts_receivables_dataframe.loc[in_idx, 'payment'] = booked_so_far + payment_amount

    # Date and id are overwritten, so after several payments only the latest one is kept.
    accounts_receivables_dataframe.loc[in_idx, 'payment_date'] = payments_dataframe.loc[index, 'payment_date']
    accounts_receivables_dataframe.loc[in_idx, 'payment_id'] = payments_dataframe.loc[index, 'transaction_id']
    return accounts_receivables_dataframe


def AccessAccountsReceivable():
    """Read the whole accounts_receivable table from Postgres into a DataFrame.

    Uses the module-level ``conn`` connection and returns every row and column.
    """
    return pd.read_sql(sql="SELECT * FROM accounts_receivable", con=conn)


def AccessPayments():
    """Read the whole payments table from Postgres into a DataFrame.

    Uses the module-level ``conn`` connection and returns every row and column.
    """
    return pd.read_sql(sql="SELECT * FROM payments", con=conn)

# @tool
# def AccessCustomer():
#     """Function to access customer information and payment terms"""
#     customers = pd.read_sql("SELECT * FROM customers", conn) 
#     return customers


def PaymentReferenceSearch(payments_dataframe, accounts_receivables_dataframe):
    """Match payments to receivables by fuzzy-searching each payment reference.

    Each payment reference is split on whitespace. Tokens are tried against the
    customer numbers until one matches; the tokens after it are tried against the
    invoice numbers. Once both have matched, the payment is booked on the matched
    invoice row through ``fill_details`` and the payment is flagged as matched.

    Parameters
    ----------
    payments_dataframe : pandas.DataFrame
        Output of ``AccessPayments``. Modified in place: a boolean ``matched``
        column is created (or reset) and set to True for reconciled payments.
    accounts_receivables_dataframe : pandas.DataFrame
        Output of ``AccessAccountsReceivable``. Modified in place by ``fill_details``.

    Returns
    -------
    None
        Results are written into the two frames. An exception raised while the
        payments are being processed is printed with an ``OUTER ERROR`` prefix
        and ends the run early instead of propagating.
    """
    # The threshold is read from the environment as a string; int() raises here,
    # outside the best-effort handler, when it is missing or not numeric.
    fuzz_threshold = int(FUZZ_RATIO_THRESHOLD)

    try:
        # Every payment starts unmatched; only a confirmed customer + invoice hit
        # flips the flag. Setting the column once keeps it a clean boolean column.
        payments_dataframe['matched'] = False

        for index, row in payments_dataframe.iterrows():
            reference = str(row['payment_reference'])
            print(reference)
            string_list = re.split(r"\s", reference)
            print(string_list)

            customer_match = False
            invoice_match = False
            in_idx = None

            for component in string_list:
                component = component.strip()
                # Consecutive whitespace yields empty tokens; they can never match.
                if not component:
                    continue
                print(f"Element: {component}")

                if customer_match:
                    # NOTE(review): the invoice lookup searches every receivable row,
                    # not only rows of the matched customer - confirm this is intended.
                    print("Starting Invoice Number Match")
                    result = invoice_number_match(dataframe=accounts_receivables_dataframe,
                                                  reference_component=component,
                                                  threshold=fuzz_threshold)
                    # A matcher may signal "no match" with None or with a falsy flag;
                    # accept both instead of relying on a caught unpacking error.
                    if result is not None:
                        in_idx, invoice_match = result
                else:
                    print("Starting Customer Number Match")
                    result = customer_number_match(dataframe=accounts_receivables_dataframe,
                                                   reference_component=component,
                                                   threshold=fuzz_threshold)
                    if result is not None:
                        _, customer_match = result

                if invoice_match and customer_match:
                    print("Invoice and Customer matched!")
                    payments_dataframe.loc[index, 'matched'] = True
                    fill_details(in_idx, index, payments_dataframe, accounts_receivables_dataframe)
                    break

    except Exception as e:
        # Deliberate best-effort: report the failure and stop without raising.
        print(f"OUTER ERROR: {e}")

In [6]:
# Load both tables from the database through the helper functions defined above.
payments = AccessPayments()
acc_receivables = AccessAccountsReceivable()

  payments = pd.read_sql("SELECT * FROM payments", conn)
  accounts_receivables = pd.read_sql("SELECT * FROM accounts_receivable", conn)


In [7]:
payments

Unnamed: 0,transaction_id,payment_date,payment_amount,payment_reference
0,948347,2025-01-31,25000.0,12038 - 1 - We'll pay the rest later
1,34847,2025-01-20,100000.0,Customer Nr 12000 - Invoice Nr 2 - Mom's
2,29304,2025-01-02,12500.0,12010 - 4
3,3837459,2025-10-02,10000.0,11900 - 5
4,390576,2025-02-14,76000.0,12990 - Invoice Nr. 3
5,302947,2025-02-18,25000.0,12038 - 1 - Second payment
6,39506,2025-02-28,1000.0,12933 - 6 - Grazie
7,2394759,2025-01-03,76000.0,A little something for your troubles
8,390475,2025-03-13,80000.0,11900 -10


In [8]:
acc_receivables

Unnamed: 0,invoice_number,date,customer_name,customer_number,amount,due_date,payment,payment_date,payment_id
0,1,2025-05-01,Planet Express,12038,50000.0,2025-05-02,,,
1,2,2025-09-01,Mom's Friendly Robot Factory,12000,100000.0,2025-09-02,,,
2,3,2025-10-01,Romanticorp,12990,73640.0,2025-10-02,,,
3,4,2025-01-18,Hal Insitute for Criminally Insane Robots,12010,12500.0,2025-02-18,,,
4,5,2025-01-21,Cookieville Minimum-Security Orphanarium,11900,10000.0,2025-02-21,,,
5,6,2025-01-29,Panucci's Pizza,12933,1000.0,2025-02-28,,,
6,7,2025-02-02,Planet Express,12038,12300.0,2025-02-03,,,
7,8,2025-02-02,Romanticorp,12990,50000.0,2025-02-03,,,
8,9,2025-03-02,Malfunctioning Eddie's Rocket-Car Emporium,12230,76000.0,2025-03-03,,,
9,10,2025-05-02,Cookieville Minimum-Security Orphanarium,11900,80000.0,2025-05-02,,,


In [9]:
# One dict per receivable row (list-of-records layout).
test = acc_receivables.to_dict(orient='records')

In [10]:
test

[{'invoice_number': 1,
  'date': datetime.date(2025, 5, 1),
  'customer_name': 'Planet Express',
  'customer_number': 12038,
  'amount': 50000.0,
  'due_date': datetime.date(2025, 5, 2),
  'payment': None,
  'payment_date': None,
  'payment_id': None},
 {'invoice_number': 2,
  'date': datetime.date(2025, 9, 1),
  'customer_name': "Mom's Friendly Robot Factory",
  'customer_number': 12000,
  'amount': 100000.0,
  'due_date': datetime.date(2025, 9, 2),
  'payment': None,
  'payment_date': None,
  'payment_id': None},
 {'invoice_number': 3,
  'date': datetime.date(2025, 10, 1),
  'customer_name': 'Romanticorp',
  'customer_number': 12990,
  'amount': 73640.0,
  'due_date': datetime.date(2025, 10, 2),
  'payment': None,
  'payment_date': None,
  'payment_id': None},
 {'invoice_number': 4,
  'date': datetime.date(2025, 1, 18),
  'customer_name': 'Hal Insitute for Criminally Insane Robots',
  'customer_number': 12010,
  'amount': 12500.0,
  'due_date': datetime.date(2025, 2, 18),
  'payment'

In [None]:
# Print every receivable row as the string form of its column -> value dict.
for i in range(len(acc_receivables)):
    record = str(acc_receivables.iloc[i].to_dict())
    # NOTE(review): an unfinished `for j in range(len(acc))` loop stood here. It had
    # no colon or body and `acc` is not defined in this notebook, so the cell could
    # not run; reinstate it once its purpose is settled.
    print(record)

{'invoice_number': 1, 'date': datetime.date(2025, 5, 1), 'customer_name': 'Planet Express', 'customer_number': 12038, 'amount': 50000.0, 'due_date': datetime.date(2025, 5, 2), 'payment': None, 'payment_date': None, 'payment_id': None}
{'invoice_number': 2, 'date': datetime.date(2025, 9, 1), 'customer_name': "Mom's Friendly Robot Factory", 'customer_number': 12000, 'amount': 100000.0, 'due_date': datetime.date(2025, 9, 2), 'payment': None, 'payment_date': None, 'payment_id': None}
{'invoice_number': 3, 'date': datetime.date(2025, 10, 1), 'customer_name': 'Romanticorp', 'customer_number': 12990, 'amount': 73640.0, 'due_date': datetime.date(2025, 10, 2), 'payment': None, 'payment_date': None, 'payment_id': None}
{'invoice_number': 4, 'date': datetime.date(2025, 1, 18), 'customer_name': 'Hal Insitute for Criminally Insane Robots', 'customer_number': 12010, 'amount': 12500.0, 'due_date': datetime.date(2025, 2, 18), 'payment': None, 'payment_date': None, 'payment_id': None}
{'invoice_number'

In [None]:
def row_gen(dataframe):
    """Lazily yield each row of ``dataframe`` as the ``str()`` of its column -> value dict."""
    yield from (str(series.to_dict()) for _label, series in dataframe.iterrows())




In [44]:
# row_gen is a generator function, so this only creates the iterator; each row string is produced on next().
rows = row_gen(acc_receivables)

In [45]:
next(rows)

"{'invoice_number': 1, 'date': datetime.date(2025, 5, 1), 'customer_name': 'Planet Express', 'customer_number': 12038, 'amount': 50000.0, 'due_date': datetime.date(2025, 5, 2), 'payment': None, 'payment_date': None, 'payment_id': None}"

In [47]:
# Column-oriented layout: {column: {index_label: value}}.
test = acc_receivables.to_dict(orient='dict')

In [48]:
pd.DataFrame(data=test)

Unnamed: 0,invoice_number,date,customer_name,customer_number,amount,due_date,payment,payment_date,payment_id
0,1,2025-05-01,Planet Express,12038,50000.0,2025-05-02,,,
1,2,2025-09-01,Mom's Friendly Robot Factory,12000,100000.0,2025-09-02,,,
2,3,2025-10-01,Romanticorp,12990,73640.0,2025-10-02,,,
3,4,2025-01-18,Hal Insitute for Criminally Insane Robots,12010,12500.0,2025-02-18,,,
4,5,2025-01-21,Cookieville Minimum-Security Orphanarium,11900,10000.0,2025-02-21,,,
5,6,2025-01-29,Panucci's Pizza,12933,1000.0,2025-02-28,,,
6,7,2025-02-02,Planet Express,12038,12300.0,2025-02-03,,,
7,8,2025-02-02,Romanticorp,12990,50000.0,2025-02-03,,,
8,9,2025-03-02,Malfunctioning Eddie's Rocket-Car Emporium,12230,76000.0,2025-03-03,,,
9,10,2025-05-02,Cookieville Minimum-Security Orphanarium,11900,80000.0,2025-05-02,,,
