In [3]:
from pathlib import Path

import pandas as pd


# Directory holding the parquet datasets. Paths are built with pathlib so the
# separator is correct on every OS; the previous "dataset_creados\\..." string
# literals only resolved on Windows.
DATA_DIR = Path("dataset_creados")

invoice       = pd.read_parquet(DATA_DIR / "invoice.parquet")
invoice_line  = pd.read_parquet(DATA_DIR / "invoiceLine.parquet")
partner       = pd.read_parquet(DATA_DIR / "partner.parquet")
product       = pd.read_parquet(DATA_DIR / "product.parquet")
payment       = pd.read_parquet(DATA_DIR / "payment.parquet")
payment_app   = pd.read_parquet(DATA_DIR / "paymentApplication.parquet")




In [4]:
def test_result(nombre, condicion):
    if condicion:
        estado = "Columnas completas"
    else:
        estado = "Columnas incompletas"

    print(f"{nombre}: {estado}")

In [5]:
# One entry per table: report label, the loaded frame, and the column names
# that frame must contain.
columnas_requeridas = [
    ("Invoice: columnas requeridas", invoice,
     {"id", "invoice_date", "due_date", "partner_id", "invoice_type", "total_amount", "remaining_balance"}),
    ("InvoiceLine: columnas requeridas", invoice_line,
     {"id", "invoice_id", "product_id", "quantity", "price"}),
    ("Product: columnas requeridas", product,
     {"id", "product_name", "cost_per_unit"}),
    ("Partner: columnas requeridas", partner,
     {"id", "display_name"}),
    ("Payment: columnas requeridas", payment,
     {"id", "payment_date", "amount"}),
    ("PaymentApplication: columnas requeridas", payment_app,
     {"payment_id", "invoice_id", "allocated_amount"}),
]

# A table passes when every required name is present among its columns.
tests = [
    (etiqueta, requeridas.issubset(tabla.columns))
    for etiqueta, tabla, requeridas in columnas_requeridas
]

for nombre, condicion in tests:
    test_result(nombre, condicion)


Invoice: columnas requeridas: Columnas completas
InvoiceLine: columnas requeridas: Columnas completas
Product: columnas requeridas: Columnas completas
Partner: columnas requeridas: Columnas completas
Payment: columnas requeridas: Columnas completas
PaymentApplication: columnas requeridas: Columnas completas


In [6]:
# Referential-integrity checks: every foreign-key value must appear in the
# referenced table's "id" column.
relaciones = [
    ("InvoiceLine → Invoice",        invoice_line["invoice_id"], invoice["id"]),
    ("InvoiceLine → Product",        invoice_line["product_id"], product["id"]),
    ("Invoice → Partner",            invoice["partner_id"],      partner["id"]),
    ("PaymentApplication → Payment", payment_app["payment_id"],  payment["id"]),
    ("PaymentApplication → Invoice", payment_app["invoice_id"],  invoice["id"]),
]

tests = [
    (etiqueta, clave_foranea.isin(clave_primaria).all())
    for etiqueta, clave_foranea, clave_primaria in relaciones
]

for nombre, condicion in tests:
    test_result(nombre, condicion)

InvoiceLine → Invoice: Columnas completas
InvoiceLine → Product: Columnas completas
Invoice → Partner: Columnas completas
PaymentApplication → Payment: Columnas completas
PaymentApplication → Invoice: Columnas completas


In [7]:
# Value-range and consistency checks on the loaded tables.
tests = []

tests.append(("Invoice: total_amount > 0",
    (invoice["total_amount"] > 0).all()
))

tests.append(("InvoiceLine: quantity > 0",
    (invoice_line["quantity"] > 0).all()
))

tests.append(("InvoiceLine: price ≥ 0",
    (invoice_line["price"] >= 0).all()
))

tests.append(("Invoice: remaining_balance ≤ total_amount",
    (invoice["remaining_balance"] <= invoice["total_amount"]).all()
))

tests.append(("Invoice: due_date ≥ invoice_date",
    (invoice["due_date"] >= invoice["invoice_date"]).all()
))

tests.append(("Payment: amount > 0",
    (payment["amount"] > 0).all()
))

# Look up each allocation's payment amount by key. Series.map keeps
# payment_app's own index, so the comparison below is row-for-row whatever the
# index or row order is. Comparing against a column of
# payment_app.merge(payment) only lined up when the merge's fresh 0..n-1 index
# happened to coincide with payment_app's index.
monto_por_pago = payment.drop_duplicates(subset="id").set_index("id")["amount"]
monto_del_pago = payment_app["payment_id"].map(monto_por_pago)

# An allocation whose payment_id has no matching payment maps to NaN, and a
# comparison against NaN is False, so such a row fails this check.
tests.append(("PaymentApp: allocated_amount ≤ payment.amount",
    payment_app["allocated_amount"].le(monto_del_pago).all()
))

for nombre, condicion in tests:
    test_result(nombre, condicion)


Invoice: total_amount > 0: Columnas completas
InvoiceLine: quantity > 0: Columnas completas
InvoiceLine: price ≥ 0: Columnas completas
Invoice: remaining_balance ≤ total_amount: Columnas completas
Invoice: due_date ≥ invoice_date: Columnas incompletas
Payment: amount > 0: Columnas completas
PaymentApp: allocated_amount ≤ payment.amount: Columnas completas


Unnamed: 0,payment_id,invoice_id,allocated_amount,id
0,1,2,999999,1


In [None]:
# Collect the offending rows for each rule, keyed by a description of the rule.
errores = {}

# Remaining balance larger than the invoice total
errores["remaining_balance > total_amount"] = invoice[invoice["remaining_balance"] > invoice["total_amount"]]

# allocated_amount larger than payment.amount.
# The payment amount is looked up by key with Series.map, which keeps
# payment_app's index, so the boolean mask is aligned with the frame it
# filters. The previous mask compared against a column of
# payment_app.merge(payment), whose index is a fresh 0..n-1 range; the `>`
# operator raises when the two Series are not identically labelled.
monto_por_pago = payment.drop_duplicates(subset="id").set_index("id")["amount"]
monto_del_pago = payment_app["payment_id"].map(monto_por_pago)
errores["allocated_amount > payment.amount"] = payment_app[
    payment_app["allocated_amount"] > monto_del_pago
]

# Inverted dates
errores["due_date < invoice_date"] = invoice[invoice["due_date"] < invoice["invoice_date"]]

# Nonexistent product
errores["product_id inexistente"] = invoice_line[~invoice_line["product_id"].isin(product["id"])]

# Nonexistent partner
errores["partner_id inexistente"] = invoice[~invoice["partner_id"].isin(partner["id"])]

# Invoices without lines
errores["Factura sin líneas"] = invoice[~invoice["id"].isin(invoice_line["invoice_id"])]



In [14]:
import pandas as pd
# Re-read the payment table from disk (pathlib keeps the path portable).
payment = pd.read_parquet(Path("dataset_creados") / "payment.parquet")

# Overwrite row 0 with inconsistent values: a due date 5 days before the
# invoice date, and an oversized allocation.
# NOTE(review): this mutates `invoice` and `payment_app` in place, so any check
# cell executed after this one sees the altered rows. Reload the frames before
# re-running the validations on the real data.
invoice.loc[0, "due_date"] = invoice.loc[0, "invoice_date"] - pd.Timedelta(days=5)
payment_app.loc[0, "allocated_amount"] = 999999

# Journal lines with row 0 unbalanced (credit set to debit - 100).
# `journal` is not used again in the visible cells.
journal = pd.read_parquet(Path("dataset_creados") / "journalLine.parquet")
journal.loc[0, "credit"] = journal.loc[0, "debit"] - 100

errores = {}
errores["remaining_balance > total_amount"] = invoice[invoice["remaining_balance"] > invoice["total_amount"]]

# Look up each allocation's payment amount by key. Series.map keeps
# payment_app's index, so the mask is aligned with the frame it filters. A
# column of payment_app.merge(payment) carries a fresh 0..n-1 index instead.
monto_por_pago = payment.drop_duplicates(subset="id").set_index("id")["amount"]
monto_del_pago = payment_app["payment_id"].map(monto_por_pago)
errores["allocated_amount > payment.amount"] = payment_app[
    payment_app["allocated_amount"] > monto_del_pago
]
errores['allocated_amount > payment.amount']



Unnamed: 0,payment_id,invoice_id,allocated_amount,id
0,1,2,999999,1


In [15]:
# Rows of `invoice` whose due_date is earlier than their invoice_date.
vencimiento_invertido = invoice["due_date"].lt(invoice["invoice_date"])
errores["due_date < invoice_date"] = invoice.loc[vencimiento_invertido]

errores["due_date < invoice_date"]

Unnamed: 0,id,partner_id,currency_id,invoice_type,invoice_date,due_date,total_amount,invoice_id,amount_paid,remaining_balance,payment_state
0,1,33,2,out,2025-08-16 11:01:29.836924,2025-08-11 11:01:29.836924,2108,,0.0,2108.0,unpaid


Plus: datos extras
Tipo 1 — Error de suma simple

→ Diferencia = 10, 100, 1000
→ Falta de suma correcta en Debe/Haber

Tipo 2 — Error de clasificación (cargo vs abono)

→ Diferencia exactamente divisible entre 2
→ Se anotó una cuenta como deudora cuando debía ser acreedora, o viceversa

Tipo 3 — Error de transposición o desliz (digit swap o shift)

→ Diferencia exactamente divisible entre 9
→ Se invirtieron dígitos o se movió el número a la derecha/izquierda

Tipo 4 — Error por omisión

→ Diferencia NO divisible entre 2 ni entre 9
→ Un asiento completo no se pasó
→ Falta de cargo o abono en libro mayor