In [1]:
from src.data.data_retriever import DataRetriever
from utils.odoo_connector import OdooConnection
import asyncio
import nest_asyncio
from forex_python.converter import CurrencyRates
import pandas as pd
import numpy as np
from datetime import datetime
nest_asyncio.apply()
pd.set_option('display.float_format', lambda x: '%.3f' % x)

In [2]:
def odoo_missing_values_to_null(df):
    object_cols = df.select_dtypes(include='object').columns
    print(object_cols)
    df[object_cols] = (df[object_cols].replace({False: pd.NA, '' : pd.NA, '/' : pd.NA}))
    df[object_cols] = df[object_cols].applymap(lambda x: np.nan if x == [] else x)
    return df

In [3]:
def convert_to_datetime(df, columns):
    for col in columns:
        try:
            # formato que no soporta nativamente to_datetime
            if df[col].astype(str).str.contains('/').any():
                df[col] = pd.to_datetime(df[col], errors='coerce', format='%d/%m/%Y')
            else:
                df[col] = pd.to_datetime(df[col], errors='coerce')
        except Exception as e:
            print(f"Error al convertir '{col}': {e}")
    return df

In [4]:
def check_invalid_date_format(series, date_format='%d/%m/%Y'):
    invalid = []
    for val in series.dropna():
        val = str(val).strip()
        if val == '' or val.lower() == 'false':
            continue
        try:
            datetime.strptime(val, date_format)
        except ValueError:
            invalid.append(val)
    return pd.Series(invalid).drop_duplicates().reset_index(drop=True)

# Exploración de los datos

# 1. Estructura de la base de datos

## 1.1. Modelos relevantes

En está sección describo los distintos modelos que deberán ser utilizados. 

También he seleccionado los campos que pueden llegar a ser importantes para el desarrollo. Cabe remarcar que con "importantes", no me refiero únicamente para entrenar el modelo de predicción de impagos, también tengo en cuenta información que el agente podría necesitar.

### 1.1.1. res.company

Contiene información sobre las empresas que forman el grupo (no clientes):
- id
- name
- currency_id (Identificador de la moneda [id, nombre])

### 1.1.2. res.partner

Contiene información sobre los partners (clientes/proveedores):
- id
- name
- email
- phone
- street
- city
- zip
- country_id
- customer_rank (>0 es cliente)
- supplier_rank (>0 es proveedor)
- category_id (sector/industria a la que pertenece)
- is_company
- company_type
- company_id
- credit
- credit_limit
- debit
- debit_limit
- industry_id
- invoice_ids
- total_due
- total_invoiced
- total_overdue
- trust
- unpaid_invoice_ids
- unpaid_invoices_count

### 1.1.3. account.move

Guarda todas las facturas y movimientos contables de la empresa, es decir, los registros de todo lo que se compra, se vende o se paga. Este será el modelo principal con el que trabajará el agente:
- id
- name
- move_type ("out_invoice", "in_invoice", "out_refund", "in_refund", "entry")
- payment_state ("not_paid", "in_payment", "paid", "partial", "reversed")
- company_id
- partner_id
- currency_id
- amount_total
- amount_paid
- amount_residual
- invoice_date
- invoice_date_due
- payment_dates
- date
- create_date
- payment_id
- payment_ids

### 1.1.4. res.currency

Contiene información sobre las monedas en las que se emiten facturas y se registran los movimientos:
- id
- name
- symbol
- rate

### 1.1.5. res.country

Contiene información sobre los países:
- id
- name
- code

### 1.1.6. res.partner.category

Representa las categorías asignadas a los partners:
- id
- name

### 1.1.7. res.partner.industry

Contiene información de la industria / sector económico de los partners:
- id
- name

## 1.2. Exploración de los datos

#### Conexión a Odoo

In [5]:
odoo_connection = OdooConnection()
asyncio.run(odoo_connection.connect())
data_retriever = DataRetriever(odoo_connection=odoo_connection)

Connected to Odoo as albert.gil@yourtechtribe.com (uid: 430)
Odoo server version: {'server_version': '16.0+e-20250313', 'server_version_info': [16, 0, 0, 'final', 0, 'e'], 'server_serie': '16.0', 'protocol_version': 1}


In [6]:
#company_df_original = pd.DataFrame(asyncio.run(data_retriever.get_all_companies()))
all_invoices_df_original = pd.DataFrame(asyncio.run(data_retriever.get_all_invoices()))
#partners_df_original = pd.DataFrame(asyncio.run(data_retriever.get_all_customer_partners()))
#currencies_df_original = pd.DataFrame(asyncio.run(data_retriever.get_all_currencies()))
#partner_categories_df_original = pd.DataFrame(asyncio.run(data_retriever.get_all_partner_categories()))
#industrys_df_original = pd.DataFrame(asyncio.run(data_retriever.get_all_industries()))
#invoice_lines_df_original = pd.DataFrame(asyncio.run(data_retriever.get_all_lines_of_all_outbound_invoices()))

Recuperadas 500 facturas, total: 500
Recuperadas 500 facturas, total: 3000
Recuperadas 500 facturas, total: 5500
Recuperadas 500 facturas, total: 8000
Recuperadas 500 facturas, total: 10500
Recuperadas 500 facturas, total: 13000
Recuperadas 500 facturas, total: 15500
Recuperadas 500 facturas, total: 18000
Recuperadas 500 facturas, total: 20500
Recuperadas 500 facturas, total: 23000
Recuperadas 500 facturas, total: 25500
Recuperadas 500 facturas, total: 28000
Recuperadas 500 facturas, total: 30500
Recuperadas 500 facturas, total: 33000
Recuperadas 500 facturas, total: 35500
Recuperadas 500 facturas, total: 38000
Recuperadas 500 facturas, total: 40500
Recuperadas 500 facturas, total: 43000
Recuperadas 500 facturas, total: 45500
Recuperadas 500 facturas, total: 48000
Recuperadas 500 facturas, total: 50500
Recuperadas 500 facturas, total: 53000
Recuperadas 500 facturas, total: 55500
Recuperadas 500 facturas, total: 58000
Recuperadas 500 facturas, total: 60500
Recuperadas 500 facturas, tota

In [7]:
invoice_lines_df_original.to_pickle("invoice_lines.pkl")

In [7]:
#company_df_original.to_pickle("companies.pkl")
#invoices_df_original.to_pickle("invoices.pkl")
#partners_df_original.to_pickle("partners.pkl")
all_invoices_df_original.to_pickle("all_invoices.pkl")
#currencies_df_original.to_pickle("currencies.pkl")
#partner_categories_df_original.to_pickle("partner_categories.pkl")
#industrys_df_original.to_pickle("industries.pkl")  

In [6]:
invoices_df = pd.read_pickle("invoices.pkl")
company_df = pd.read_pickle("companies.pkl")
partners_df = pd.read_pickle("partners.pkl")
currencies_df = pd.read_pickle("currencies.pkl")
partner_categories_df = pd.read_pickle("partner_categories.pkl")
industries_df = pd.read_pickle("industries.pkl")

In [123]:
invoices_df = invoices_df_original.copy()
company_df = company_df_original.copy()
partners_df = partners_df_original.copy()
currencies_df = currencies_df_original.copy()
partner_categories_df = partner_categories_df_original.copy()

NameError: name 'company_df_original' is not defined

In [None]:
invoices_df = pd.read_csv
company_df = company_df_original.copy()
partners_df = partners_df_original.copy()
currencies_df = currencies_df_original.copy()
partner_categories_df = partner_categories_df_original.copy()

### 1.2.1. res.company

In [46]:
company_df

Unnamed: 0,id,name,currency_id
0,1,"Grupo Viko Digital Marketing, S.A.","[1, EUR]"
1,3,Elogia Media S.L.,"[1, EUR]"
2,2,Ibrands Medios Interactivos SL,"[1, EUR]"
3,5,Kraz Data Solutions SL,"[1, EUR]"
4,6,Marketing4ecommerce Digital Content SL,"[1, EUR]"
5,13,Octoplus Digital Shelf Optimization SL,"[1, EUR]"
6,7,Tandem Trade Marketing SL,"[1, EUR]"
7,11,"DigitalPla2021, S.L.","[1, EUR]"
8,14,Ideas y Estrategia Digital SL,"[1, EUR]"
9,8,INICIATIVAS VIRTUALES DE MEXICO,"[33, MXN]"


El grupo está formado por 12 empresas, 9 usan el euro y 3 el peso mexicano.

### 1.2.2. account.move

#### Inspección inicial

In [40]:
invoices_df.head(7000)

Unnamed: 0,id,name,move_type,payment_state,company_id,partner_id,currency_id,amount_total,amount_residual,invoice_date,invoice_date_due,payment_dates,company_name,partner_name,currency_name
0,198507,403-0199881-4444363,out_invoice,not_paid,7,1,1,43.560,43.560,2025-11-10,2025-11-06,,Tandem Trade Marketing SL,"Grupo Viko Digital Marketing, S.A.",EUR
1,198549,FVM/2025/00068,out_invoice,not_paid,6,14913,1,121.000,121.000,2025-11-07,2025-12-07,,Marketing4ecommerce Digital Content SL,"Ser Sport, S.L.",EUR
3,198522,ES501155NOOJRS,out_invoice,not_paid,7,1,1,116.160,116.160,2025-11-06,2025-11-06,,Tandem Trade Marketing SL,"Grupo Viko Digital Marketing, S.A.",EUR
4,198516,ES501152NOOJRS,out_invoice,not_paid,7,1,1,43.560,43.560,2025-11-06,2025-11-06,,Tandem Trade Marketing SL,"Grupo Viko Digital Marketing, S.A.",EUR
6,198471,FVM/2025/00067,out_invoice,not_paid,6,8870,1,2420.000,2420.000,2025-11-05,2025-11-05,,Marketing4ecommerce Digital Content SL,Hero España SA,EUR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7037,174681,INV-FR-476254813-2024-347,out_invoice,paid,7,13924,1,10.200,0.000,2024-12-01,2024-12-16,03/12/2024,Tandem Trade Marketing SL,Marketplaces Francia,EUR
7038,174697,INV-FR-476254813-2024-346,out_invoice,paid,7,13924,1,10.200,0.000,2024-12-01,2024-12-16,03/12/2024,Tandem Trade Marketing SL,Marketplaces Francia,EUR
7039,174664,INV-DE-476254813-2024-74,out_invoice,paid,7,13925,1,15.910,0.000,2024-12-01,2024-12-16,03/12/2024,Tandem Trade Marketing SL,Marketplaces Alemania,EUR
7040,174701,INV-DE-476254813-2024-73,out_invoice,paid,7,10892,1,19.820,0.000,2024-12-01,2024-12-16,03/12/2024,Tandem Trade Marketing SL,Marketplaces España,EUR


A simple vista se puede apreciar:
- Se deben convertir los valores '', [] y False (en columnas no booleanas) a NA
- Facturas sin nombre o con formatos muy diferentes
- Las últimas facturas aparecen impagadas por ser demasiado recientes
- Parece que hay algunos campos que no tienen fecha de la factura
- payment_id y payment_ids parecen no tener nada

Convierto los valores False / listas vacías a valores NA

In [9]:
invoices_df = odoo_missing_values_to_null(invoices_df)

Index(['name', 'move_type', 'payment_state', 'company_id', 'partner_id',
       'currency_id', 'invoice_date', 'invoice_date_due', 'payment_dates',
       'date', 'create_date', 'payment_ids'],
      dtype='object')


  df[object_cols] = df[object_cols].applymap(lambda x: np.nan if x == [] else x)


In [10]:
invoices_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24288 entries, 0 to 24287
Data columns (total 17 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   id                24288 non-null  int64  
 1   name              24248 non-null  object 
 2   move_type         24288 non-null  object 
 3   payment_state     24288 non-null  object 
 4   company_id        24288 non-null  object 
 5   partner_id        24280 non-null  object 
 6   currency_id       24288 non-null  object 
 7   amount_total      24288 non-null  float64
 8   amount_paid       24288 non-null  float64
 9   amount_residual   24288 non-null  float64
 10  invoice_date      24276 non-null  object 
 11  invoice_date_due  24288 non-null  object 
 12  payment_dates     19048 non-null  object 
 13  date              24288 non-null  object 
 14  create_date       24288 non-null  object 
 15  payment_id        24288 non-null  bool   
 16  payment_ids       0 non-null      float6

Analizando los valores null:
- payment_ids es todo null
- Hay 12 facturas sin fecha, 8 sin información sobre el cliente y 40 sin nombre
- Varias facturas sin fecha del pago (impagadas o recientes)

A simple vista, date, create_date, payment_id y payment_ids no son de utilidad, las dos primeras no aportan ninguna información, ya tengo invoice_date, invoice_date_due y payment_dates y las dos últimas son todo False y NA.

In [11]:
invoices_df = invoices_df.drop(columns=['date', 'create_date', 'payment_id', 'payment_ids'])

Las listas company_id, partner_id y currency_id, las separaré en dos columnas cada una

In [12]:
invoices_df['company_name'] = invoices_df['company_id'].apply(lambda x: x[1])
invoices_df['company_id'] = invoices_df['company_id'].apply(lambda x: x[0])

In [13]:
invoices_df['partner_name'] = invoices_df['partner_id'].apply(lambda x: x[1] if isinstance(x, list) else pd.NA)
invoices_df['partner_id'] = invoices_df['partner_id'].apply(lambda x: x[0] if isinstance(x, list) else pd.NA)
invoices_df['currency_name'] = invoices_df['currency_id'].apply(lambda x: x[1])
invoices_df['currency_id'] = invoices_df['currency_id'].apply(lambda x: x[0])

In [14]:
invoices_df.head(10000)

Unnamed: 0,id,name,move_type,payment_state,company_id,partner_id,currency_id,amount_total,amount_paid,amount_residual,invoice_date,invoice_date_due,payment_dates,company_name,partner_name,currency_name
0,198507,403-0199881-4444363,out_invoice,not_paid,7,1,1,43.560,0.000,43.560,2025-11-10,2025-11-06,,Tandem Trade Marketing SL,"Grupo Viko Digital Marketing, S.A.",EUR
1,198549,FVM/2025/00068,out_invoice,not_paid,6,14913,1,121.000,0.000,121.000,2025-11-07,2025-12-07,,Marketing4ecommerce Digital Content SL,"Ser Sport, S.L.",EUR
2,198548,,out_invoice,not_paid,6,14913,1,726.000,0.000,726.000,,2025-12-07,,Marketing4ecommerce Digital Content SL,"Ser Sport, S.L.",EUR
3,198522,ES501155NOOJRS,out_invoice,not_paid,7,1,1,116.160,0.000,116.160,2025-11-06,2025-11-06,,Tandem Trade Marketing SL,"Grupo Viko Digital Marketing, S.A.",EUR
4,198516,ES501152NOOJRS,out_invoice,not_paid,7,1,1,43.560,0.000,43.560,2025-11-06,2025-11-06,,Tandem Trade Marketing SL,"Grupo Viko Digital Marketing, S.A.",EUR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,158574,RECEIPT-IT-476254813-2024-791,out_invoice,paid,7,13926,1,11.990,0.000,0.000,2024-10-07,2024-10-22,22/10/2024,Tandem Trade Marketing SL,Marketplaces Italia,EUR
9996,158560,RECEIPT-IT-476254813-2024-790,out_invoice,paid,7,13926,1,11.990,0.000,0.000,2024-10-07,2024-10-22,22/10/2024,Tandem Trade Marketing SL,Marketplaces Italia,EUR
9997,158563,RECEIPT-IT-476254813-2024-789,out_invoice,paid,7,13926,1,11.990,0.000,0.000,2024-10-07,2024-10-22,22/10/2024,Tandem Trade Marketing SL,Marketplaces Italia,EUR
9998,158570,RECEIPT-IT-476254813-2024-788,out_invoice,paid,7,13926,1,11.990,0.000,0.000,2024-10-07,2024-10-22,07/10/2024,Tandem Trade Marketing SL,Marketplaces Italia,EUR


In [15]:
invoices_df["id"].duplicated().sum()

np.int64(0)

No hay duplicados

In [14]:
invoices_df.nunique()

id                  24288
name                23746
move_type               1
payment_state           5
company_id             12
partner_id           1549
currency_id             6
amount_total         6836
amount_paid             1
amount_residual       861
invoice_date          981
invoice_date_due     1091
payment_dates         817
company_name           12
partner_name         1548
currency_name           6
dtype: int64

Se puede apreciar:
- Varios nombres de facturas vacíos y duplicados como se ha visto antes
- Aunque las empresas trabajan con dos monedas internamente, han operado con clientes en 6 monedas diferentes, habrá que hacer las conversiones
- Parece que amount_paid no tiene ningún valor (0) por tanto, tampoco aporta ninguna información, amount_residual ya tiene lo que falta por pagar, se puede inferir la cantidad pagada
- Todos los otros campos corresponden a lo esperado

#### amount_paid

In [15]:
invoices_df['amount_paid'].mean()

np.float64(0.0)

In [21]:
invoices_df = invoices_df.drop(columns=['amount_paid'])

#### name

Todas las facturas sin nombre no han sido pagadas

In [22]:
invoices_na_name = invoices_df[invoices_df["name"].isna()]

In [23]:
invoices_na_name.head()

Unnamed: 0,id,name,move_type,payment_state,company_id,partner_id,currency_id,amount_total,amount_residual,invoice_date,invoice_date_due,payment_dates,company_name,partner_name,currency_name
2,198548,,out_invoice,not_paid,6,14913,1,726.0,726.0,,2025-12-07,,Marketing4ecommerce Digital Content SL,"Ser Sport, S.L.",EUR
5,198515,,out_invoice,not_paid,7,1,1,43.56,43.56,2025-11-06,2025-11-06,,Tandem Trade Marketing SL,"Grupo Viko Digital Marketing, S.A.",EUR
10,198444,,out_invoice,not_paid,6,15058,1,60.5,60.5,,2025-11-28,,Marketing4ecommerce Digital Content SL,"Banbu Cosmética Natural, S.L.",EUR
1134,197217,,out_invoice,not_paid,7,10892,1,1.0,1.0,2025-07-28,2025-08-12,,Tandem Trade Marketing SL,Marketplaces España,EUR
1793,196524,,out_invoice,not_paid,7,14617,1,12.2,12.2,,2025-06-05,,Tandem Trade Marketing SL,Marketplaces EMPRESA Italia,EUR


In [24]:
for col in invoices_na_name.columns:
    print(f"Columna: {col}")
    print(invoices_na_name[col].unique())

Columna: id
[198548 198515 198444 197217 196524 196523 196186 196144 195208 195203
 195137 195020 194558 194555 194552 193263 192960 192730 192721 192712
 192709 192706 192700 192697 192694 192682 192658 192655 192629 192628
 192623 189566 195202 195200 189531 182747 181419 179523  49220  45100]
Columna: name
[<NA>]
Columna: move_type
['out_invoice']
Columna: payment_state
['not_paid']
Columna: company_id
[6 7 3 8 1]
Columna: partner_id
[14913 1 15058 10892 14617 8571 8996 12056 8870 13043 12537 11041 13567
 9759 10566 11187 11184 13904 11185 11186 9957 8960 14937 <NA> 8679 12100]
Columna: currency_id
[ 1 33  2]
Columna: amount_total
[7.2600000e+02 4.3560000e+01 6.0500000e+01 1.0000000e+00 1.2200000e+01
 1.0000000e+01 0.0000000e+00 4.6400000e+04 4.5820000e+04 1.0035000e+04
 9.2800000e+03 3.4800000e+03 1.3920000e+05 6.3644966e+05 4.0600000e+04
 7.0800000e+02 1.4140000e+03 7.6137760e+04 1.2760000e+05 1.1488524e+05
 9.2624840e+04 9.8901600e+03 2.7980360e+04 7.0794800e+04 1.0440000e+05
 1.

Parece ser que todas las facturas sin nombre, son pruebas o drafts de facturas que no llegaron a venta, por tanto se pueden eliminar:

In [25]:
size_na_name = len(invoices_na_name)
count_no_invoice = 0
count_draft_invoice = 0
invoices_no_draft = []
for id in invoices_na_name['id']:
    lines = asyncio.run(data_retriever.get_invoice_line_by_invoice_id(invoice_id=id))
    if len(lines) > 0:
        if 'Draft Invoice' in lines[0]['move_id'][1]:
            count_draft_invoice += 1
        else:
            invoices_no_draft.append(lines)
    else:
        count_no_invoice += 1
print(f"Total invoices with NA name: {size_na_name}")
print(f"Invoices with no invoice lines: {count_no_invoice}")
print(f"Invoices with draft invoice lines: {count_draft_invoice}")
print(f"Invoices with invoice lines but no draft: {len(invoices_no_draft)}")
invoices_no_draft[0]

Total invoices with NA name: 40
Invoices with no invoice lines: 3
Invoices with draft invoice lines: 34
Invoices with invoice lines but no draft: 3


[{'id': 646426,
  'move_id': [195203, '(* 195203) (Pruebas)'],
  'product_id': [46, '[AMAZON DSP (ELOGIA)] AMAZON DSP'],
  'quantity': 1.0,
  'price_unit': 0.0,
  'tax_ids': [102]},
 {'id': 646427,
  'move_id': [195203, '(* 195203) (Pruebas)'],
  'product_id': False,
  'quantity': 0.0,
  'price_unit': 0.0,
  'tax_ids': []}]

Elimino facturas sin nombre

In [26]:
invoices_df = invoices_df.dropna(subset=['name'])

Miro duplicados de nombres

In [27]:
invoices_df[invoices_df['name'].duplicated()].tail()

Unnamed: 0,id,name,move_type,payment_state,company_id,partner_id,currency_id,amount_total,amount_residual,invoice_date,invoice_date_due,payment_dates,company_name,partner_name,currency_name
24268,9688,INV1/2022/00002,out_invoice,paid,3,8663,1,12195.41,0.0,2022-12-31,2023-02-18,21/02/2023,Elogia Media S.L.,Irmaos Vila Nova SA,EUR
24272,12146,INV1/2022/00001,out_invoice,paid,7,7454,1,2770.9,0.0,2022-12-31,2023-03-10,13/03/2023,Tandem Trade Marketing SL,"Vent de France La Tramuntana, S.A.",EUR
24273,11093,INV1/2022/00001,out_invoice,paid,2,8290,1,30.25,0.0,2022-12-31,2023-01-15,27/01/2023,Ibrands Medios Interactivos SL,"Into The Marketing Media Group, S.L.",EUR
24274,10330,INV1/2022/00001,out_invoice,paid,6,8619,1,5375.0,0.0,2022-12-31,2023-02-15,17/03/2023,Marketing4ecommerce Digital Content SL,"Amazon Payments Europe, S.C.A.",EUR
24275,9687,INV1/2022/00001,out_invoice,paid,3,8933,1,17272.33,0.0,2022-12-31,2023-03-15,17/03/2023,Elogia Media S.L.,"Zambon, S.A.U.",EUR


Tienen problemas de nombres únicos para facturas entre empresas, 502 facturas con nombres duplicados pero mayoritariamente de diferentes empresas (solo 3 facturas de la misma empresa con el mismo nombre), de momento no afecta demasiado pero está bien saberlo

In [28]:
print(f"Facturas con nombres iguales: {len(invoices_df[invoices_df['name'].duplicated()])}" )
print(f"Facturas con nombre y company_id iguales: {len(invoices_df[invoices_df.duplicated(subset=['name', 'company_id'])])}")
invoices_df[invoices_df.duplicated(subset=['name', 'company_id'])]

Facturas con nombres iguales: 475
Facturas con nombre y company_id iguales: 3


Unnamed: 0,id,name,move_type,payment_state,company_id,partner_id,currency_id,amount_total,amount_residual,invoice_date,invoice_date_due,payment_dates,company_name,partner_name,currency_name
1166,197173,FACT-2025-XYZ,out_invoice,not_paid,1,,1,0.0,0.0,,2025-07-23,,"Grupo Viko Digital Marketing, S.A.",,EUR
1167,197172,FACT-2025-XYZ,out_invoice,not_paid,1,123.0,1,484.0,484.0,2025-07-23,2025-07-23,,"Grupo Viko Digital Marketing, S.A.",María Sieiro Alfonsin,EUR
1168,197171,FACT-2025-XYZ,out_invoice,not_paid,1,123.0,1,484.0,484.0,2025-07-23,2025-07-23,,"Grupo Viko Digital Marketing, S.A.",María Sieiro Alfonsin,EUR


In [29]:
invoices_df['name'].info()
invoices_df['name'].nunique()
invoices_df['name'].isna().sum()

<class 'pandas.core.series.Series'>
Index: 23931 entries, 0 to 24287
Series name: name
Non-Null Count  Dtype 
--------------  ----- 
23931 non-null  object
dtypes: object(1)
memory usage: 373.9+ KB


np.int64(0)

#### payment_state

In [74]:
invoices_df['payment_state'].value_counts()

payment_state
paid          18931
not_paid       4839
reversed        317
in_payment      141
partial          20
Name: count, dtype: int64

**Facturas revertidas**

Una factura revertida puede ser por muchos motivos, desde una devolución legítima, una cancelación o un error.

In [16]:
reversed_invoices = invoices_df[invoices_df['payment_state'] == 'reversed']

In [17]:
valid_reversed_ids = {}
for id in reversed_invoices['id']:
    reversed_move_id = asyncio.run(odoo_connection.search_read('account.move' , [('id', '=', id)], ['reversal_move_id'], offset=0, limit=0))
    if reversed_move_id and reversed_move_id[0]['reversal_move_id']:
        valid_reversed_ids[id] = reversed_move_id[0]['reversal_move_id'][0]
print(valid_reversed_ids)


{163920: 182474, 166015: 184456, 165692: 185246, 165681: 185244, 165569: 166018, 163760: 177520, 156496: 182458, 167887: 173420, 158252: 184454, 150392: 184332, 149861: 173419, 136528: 182456, 138016: 174183, 136522: 194545, 137970: 173418, 132314: 140543, 120248: 138015, 127142: 194543, 128644: 173417, 129147: 184331, 127978: 136606, 127272: 137783, 128445: 173416, 118437: 138888, 118611: 160300, 118430: 173415, 118237: 136713, 111526: 182448, 118385: 118738, 103604: 112508, 98854: 112506, 102722: 143785, 103388: 136550, 98855: 136326, 98859: 136328, 104476: 109536, 104294: 109534, 82969: 112504, 80681: 161319, 88466: 101264, 88353: 101259, 88288: 100624, 88180: 101255, 87677: 101249, 87672: 101245, 87578: 89245, 87520: 101243, 87244: 100633, 77841: 114825, 77501: 89485, 77840: 89484, 77062: 114823, 77065: 185739, 85759: 100619, 85572: 100621, 84983: 100629, 81595: 92909, 75074: 80694, 75073: 160381, 65705: 109539, 72325: 114821, 67725: 98568, 70611: 120118, 70323: 80693, 81042: 10061

Las facturas que tienen otra factura asociada con la revertida son todas devoluciones, las que no tienen no hay forma de saber exactamente el motivo, por tanto, dada la baja concentración de estas en todo el dataset, simplemente eliminaré las filas

In [18]:
reversal_invoices = []
reversal_invoices_move_type = []
for reversed_id, original_id in valid_reversed_ids.items():
    reversal_invoices.append(asyncio.run(data_retriever.get_invoice_by_id(invoice_id=original_id)))
    reversal_invoices_move_type.append(reversal_invoices[-1]['move_type'])
print(reversal_invoices_move_type)

['out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_refund', 'out_

Elimino las facturas revertidas

In [19]:
invoices_df = invoices_df[~invoices_df['id'].isin(reversed_invoices['id'])]

In [20]:
invoices_df['payment_state'].value_counts()

payment_state
paid          18931
not_paid       4879
in_payment      141
partial          20
Name: count, dtype: int64

In [30]:
invoices_df

Unnamed: 0,id,name,move_type,payment_state,company_id,partner_id,currency_id,amount_total,amount_residual,invoice_date,invoice_date_due,payment_dates,company_name,partner_name,currency_name
0,198507,403-0199881-4444363,out_invoice,not_paid,7,1,1,43.560,43.560,2025-11-10,2025-11-06,,Tandem Trade Marketing SL,"Grupo Viko Digital Marketing, S.A.",EUR
1,198549,FVM/2025/00068,out_invoice,not_paid,6,14913,1,121.000,121.000,2025-11-07,2025-12-07,,Marketing4ecommerce Digital Content SL,"Ser Sport, S.L.",EUR
3,198522,ES501155NOOJRS,out_invoice,not_paid,7,1,1,116.160,116.160,2025-11-06,2025-11-06,,Tandem Trade Marketing SL,"Grupo Viko Digital Marketing, S.A.",EUR
4,198516,ES501152NOOJRS,out_invoice,not_paid,7,1,1,43.560,43.560,2025-11-06,2025-11-06,,Tandem Trade Marketing SL,"Grupo Viko Digital Marketing, S.A.",EUR
6,198471,FVM/2025/00067,out_invoice,not_paid,6,8870,1,2420.000,2420.000,2025-11-05,2025-11-05,,Marketing4ecommerce Digital Content SL,Hero España SA,EUR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24283,1043,FVK/2022/00005,out_invoice,paid,5,7812,1,242.000,0.000,2022-10-31,2023-02-20,23/02/2023,Kraz Data Solutions SL,"Salvetti & Llombart, S.L.",EUR
24284,1042,FVK/2022/00004,out_invoice,paid,5,7485,1,9075.000,0.000,2022-10-31,2023-01-02,06/01/2023,Kraz Data Solutions SL,"Unilever España, S.A.",EUR
24285,1041,FVK/2022/00003,out_invoice,paid,5,7773,1,5808.000,0.000,2022-10-31,2022-12-30,02/01/2023,Kraz Data Solutions SL,Kave Home S.L.,EUR
24286,1040,FVK/2022/00002,out_invoice,paid,5,7409,1,4961.000,0.000,2022-10-31,2023-01-25,30/01/2023,Kraz Data Solutions SL,"Ferrer Internacional, S.A.",EUR


**Facturas en proceso de pago**

Parece que todas las facturas en proceso de pago no tienen cantidades por pagar en amount_residual a diferencia de las facturas no pagas o parcialmente pagadas. Eso significa normalmente que no están conciliadas, pero como account.payment únicamente tiene pagos salientes de las empresas y no entrantes de los clientes, no puedo comprobarlo.

In [7]:
invoices_in_payment = invoices_df[invoices_df['payment_state'] == 'in_payment']
invoices_in_payment.head()

Unnamed: 0,id,name,move_type,payment_state,company_id,partner_id,currency_id,amount_total,amount_residual,invoice_date,invoice_date_due,payment_dates,company_name,partner_name,currency_name
3426,183800,MIR/2025/00185,out_invoice,in_payment,7,10892,1,23.9,0.0,2025-01-30,2025-02-14,03/02/2025,Tandem Trade Marketing SL,Marketplaces España,EUR
3427,183799,MIR/2025/00184,out_invoice,in_payment,7,10892,1,19.9,0.0,2025-01-30,2025-02-14,03/02/2025,Tandem Trade Marketing SL,Marketplaces España,EUR
3428,183798,MIR/2025/00183,out_invoice,in_payment,7,10892,1,26.98,0.0,2025-01-30,2025-02-14,03/02/2025,Tandem Trade Marketing SL,Marketplaces España,EUR
3429,183797,MIR/2025/00182,out_invoice,in_payment,7,10892,1,26.98,0.0,2025-01-30,2025-02-14,03/02/2025,Tandem Trade Marketing SL,Marketplaces España,EUR
3430,183796,MIR/2025/00181,out_invoice,in_payment,7,10892,1,26.98,0.0,2025-01-30,2025-02-14,03/02/2025,Tandem Trade Marketing SL,Marketplaces España,EUR


In [8]:
invoices_with_residual = invoices_df[invoices_df['amount_residual'] > 0]
invoices_with_residual_in_payment = invoices_in_payment[invoices_in_payment['amount_residual'] > 0]
print(f"Facturas en proceso de pago con amount_residual > 0: {len(invoices_with_residual_in_payment)}")
print(f"Facturas con amount_residual > 0: {len(invoices_with_residual)}")
invoices_with_residual.head()

Facturas en proceso de pago con amount_residual > 0: 0
Facturas con amount_residual > 0: 4855


Unnamed: 0,id,name,move_type,payment_state,company_id,partner_id,currency_id,amount_total,amount_residual,invoice_date,invoice_date_due,payment_dates,company_name,partner_name,currency_name
0,198507,403-0199881-4444363,out_invoice,not_paid,7,1,1,43.56,43.56,2025-11-10,2025-11-06,,Tandem Trade Marketing SL,"Grupo Viko Digital Marketing, S.A.",EUR
1,198549,FVM/2025/00068,out_invoice,not_paid,6,14913,1,121.0,121.0,2025-11-07,2025-12-07,,Marketing4ecommerce Digital Content SL,"Ser Sport, S.L.",EUR
3,198522,ES501155NOOJRS,out_invoice,not_paid,7,1,1,116.16,116.16,2025-11-06,2025-11-06,,Tandem Trade Marketing SL,"Grupo Viko Digital Marketing, S.A.",EUR
4,198516,ES501152NOOJRS,out_invoice,not_paid,7,1,1,43.56,43.56,2025-11-06,2025-11-06,,Tandem Trade Marketing SL,"Grupo Viko Digital Marketing, S.A.",EUR
6,198471,FVM/2025/00067,out_invoice,not_paid,6,8870,1,2420.0,2420.0,2025-11-05,2025-11-05,,Marketing4ecommerce Digital Content SL,Hero España SA,EUR


In [35]:
invoices_df.to_pickle("invoices_cleanedv1.pkl")

In [6]:
invoices_df = pd.read_pickle("invoices_cleanedv1.pkl")

Voy a ver si en account.move.line puedo encontrar más información.

In [None]:
invoice_lines = []
for id in invoices_in_payment['id']:
    lines = asyncio.run(data_retriever.get_invoice_line_by_invoice_id(invoice_id=id))
    invoice_lines.append(lines)


In [16]:
flat_list = [item for sublist in invoice_lines for item in sublist]
invoice_lines_in_payment = pd.DataFrame(flat_list)

In [21]:
invoices_lines_in_payment

Unnamed: 0,id,move_id,product_id,quantity,price_unit,tax_ids,reconciled,blocked,date_maturity,debit,credit,balance,amount_residual,currency_id,company_id,discount,discount_percentage,full_reconcile_id,is_downpayment,reconcile_model_id
0,610843,"[183800, MIR/2025/00185 (7004977564677)]","[240, [752460] JW Red 1L 06X01...",1.000,19.750,[701],False,False,,0.000,19.750,-19.750,0.000,"[1, EUR]","[7, Tandem Trade Marketing SL]",0.000,0.000,,False,False
1,610844,"[183800, MIR/2025/00185 (7004977564677)]",,0.000,0.000,,False,False,,0.000,4.150,-4.150,0.000,"[1, EUR]","[7, Tandem Trade Marketing SL]",0.000,0.000,,False,False
2,610845,"[183800, MIR/2025/00185 (7004977564677)]",,0.000,0.000,,True,False,2025-02-14,23.900,0.000,23.900,0.000,"[1, EUR]","[7, Tandem Trade Marketing SL]",0.000,0.000,"[57259, A57259]",False,False
3,610840,"[183799, MIR/2025/00184 (6921669964677)]","[241, [752394] JW Red 70cl 06X01...",1.000,16.450,[701],False,False,,0.000,16.450,-16.450,0.000,"[1, EUR]","[7, Tandem Trade Marketing SL]",0.000,0.000,,False,False
4,610841,"[183799, MIR/2025/00184 (6921669964677)]",,0.000,0.000,,False,False,,0.000,3.450,-3.450,0.000,"[1, EUR]","[7, Tandem Trade Marketing SL]",0.000,0.000,,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
487,35852,"[16908, FVE/2023/00026]","[12, [UX (ELOGIA CRO)] UX]",1.000,1000.000,[302],False,False,,0.000,1000.000,-1000.000,0.000,"[1, EUR]","[3, Elogia Media S.L.]",0.000,0.000,,False,False
488,35853,"[16908, FVE/2023/00026]","[14, [SEO (ELOGIA CRO)] SEO]",1.000,600.000,[302],False,False,,0.000,600.000,-600.000,0.000,"[1, EUR]","[3, Elogia Media S.L.]",0.000,0.000,,False,False
489,35854,"[16908, FVE/2023/00026]","[11, [FEES PAID (ELOGIA)] FEES PAID MEDIA]",1.000,500.000,[302],False,False,,0.000,500.000,-500.000,0.000,"[1, EUR]","[3, Elogia Media S.L.]",0.000,0.000,,False,False
490,35855,"[16908, FVE/2023/00026]",,0.000,0.000,,False,False,,0.000,924.000,-924.000,0.000,"[1, EUR]","[3, Elogia Media S.L.]",0.000,0.000,,False,False


Parece ser que todas las facturas tienen lineas asociadas

In [None]:
invoices_lines_in_payment = odoo_missing_values_to_null(invoice_lines_in_payment)
invoices_lines_in_payment.info()

Index(['move_id', 'product_id', 'tax_ids', 'date_maturity', 'currency_id',
       'company_id', 'full_reconcile_id'],
      dtype='object')
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 492 entries, 0 to 491
Data columns (total 20 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   id                   492 non-null    int64  
 1   move_id              492 non-null    object 
 2   product_id           210 non-null    object 
 3   quantity             492 non-null    float64
 4   price_unit           492 non-null    float64
 5   tax_ids              210 non-null    object 
 6   reconciled           492 non-null    bool   
 7   blocked              492 non-null    bool   
 8   date_maturity        141 non-null    object 
 9   debit                492 non-null    float64
 10  credit               492 non-null    float64
 11  balance              492 non-null    float64
 12  amount_residual      492 non-null    float64
 13  

  df[object_cols] = df[object_cols].applymap(lambda x: np.nan if x == [] else x)


In [23]:
invoices_lines_in_payment['move_id'] = invoices_lines_in_payment['move_id'].apply(lambda x: x[0] if isinstance(x, list) else pd.NA)

In [25]:
invoices_lines_in_payment['move_id'].nunique()

141

Aunque las facturas aparecen en proceso de pago, en las lineas de la factura aparecen reconciliadas, voy a comparar con una factura no pagada

In [27]:
invoices_lines_in_payment['reconciled'].value_counts()

reconciled
False    350
True     142
Name: count, dtype: int64

En el caso de la factura impagada, no aparece reconciliada, por tanto asumo que es un error y algunas facturas en proceso de pago si que estan pagadas

In [31]:
lines_invoice_not_paid = asyncio.run(data_retriever.get_invoice_line_by_invoice_id(198522))
pd.DataFrame(lines_invoice_not_paid)['reconciled']

0    False
1    False
2    False
3    False
4    False
Name: reconciled, dtype: bool

En las facturas pagadas si que aparece y coincide con el balance el total de la factura

In [50]:
lines_invoice_paid = asyncio.run(data_retriever.get_invoice_line_by_invoice_id(174681))
print("Reconciled: ", lines_invoice_paid[2]['reconciled'])
print("Total amount paid:", invoices_df[invoices_df['id'] == 174681]['amount_total'].values[0])
print("Total balance line: ", lines_invoice_paid[2]['balance'])
pd.DataFrame(lines_invoice_paid)['reconciled']
lines_invoice_paid[2]['balance'] == invoices_df[invoices_df['id'] == 174681]['amount_total']

Reconciled:  True
Total amount paid: 10.2
Total balance line:  10.2


7037    True
Name: amount_total, dtype: bool

Compruebo que no tengan cantidades restantes por pagar y no es el caso

In [84]:
invoices_lines_in_payment['amount_residual'].mean()

np.float64(0.0)

Comprobando que todas las facturas en proceso de pago tengan el mismo balance que el monto total, se puede ver que todas menos una estan pagadas al 100%

In [55]:
invoices_lines_in_payment_reconciled = invoices_lines_in_payment[invoices_lines_in_payment['reconciled'] == True]
reconciled_in_payment = 0
not_paid = []
for move_id in invoices_lines_in_payment_reconciled['move_id']:
    related_invoice = invoices_in_payment[invoices_in_payment['id'] == move_id]
    if related_invoice['amount_total'].values[0] == invoices_lines_in_payment_reconciled[invoices_lines_in_payment_reconciled['move_id'] == move_id]['balance'].values[0]:
        reconciled_in_payment += 1
    else:
        not_paid.append(move_id)
print("Reconciled and fully paid invoices in 'in_payment' state:", reconciled_in_payment)


Reconciled and fully paid invoices in 'in_payment' state: 140


Parece un error simplemente, -1680 + 268,8 = 1948,8, y amount_residual = 0, por tanto también está pagada

In [None]:
invoice_lines_in_payment[invoice_lines_in_payment['move_id'] == 153724]

Unnamed: 0,id,move_id,product_id,quantity,price_unit,tax_ids,reconciled,blocked,date_maturity,debit,credit,balance,amount_residual,currency_id,company_id,discount,discount_percentage,full_reconcile_id,is_downpayment,reconcile_model_id
475,510856,153724,"[22, [CSP (IBRANDS)] CONSUPERMISO]",42.0,40.0,[1002],False,False,,0.0,1680.0,-1680.0,0.0,"[33, MXN]","[12, IBRANDS MEDIOS INTERACTIVOS DE MEXICO]",0.0,0.0,,False,False
476,510857,153724,,0.0,0.0,,True,False,,0.0,268.8,-268.8,0.0,"[33, MXN]","[12, IBRANDS MEDIOS INTERACTIVOS DE MEXICO]",0.0,0.0,"[50388, A50388]",False,False
477,510858,153724,,0.0,0.0,,True,False,2024-11-21,1948.8,0.0,1948.8,0.0,"[33, MXN]","[12, IBRANDS MEDIOS INTERACTIVOS DE MEXICO]",0.0,0.0,"[50389, A50389]",False,False


In [58]:
invoices_df[invoices_df['id'] == 153724]

Unnamed: 0,id,name,move_type,payment_state,company_id,partner_id,currency_id,amount_total,amount_residual,invoice_date,invoice_date_due,payment_dates,company_name,partner_name,currency_name
9324,153724,IBR202300032,out_invoice,in_payment,12,8987,33,1948.8,0.0,2024-10-22,2024-11-21,29/11/2024,IBRANDS MEDIOS INTERACTIVOS DE MEXICO,"ASALES, SERVICIOS DE MARKETING EN INTERNET",MXN


Convierto las facturas con estado de en proceso de pago en pagadas

In [61]:
invoices_df['payment_state'] = invoices_df['payment_state'].apply(lambda x: 'paid' if x == 'in_payment' else x)

In [62]:
invoices_df.to_pickle("invoices_cleanedv3.pkl")

**Facturas pagadas parcialmente**

In [9]:
invoices_df = pd.read_pickle("invoices_cleanedv3.pkl")

In [10]:
invoices_df['payment_state'].value_counts()

payment_state
paid        19072
not_paid     4839
partial        20
Name: count, dtype: int64

In [11]:
invoices_partial = invoices_df[invoices_df['payment_state'] == 'partial']
invoices_partial

Unnamed: 0,id,name,move_type,payment_state,company_id,partner_id,currency_id,amount_total,amount_residual,invoice_date,invoice_date_due,payment_dates,company_name,partner_name,currency_name
3033,189107,FVM/2025/00051,out_invoice,partial,6,13255,1,709.5,104.5,2025-02-07,2025-03-09,12/03/2025,Marketing4ecommerce Digital Content SL,Parafarma2go S.L.,EUR
3147,185076,FVM/2025/00023,out_invoice,partial,6,9294,1,4840.0,2420.0,2025-01-31,2025-03-15,10/02/2025,Marketing4ecommerce Digital Content SL,"Product Hackers, S.L.",EUR
5031,178448,INT/2024/00026,out_invoice,partial,1,13260,1,50609.59,25609.59,2024-12-31,2025-01-21,22/01/2025,"Grupo Viko Digital Marketing, S.A.",Ideas y Estrategia Digital SL,EUR
5035,174442,INT/2024/00022,out_invoice,partial,1,10,1,147254.4,30473.85,2024-12-31,2024-12-31,"27/12/2024, 27/12/2024, 27/12/2024, 16/12/2024...","Grupo Viko Digital Marketing, S.A.",Elogia Media S.L.,EUR
5109,174403,FVE/2024/03004,out_invoice,partial,3,13260,1,6980.29,0.01,2024-12-31,2025-01-14,15/01/2025,Elogia Media S.L.,Ideas y Estrategia Digital SL,EUR
10452,149233,FVE/2024/02077,out_invoice,partial,3,8527,1,1394.58,0.01,2024-09-30,2024-11-30,29/11/2024,Elogia Media S.L.,Fundació Pasqual Maragall,EUR
10467,149216,FVE/2024/02062,out_invoice,partial,3,13,1,5445.0,3630.0,2024-09-30,2024-10-08,,Elogia Media S.L.,Marketing4ecommerce Digital Content SL,EUR
11676,141878,FVE/2024/01812,out_invoice,partial,3,10384,1,8167.5,2330.73,2024-08-31,2024-09-30,16/02/2025,Elogia Media S.L.,"Grupo Instituto Médico Láser, S.L.U.",EUR
12124,139026,ELM202400377,out_invoice,partial,8,12056,33,376924.09,0.09,2024-08-14,2024-09-13,13/09/2024,INICIATIVAS VIRTUALES DE MEXICO,COMERCIALIZADORA ALMACENES GARCIA DE MEXICO,MXN
14217,115046,AMZN-2024-1,out_invoice,partial,3,9308,1,100.97,26.97,2024-05-13,2024-05-29,"01/10/2024, 01/10/2024, 01/10/2024",Elogia Media S.L.,AMAZON INVOICING SPAIN,EUR


Voy a sacar las lineas de la factura para asegurarme de que realmente esten parcialmente pagadas

In [12]:
invoice_lines_partial = []
for id in invoices_partial['id']:
    lines = asyncio.run(data_retriever.get_invoice_line_by_invoice_id(invoice_id=id))
    print(f"Invoice ID: {id}, Lines retrieved: {len(lines)}")
    invoice_lines_partial.extend(lines)
invoice_lines_partial_df = pd.DataFrame(invoice_lines_partial)

Invoice ID: 189107, Lines retrieved: 3
Invoice ID: 185076, Lines retrieved: 3
Invoice ID: 178448, Lines retrieved: 3
Invoice ID: 174442, Lines retrieved: 3
Invoice ID: 174403, Lines retrieved: 5
Invoice ID: 149233, Lines retrieved: 4
Invoice ID: 149216, Lines retrieved: 3
Invoice ID: 141878, Lines retrieved: 5
Invoice ID: 139026, Lines retrieved: 3
Invoice ID: 115046, Lines retrieved: 3
Invoice ID: 107004, Lines retrieved: 3
Invoice ID: 104262, Lines retrieved: 9
Invoice ID: 98852, Lines retrieved: 3
Invoice ID: 84600, Lines retrieved: 3
Invoice ID: 65548, Lines retrieved: 3
Invoice ID: 65549, Lines retrieved: 3
Invoice ID: 47707, Lines retrieved: 2
Invoice ID: 27894, Lines retrieved: 3
Invoice ID: 22732, Lines retrieved: 3
Invoice ID: 9720, Lines retrieved: 2


In [13]:
invoice_lines_partial_df['move_id'] = invoice_lines_partial_df['move_id'].apply(lambda x: x[0] if isinstance(x, list) else pd.NA)

In [14]:
invoice_lines_partial_df.head()

Unnamed: 0,id,move_id,product_id,quantity,price_unit,tax_ids,reconciled,blocked,date_maturity,debit,credit,balance,amount_residual,currency_id,company_id,discount,discount_percentage,full_reconcile_id,is_downpayment,reconcile_model_id
0,630003,189107,"[27, [PATROCINIOS (M4EC)] PATROCINIOS]",1.0,586.36,[602],False,False,False,0.0,586.36,-586.36,0.0,"[1, EUR]","[6, Marketing4ecommerce Digital Content SL]",0.0,0.0,False,False,False
1,630004,189107,False,0.0,0.0,[],False,False,2025-03-09,709.5,0.0,709.5,104.5,"[1, EUR]","[6, Marketing4ecommerce Digital Content SL]",0.0,0.0,False,False,False
2,630005,189107,False,0.0,0.0,[],False,False,False,0.0,123.14,-123.14,0.0,"[1, EUR]","[6, Marketing4ecommerce Digital Content SL]",0.0,0.0,False,False,False
3,615480,185076,"[24, [CONTENIDOS (M4eC)] CONTENIDOS PROPIOS]",1.0,4000.0,[602],False,False,False,0.0,4000.0,-4000.0,0.0,"[1, EUR]","[6, Marketing4ecommerce Digital Content SL]",0.0,0.0,False,False,False
4,615481,185076,False,0.0,0.0,[],False,False,False,0.0,840.0,-840.0,0.0,"[1, EUR]","[6, Marketing4ecommerce Digital Content SL]",0.0,0.0,False,False,False


No estan ni reconciliadas ni con 0 en el residuo por pagar, pero las facturas 174403, 149233, 139026, 104262 y 47707 tienen valores demasiado bajos por pagar, las voy a considerar como pagadas

In [15]:
for id in invoices_partial['id']:
    sum_balance = 0
    amount_total = invoices_partial[invoices_partial['id'] == id]['amount_total'].values[0]
    amount_residual = invoices_partial[invoices_partial['id'] == id]['amount_residual'].values[0]
    for line in invoice_lines_partial_df[invoice_lines_partial_df['move_id'] == id]['amount_residual']:
        sum_balance += line
    print(f"Invoice ID: {id}, Amount Total: {amount_total}, Amount Residual: {amount_residual}, Sum of Line Residuals: {sum_balance}")

Invoice ID: 189107, Amount Total: 709.5, Amount Residual: 104.5, Sum of Line Residuals: 104.5
Invoice ID: 185076, Amount Total: 4840.0, Amount Residual: 2420.0, Sum of Line Residuals: 2420.0
Invoice ID: 178448, Amount Total: 50609.59, Amount Residual: 25609.59, Sum of Line Residuals: 25609.59
Invoice ID: 174442, Amount Total: 147254.4, Amount Residual: 30473.85, Sum of Line Residuals: 30473.85
Invoice ID: 174403, Amount Total: 6980.29, Amount Residual: 0.01, Sum of Line Residuals: 0.01
Invoice ID: 149233, Amount Total: 1394.58, Amount Residual: 0.01, Sum of Line Residuals: 0.01
Invoice ID: 149216, Amount Total: 5445.0, Amount Residual: 3630.0, Sum of Line Residuals: 3630.0
Invoice ID: 141878, Amount Total: 8167.5, Amount Residual: 2330.73, Sum of Line Residuals: 2330.73
Invoice ID: 139026, Amount Total: 376924.09, Amount Residual: 0.09, Sum of Line Residuals: 0.08
Invoice ID: 115046, Amount Total: 100.97, Amount Residual: 26.97, Sum of Line Residuals: 26.97
Invoice ID: 107004, Amount T

In [16]:
to_paid_invoices = [174403, 149233, 139026, 104262, 47707]
invoices_df['payment_state'] = invoices_df.apply(lambda row: 'paid' if row['id'] in to_paid_invoices else row['payment_state'], axis=1)

Quedan 15 facturas parciales, dada la pequeña cantidad, no tiene sentido hacer una clase únicamente para las parcialmente pagadas, por tanto, las moveré a la clase de no pagadas

In [17]:
invoices_df['payment_state'].value_counts()

payment_state
paid        19077
not_paid     4839
partial        15
Name: count, dtype: int64

In [18]:
invoices_df['payment_state'] = invoices_df['payment_state'].apply(lambda x: 'not_paid' if x == 'partial' else x)
invoices_df['payment_state'].value_counts()

payment_state
paid        19077
not_paid     4854
Name: count, dtype: int64

**Pagadas**

Simplemente voy a comprobar que realmente esten pagadas a partir de las lineas de la factura y parece que todas estan pagadas, algunos pequeños valores en amount_residual no relevantes

In [None]:
error_paid_invoices = []
paid_invoices = invoices_df[invoices_df['payment_state'] == 'paid']
for id in paid_invoices['id']:
    lines = asyncio.run(data_retriever.get_invoice_line_by_invoice_id(invoice_id=id))
    for line in lines:
        amount_residual = line['amount_residual']
        if amount_residual > 0:
            print(f"Invoice ID: {id}, Line ID: {line['id']}, Amount Residual: {amount_residual}")
            error_paid_invoices.append(id)
print(f"Total paid invoices with residual amount > 0: {len(error_paid_invoices)}")

    

Exception in callback Task.__step()
handle: <Handle Task.__step()>
Traceback (most recent call last):
  File "c:\Users\Ismae\miniconda3\Lib\asyncio\events.py", line 89, in _run
    self._context.run(self._callback, *self._args)
    ~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: cannot enter context: <_contextvars.Context object at 0x000001E8F27A8980> is already entered
Exception in callback Task.__step()
handle: <Handle Task.__step()>
Traceback (most recent call last):
  File "c:\Users\Ismae\miniconda3\Lib\asyncio\events.py", line 89, in _run
    self._context.run(self._callback, *self._args)
    ~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: cannot enter context: <_contextvars.Context object at 0x000001E8F27A8980> is already entered
Exception in callback Task.__step()
handle: <Handle Task.__step()>
Traceback (most recent call last):
  File "c:\Users\Ismae\miniconda3\Lib\asyncio\events.py", line 89, in _run
    self._context.run(self._callback, *self._args)


Invoice ID: 174403, Line ID: 580724, Amount Residual: 0.01
Invoice ID: 149233, Line ID: 493136, Amount Residual: 0.01
Invoice ID: 139026, Line ID: 458926, Amount Residual: 0.09


**No pagadas**

Comprobaré que realmente no esten pagadas

In [24]:
invoice_lines_df_original['move_id'] = invoice_lines_df_original['move_id'].apply(lambda x: x[0] if isinstance(x, list) else pd.NA)

Parece que todas estan sin pagar, ni estan reconciliadas ni tienen amount_residual != 0 (los resultados del código de abajo, mirando caso por caso, realmente no estan pagadas las facturas)

In [28]:
error_not_paid_invoices = []
not_paid_invoices = invoices_df[invoices_df['payment_state'] == 'not_paid']
for id in not_paid_invoices['id']:
    lines = invoice_lines_df_original[invoice_lines_df_original["move_id"] == id]
    if len(lines) > 0:
        amount_residual = lines.iloc[-1]['amount_residual']
        if amount_residual == 0: #and lines.iloc[-1]['reconciled'] == True:
            print(f"Invoice ID: {id}, Line ID: {lines.iloc[-1]['id']}, Amount Residual: {amount_residual}")
            error_not_paid_invoices.append(id)

Invoice ID: 196189, Line ID: 649275, Amount Residual: 0.0
Invoice ID: 194730, Line ID: 644851, Amount Residual: 0.0
Invoice ID: 194544, Line ID: 644190, Amount Residual: 0.0
Invoice ID: 189032, Line ID: 629454, Amount Residual: 0.0
Invoice ID: 195124, Line ID: 646163, Amount Residual: 0.0
Invoice ID: 195012, Line ID: 645879, Amount Residual: 0.0
Invoice ID: 195000, Line ID: 645831, Amount Residual: 0.0
Invoice ID: 191140, Line ID: 639779, Amount Residual: 0.0
Invoice ID: 194223, Line ID: 643177, Amount Residual: 0.0
Invoice ID: 189544, Line ID: 631725, Amount Residual: 0.0
Invoice ID: 188676, Line ID: 628020, Amount Residual: 0.0
Invoice ID: 186423, Line ID: 620815, Amount Residual: 0.0
Invoice ID: 185096, Line ID: 615554, Amount Residual: 0.0
Invoice ID: 186224, Line ID: 619051, Amount Residual: 0.0
Invoice ID: 185259, Line ID: 616208, Amount Residual: 0.0
Invoice ID: 185257, Line ID: 616202, Amount Residual: 0.0
Invoice ID: 180619, Line ID: 601834, Amount Residual: 0.0
Invoice ID: 18

In [32]:
invoice_lines_df_original[invoice_lines_df_original["move_id"] == 185257]


Unnamed: 0,id,move_id,product_id,quantity,price_unit,tax_ids,reconciled,blocked,date_maturity,debit,credit,balance,amount_residual,currency_id,company_id,discount,discount_percentage,full_reconcile_id,is_downpayment,reconcile_model_id
9178,616200,185257,"[99, [ACADEMY DIRECT] FORMACION DIRECTA]",1.0,4760.0,[902],False,False,False,0.0,4760.0,-4760.0,0.0,"[1, EUR]","[11, DigitalPla2021, S.L.]",0.0,0.0,False,False,False
9179,616201,185257,False,0.0,0.0,[],False,False,2025-03-19,5759.6,0.0,5759.6,5759.6,"[1, EUR]","[11, DigitalPla2021, S.L.]",0.0,0.0,False,False,False
9180,616202,185257,False,0.0,0.0,[],False,False,False,0.0,999.6,-999.6,0.0,"[1, EUR]","[11, DigitalPla2021, S.L.]",0.0,0.0,False,False,False


In [34]:
invoices_df.to_pickle("invoices_cleanedv4.pkl")

#### partner_id && partner name

4 facturas sin partner, voy a investigar si puedo sacar algo de las lineas de las facturas

In [40]:
invoices_df['partner_id'].info()
print(invoices_df['partner_id'].isna().sum())
print(invoices_df['partner_id'].nunique())

<class 'pandas.core.series.Series'>
Index: 23931 entries, 0 to 24287
Series name: partner_id
Non-Null Count  Dtype 
--------------  ----- 
23927 non-null  object
dtypes: object(1)
memory usage: 373.9+ KB
4
1517


In [41]:
invoices_without_partner = invoices_df[invoices_df['partner_id'].isna()]
invoices_without_partner

Unnamed: 0,id,name,move_type,payment_state,company_id,partner_id,currency_id,amount_total,amount_residual,invoice_date,invoice_date_due,payment_dates,company_name,partner_name,currency_name
1164,197175,FACT-2025-XYZ,out_invoice,not_paid,7,,1,0.0,0.0,,2025-07-23,,Tandem Trade Marketing SL,,EUR
1165,197174,FACT-2025-XYZ,out_invoice,not_paid,1,,1,0.0,0.0,,2025-07-23,,"Grupo Viko Digital Marketing, S.A.",,EUR
1166,197173,FACT-2025-XYZ,out_invoice,not_paid,1,,1,0.0,0.0,,2025-07-23,,"Grupo Viko Digital Marketing, S.A.",,EUR
3105,181418,INT/2025/00001,out_invoice,not_paid,8,,33,0.0,0.0,,2025-02-03,,INICIATIVAS VIRTUALES DE MEXICO,,MXN


No tienen lineas de factura asociadas, asumo que son pruebas o errores, las elimino y ya

In [None]:

lines_temp_df = pd.DataFrame()
invoice_lines_df = invoice_lines_df_original.copy()
for id in invoices_without_partner['id']:
    lines = invoice_lines_df[invoice_lines_df['move_id'] == id]
    print(lines)
    lines_temp_df = pd.concat([lines_temp_df, lines], ignore_index=True)
lines_temp_df


Empty DataFrame
Columns: [id, move_id, product_id, quantity, price_unit, tax_ids, reconciled, blocked, date_maturity, debit, credit, balance, amount_residual, currency_id, company_id, discount, discount_percentage, full_reconcile_id, is_downpayment, reconcile_model_id]
Index: []
Empty DataFrame
Columns: [id, move_id, product_id, quantity, price_unit, tax_ids, reconciled, blocked, date_maturity, debit, credit, balance, amount_residual, currency_id, company_id, discount, discount_percentage, full_reconcile_id, is_downpayment, reconcile_model_id]
Index: []
Empty DataFrame
Columns: [id, move_id, product_id, quantity, price_unit, tax_ids, reconciled, blocked, date_maturity, debit, credit, balance, amount_residual, currency_id, company_id, discount, discount_percentage, full_reconcile_id, is_downpayment, reconcile_model_id]
Index: []
Empty DataFrame
Columns: [id, move_id, product_id, quantity, price_unit, tax_ids, reconciled, blocked, date_maturity, debit, credit, balance, amount_residual, c

Unnamed: 0,id,move_id,product_id,quantity,price_unit,tax_ids,reconciled,blocked,date_maturity,debit,credit,balance,amount_residual,currency_id,company_id,discount,discount_percentage,full_reconcile_id,is_downpayment,reconcile_model_id


In [48]:

invoices_df = invoices_df.dropna(subset=['partner_id'])

Ahora con el partner name y ya estaria

In [51]:
invoices_df['partner_name'].info()
print(invoices_df['partner_name'].isna().sum())
print(invoices_df['partner_name'].nunique())

<class 'pandas.core.series.Series'>
Index: 23927 entries, 0 to 24287
Series name: partner_name
Non-Null Count  Dtype 
--------------  ----- 
23927 non-null  object
dtypes: object(1)
memory usage: 373.9+ KB
0
1516


In [52]:
invoices_df.to_pickle("invoices_cleanedv4.pkl")

#### amount_total y amount_residual

In [32]:
invoices_df = pd.read_pickle("invoices_cleanedv4.pkl")

Valores muy grandes (probablemente por la moneda), alta concentración de facturas de bajo importe, algunas facturas con valores erroneos en amount_total (0?)

In [33]:
invoices_df[['amount_total', 'amount_residual']].describe()

Unnamed: 0,amount_total,amount_residual
count,23927.0,23927.0
mean,17621.277,8812.942
std,1150393.864,1148984.899
min,0.0,0.0
25%,12.0,0.0
50%,40.01,0.0
75%,2297.44,0.0
max,177676632.0,177676632.0


In [34]:
invoices_df[invoices_df['amount_total'] == 0]

Unnamed: 0,id,name,move_type,payment_state,company_id,partner_id,currency_id,amount_total,amount_residual,invoice_date,invoice_date_due,payment_dates,company_name,partner_name,currency_name
2555,195207,FVC/2025/00005,out_invoice,paid,1,13926,1,0.0,0.0,2025-03-26,2025-03-26,,"Grupo Viko Digital Marketing, S.A.",Marketplaces Italia,EUR
2556,195204,FVC/2025/00004,out_invoice,paid,1,14,1,0.0,0.0,2025-03-26,2025-03-26,,"Grupo Viko Digital Marketing, S.A.",Tandem Trade Marketing SL,EUR


Las elimino

In [35]:
invoices_df = invoices_df.drop(invoices_df[invoices_df['amount_total'] == 0].index)

In [36]:
invoices_df[invoices_df['amount_total'] < 1]

Unnamed: 0,id,name,move_type,payment_state,company_id,partner_id,currency_id,amount_total,amount_residual,invoice_date,invoice_date_due,payment_dates,company_name,partner_name,currency_name
5136,173716,FVE/2024/02968,out_invoice,paid,3,8299,1,0.97,0.0,2024-12-31,2025-03-01,06/03/2025,Elogia Media S.L.,"Madz Digital Business, S.L.",EUR
7120,166010,FVM/2024/00439,out_invoice,paid,6,12731,1,0.36,0.0,2024-11-30,2024-12-30,12/11/2024,Marketing4ecommerce Digital Content SL,Bitpanda,EUR
11519,142728,FVM/2024/00327,out_invoice,paid,6,12731,1,0.15,0.0,2024-08-31,2024-09-30,13/08/2024,Marketing4ecommerce Digital Content SL,Bitpanda,EUR


In [37]:
invoices_df[['amount_total', 'amount_residual']].describe()

Unnamed: 0,amount_total,amount_residual
count,23925.0,23925.0
mean,17622.75,8813.679
std,1150441.937,1149032.921
min,0.15,0.0
25%,12.0,0.0
50%,40.15,0.0
75%,2297.79,0.0
max,177676632.0,177676632.0


In [38]:
currencies_df = pd.DataFrame(asyncio.run(odoo_connection.search_read('res.currency', [], [], offset=0, limit=0)))

Los rates son erróneos o no actualizados...

In [39]:
# pip install pandas-datareader
currencies_df

Unnamed: 0,id,name,full_name,symbol,rate,inverse_rate,rate_string,rate_ids,rounding,decimal_places,...,currency_subunit_label,is_current_company_currency,__last_update,display_name,create_uid,create_date,write_uid,write_date,display_rounding_warning,l10n_mx_edi_decimal_places
0,8,COP,Colombian peso,$,1.0,1.0,1 EUR = 1.000000 COP,"[15031, 14958]",0.01,2,...,Centavos,False,2024-04-16 11:58:19,COP,"[1, OdooBot]",2022-12-09 14:09:20,"[6, Roger Regales Coll]",2024-04-16 11:58:19,False,2
1,1,EUR,Euro,€,1.0,1.0,,"[33684, 33689, 33694, 33699, 33704, 33709, 337...",0.01,2,...,Cents,True,2024-06-19 08:37:47,EUR,False,False,"[2, Administrator]",2024-06-19 08:37:47,False,2
2,142,GBP,Pound sterling,£,0.841,1.189,1 EUR = 0.840780 GBP,"[33681, 33686, 33691, 33696, 33701, 33706, 337...",0.01,2,...,Penny,False,2023-04-05 11:39:53,GBP,"[1, OdooBot]",2022-12-09 14:09:20,"[2, Administrator]",2023-04-05 11:39:53,False,2
3,33,MXN,Mexican peso,$,22.036,0.045,1 EUR = 22.036400 MXN,"[33740, 33744, 33672, 33676, 33683, 33688, 336...",0.01,2,...,Centavos,False,2023-07-06 14:47:55,MXN,"[1, OdooBot]",2022-12-09 14:09:20,"[6, Roger Regales Coll]",2023-07-06 14:47:55,False,2
4,18,SEK,Swedish krona,kr,10.976,0.091,1 EUR = 10.976500 SEK,"[33682, 33687, 33692, 33697, 33702, 33707, 337...",0.01,2,...,Ore,False,2025-03-05 12:14:29,SEK,"[1, OdooBot]",2022-12-09 14:09:20,"[6, Roger Regales Coll]",2025-03-05 12:14:29,False,2
5,2,USD,United States dollar,$,1.089,0.919,1 EUR = 1.088600 USD,"[33680, 33685, 33690, 33695, 33700, 33705, 337...",0.01,2,...,Cents,False,2023-07-18 12:35:52,USD,"[1, OdooBot]",2022-12-09 14:09:20,"[6, Roger Regales Coll]",2023-07-18 12:35:52,False,2


In [40]:
invoices_df['currency_name'].unique()

array(['EUR', 'SEK', 'MXN', 'USD', 'GBP', 'COP'], dtype=object)

Usaré los datos del banco central europeo

In [41]:
c = CurrencyRates()

rates = {}
rates['COP'] = 0.00022  # Valor fijo temporal
for currency in currencies_df['name']:
    if currency != 'EUR':
        try:
            rate = c.get_rate(currency, 'EUR')
            rates[currency] = rate
        except Exception as e:
            print(f"Error retrieving rate for {currency}: {e}")

invoices_df['amount_total_eur'] = invoices_df.apply(lambda row: row['amount_total'] * rates.get(row['currency_name'], 1) if row['currency_name'] != 'EUR' else row['amount_total'], axis=1)
invoices_df['amount_residual_eur'] = invoices_df.apply(lambda row: row['amount_residual'] * rates.get(row['currency_name'], 1) if row['currency_name'] != 'EUR' else row['amount_residual'], axis=1)

Error retrieving rate for COP: Currency Rates Source Not Ready


In [42]:
invoices_df.to_pickle("invoices_cleanedv5.pkl")

In [82]:
invocies_df = pd.read_pickle("invoices_cleanedv5.pkl")

In [83]:
invoices_df[['amount_total_eur', 'amount_residual_eur']].describe()

Unnamed: 0,amount_total_eur,amount_residual_eur
count,23885.0,23885.0
mean,2457.694,222.422
std,9039.125,2440.76
min,0.15,0.0
25%,12.0,0.0
50%,39.99,0.0
75%,1887.6,0.0
max,633574.15,131605.93


#### invoice_date y invoice_date_due

In [85]:
invoices_df = convert_to_datetime(invoices_df, ['invoice_date', 'invoice_date_due'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = pd.to_datetime(df[col], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col] = pd.to_datetime(df[col], errors='coerce')


In [86]:
invoices_df[['invoice_date', 'invoice_date_due']].info()

<class 'pandas.core.frame.DataFrame'>
Index: 23885 entries, 0 to 24287
Data columns (total 2 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   invoice_date      23885 non-null  datetime64[ns]
 1   invoice_date_due  23885 non-null  datetime64[ns]
dtypes: datetime64[ns](2)
memory usage: 559.8 KB


invoice_date tiene un null, voy a verlo

In [52]:
invoices_df[invoices_df['invoice_date'].isna()]

Unnamed: 0,id,name,move_type,payment_state,company_id,partner_id,currency_id,amount_total,amount_residual,invoice_date,invoice_date_due,payment_dates,company_name,partner_name,currency_name,amount_total_eur,amount_residual_eur
1765,196522,FVV/2025/00001,out_invoice,not_paid,1,8571,1,10.0,10.0,NaT,2025-06-05,,"Grupo Viko Digital Marketing, S.A.",Google Ireland Limited,EUR,10.0,10.0


No tiene lineas de factura elimino y ya está

In [63]:
invoices_lines = pd.read_pickle("invoice_lines.pkl")
invoices_lines[invoices_lines_in_payment['move_id'] == 196522]

Unnamed: 0,id,move_id,product_id,quantity,price_unit,tax_ids,reconciled,blocked,date_maturity,debit,credit,balance,amount_residual,currency_id,company_id,discount,discount_percentage,full_reconcile_id,is_downpayment,reconcile_model_id


In [54]:
invoices_df = invoices_df.dropna(subset=['invoice_date'])

In [55]:
invoices_df.isna().sum()

id                        0
name                      0
move_type                 0
payment_state             0
company_id                0
partner_id                0
currency_id               0
amount_total              0
amount_residual           0
invoice_date              0
invoice_date_due          0
payment_dates          4876
company_name              0
partner_name              0
currency_name             0
amount_total_eur          0
amount_residual_eur       0
dtype: int64

#### Payment_dates

Muchos nulls en payment_dates principalmente por datos censurados, de momento comprobaré que no haya ninguna factura pagada sin fecha

In [62]:
invoices_paid_without_date = invoices_df[(invoices_df['payment_dates'].isna()) & (invoices_df['payment_state'] == 'paid')]
len(invoices_paid_without_date)

39

39 facturas que están pagadas pero sin fecha, voy a intentar buscar las lineas

In [65]:
lines_paid_without_date = invoices_lines[invoices_lines['move_id'].isin(invoices_paid_without_date['id'])]
lines_paid_without_date

Unnamed: 0,id,move_id,product_id,quantity,price_unit,tax_ids,reconciled,blocked,date_maturity,debit,credit,balance,amount_residual,currency_id,company_id,discount,discount_percentage,full_reconcile_id,is_downpayment,reconcile_model_id


No tienen lineas, por tanto como si no existiesen

In [66]:
invoices_df = invoices_df[~invoices_df['id'].isin(invoices_paid_without_date['id'])]

In [69]:
invoices_df.to_pickle("invoices_cleanedv6.pkl")

In [87]:
invoices_df = pd.read_pickle("invoices_cleanedv6.pkl")

Facturas no pagadas pero con fecha:

In [88]:
invoices_unpaid_with_date = invoices_df[(~invoices_df['payment_dates'].isna()) & (invoices_df['payment_state'] == 'unpaid')]
invoices_unpaid_with_date

Unnamed: 0,id,name,move_type,payment_state,company_id,partner_id,currency_id,amount_total,amount_residual,invoice_date,invoice_date_due,payment_dates,company_name,partner_name,currency_name,amount_total_eur,amount_residual_eur


Fechas erróneas o múltiples plazos

In [89]:
invoices_multiple_payment_dates = invoices_df[~invoices_df['payment_dates'].isna() & ~invoices_df['payment_dates'].astype(str).str.match(r'^\d{2}/\d{2}/\d{4}$')]
invoices_multiple_payment_dates


Unnamed: 0,id,name,move_type,payment_state,company_id,partner_id,currency_id,amount_total,amount_residual,invoice_date,invoice_date_due,payment_dates,company_name,partner_name,currency_name,amount_total_eur,amount_residual_eur
3150,185073,FVM/2025/00020,out_invoice,paid,6,7866,1,37.490,0.000,2025-01-31,2025-03-02,"26/02/2025, 24/02/2025",Marketing4ecommerce Digital Content SL,"Nextalia Ventures, S.L.",EUR,37.490,0.000
5035,174442,INT/2024/00022,out_invoice,not_paid,1,10,1,147254.400,30473.850,2024-12-31,2024-12-31,"27/12/2024, 27/12/2024, 27/12/2024, 16/12/2024...","Grupo Viko Digital Marketing, S.A.",Elogia Media S.L.,EUR,147254.400,30473.850
7082,166310,INT/2024/00021,out_invoice,paid,1,10,1,155428.050,0.000,2024-11-30,2024-12-16,"29/11/2024, 29/11/2024, 25/11/2024, 08/11/2024...","Grupo Viko Digital Marketing, S.A.",Elogia Media S.L.,EUR,155428.050,0.000
7293,164498,FVE/2024/02609,out_invoice,paid,3,8663,1,8654.240,0.000,2024-11-30,2025-01-29,"02/01/2025, 02/12/2024",Elogia Media S.L.,Irmaos Vila Nova SA,EUR,8654.240,0.000
8536,156709,MAC/2024/00046,out_invoice,paid,11,14410,1,6655.000,0.000,2024-11-04,2024-12-12,"12/12/2024, 19/11/2024","DigitalPla2021, S.L.","Playbrands, S.L.",EUR,6655.000,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24094,9722,INV1/2022/00035,out_invoice,paid,3,8073,1,5808.000,0.000,2022-12-31,2023-02-26,"10/07/2023, 19/04/2023",Elogia Media S.L.,"Simer Grumerosa, S.L.U.",EUR,5808.000,0.000
24131,12241,INV1/2022/00027,out_invoice,paid,2,8935,2,251.630,0.000,2022-12-31,2023-03-01,"26/02/2023, 13/02/2023",Ibrands Medios Interactivos SL,Soicos International AG,USD,217.372,0.000
24185,13094,INV1/2022/00016,out_invoice,paid,8,8974,33,37700.000,0.000,2022-12-05,2023-02-20,"10/11/2023, 20/10/2023, 11/08/2023, 22/05/2023",INICIATIVAS VIRTUALES DE MEXICO,IBC OPERATIONS,MXN,1781.363,0.000
24207,13090,INV1/2022/00012,out_invoice,paid,8,8974,33,37700.000,0.000,2022-12-05,2023-02-20,"22/05/2023, 14/04/2023",INICIATIVAS VIRTUALES DE MEXICO,IBC OPERATIONS,MXN,1781.363,0.000


Para multiples plazos de momento simplemente me quedaré con la última fecha y invoice_date_due:

In [90]:
invoices_df["payment_dates"] = (invoices_df["payment_dates"].astype(str).str.split(r",\s*").str[0])

In [91]:
invoices_df[invoices_df['id'].isin(invoices_multiple_payment_dates['id'])]

Unnamed: 0,id,name,move_type,payment_state,company_id,partner_id,currency_id,amount_total,amount_residual,invoice_date,invoice_date_due,payment_dates,company_name,partner_name,currency_name,amount_total_eur,amount_residual_eur
3150,185073,FVM/2025/00020,out_invoice,paid,6,7866,1,37.490,0.000,2025-01-31,2025-03-02,26/02/2025,Marketing4ecommerce Digital Content SL,"Nextalia Ventures, S.L.",EUR,37.490,0.000
5035,174442,INT/2024/00022,out_invoice,not_paid,1,10,1,147254.400,30473.850,2024-12-31,2024-12-31,27/12/2024,"Grupo Viko Digital Marketing, S.A.",Elogia Media S.L.,EUR,147254.400,30473.850
7082,166310,INT/2024/00021,out_invoice,paid,1,10,1,155428.050,0.000,2024-11-30,2024-12-16,29/11/2024,"Grupo Viko Digital Marketing, S.A.",Elogia Media S.L.,EUR,155428.050,0.000
7293,164498,FVE/2024/02609,out_invoice,paid,3,8663,1,8654.240,0.000,2024-11-30,2025-01-29,02/01/2025,Elogia Media S.L.,Irmaos Vila Nova SA,EUR,8654.240,0.000
8536,156709,MAC/2024/00046,out_invoice,paid,11,14410,1,6655.000,0.000,2024-11-04,2024-12-12,12/12/2024,"DigitalPla2021, S.L.","Playbrands, S.L.",EUR,6655.000,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24094,9722,INV1/2022/00035,out_invoice,paid,3,8073,1,5808.000,0.000,2022-12-31,2023-02-26,10/07/2023,Elogia Media S.L.,"Simer Grumerosa, S.L.U.",EUR,5808.000,0.000
24131,12241,INV1/2022/00027,out_invoice,paid,2,8935,2,251.630,0.000,2022-12-31,2023-03-01,26/02/2023,Ibrands Medios Interactivos SL,Soicos International AG,USD,217.372,0.000
24185,13094,INV1/2022/00016,out_invoice,paid,8,8974,33,37700.000,0.000,2022-12-05,2023-02-20,10/11/2023,INICIATIVAS VIRTUALES DE MEXICO,IBC OPERATIONS,MXN,1781.363,0.000
24207,13090,INV1/2022/00012,out_invoice,paid,8,8974,33,37700.000,0.000,2022-12-05,2023-02-20,22/05/2023,INICIATIVAS VIRTUALES DE MEXICO,IBC OPERATIONS,MXN,1781.363,0.000


In [93]:
invoices_df = convert_to_datetime(invoices_df, ['payment_dates'])

In [95]:
invoices_df.to_pickle("invoices_cleanedv6.pkl")

In [94]:
invoices_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 23885 entries, 0 to 24287
Data columns (total 17 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   id                   23885 non-null  int64         
 1   name                 23885 non-null  object        
 2   move_type            23885 non-null  object        
 3   payment_state        23885 non-null  object        
 4   company_id           23885 non-null  int64         
 5   partner_id           23885 non-null  object        
 6   currency_id          23885 non-null  int64         
 7   amount_total         23885 non-null  float64       
 8   amount_residual      23885 non-null  float64       
 9   invoice_date         23885 non-null  datetime64[ns]
 10  invoice_date_due     23885 non-null  datetime64[ns]
 11  payment_dates        19048 non-null  datetime64[ns]
 12  company_name         23885 non-null  object        
 13  partner_name         23885 non-null 

#### Eliminación de facturas no útiles

Marketplace no sirve para predecir facturas

In [35]:
partners_df_original = pd.read_pickle("partners.pkl")
partners_df_original = odoo_missing_values_to_null(partners_df_original)
partners_df_original = partners_df_original.dropna(subset=['name'])

marketplace_clients = partners_df_original[partners_df_original['name'].str.contains("Marketplace")]
marketplace_clients_list = marketplace_clients['id'].values.tolist()
marketplace_clients_list

Index(['name', 'email', 'phone', 'street', 'city', 'zip', 'country_id',
       'category_id', 'company_type', 'company_id', 'industry_id', 'vat',
       'invoice_ids', 'trust', 'unpaid_invoice_ids'],
      dtype='object')


  df[object_cols] = df[object_cols].applymap(lambda x: np.nan if x == [] else x)


[14950,
 14967,
 15029,
 14960,
 14951,
 14957,
 14968,
 15028,
 14961,
 14969,
 14958,
 14962,
 14970,
 14971,
 14952,
 14963,
 15030,
 14959,
 14964,
 14972,
 14973,
 14953,
 14965,
 14974,
 14954,
 14966,
 13925,
 14030,
 14617,
 10892,
 13924,
 13926,
 9935]

In [36]:
invoices_df = pd.read_pickle("invoices_cleanedv6.pkl")
invoices_df = invoices_df[~invoices_df['partner_id'].isin(marketplace_clients_list)]
invoices_df.to_pickle("invoices_cleanedv7.pkl")

### 1.2.3. res.partner

In [66]:
partners_df = pd.read_pickle("partners.pkl")

In [67]:
partners_spain = partners_df[partners_df['country_id'].str[0] == 68]
partners_spain['vat'].to_csv("partners_spain_ids.csv", index=False)

Convierto [], "" y False en objetos a null

In [68]:
partners_df = odoo_missing_values_to_null(partners_df)

Index(['name', 'email', 'phone', 'street', 'city', 'zip', 'country_id',
       'category_id', 'company_type', 'company_id', 'industry_id', 'vat',
       'invoice_ids', 'trust', 'unpaid_invoice_ids'],
      dtype='object')


  df[object_cols] = df[object_cols].applymap(lambda x: np.nan if x == [] else x)


In [69]:
partners_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1631 entries, 0 to 1630
Data columns (total 25 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   id                     1631 non-null   int64  
 1   name                   1630 non-null   object 
 2   email                  281 non-null    object 
 3   phone                  12 non-null     object 
 4   street                 1567 non-null   object 
 5   city                   1549 non-null   object 
 6   zip                    1551 non-null   object 
 7   country_id             1616 non-null   object 
 8   customer_rank          1631 non-null   int64  
 9   supplier_rank          1631 non-null   int64  
 10  category_id            284 non-null    object 
 11  is_company             1631 non-null   bool   
 12  company_type           1631 non-null   object 
 13  company_id             569 non-null    object 
 14  credit                 1631 non-null   float64
 15  cred

In [70]:
partners_df.isna().sum()

id                          0
name                        1
email                    1350
phone                    1619
street                     64
city                       82
zip                        80
country_id                 15
customer_rank               0
supplier_rank               0
category_id              1347
is_company                  0
company_type                0
company_id               1062
credit                      0
credit_limit                0
industry_id              1630
vat                       170
invoice_ids                80
total_due                   0
total_invoiced              0
total_overdue               0
trust                       0
unpaid_invoice_ids       1503
unpaid_invoices_count       0
dtype: int64

A simple vista:
- email: prácticamente vacio para la mayoria de clientes
- telefono, category_id, company_id, industry_id, unpaid_invoice_ids: vacio
- una empresa sin nombre
- varios nulls en street, city, zip, country_id, vat y invoice_ids

Elimino las columnas email, phone, category_id, company_id, industry_id y unpaid_invoice_ids, supplier_rank (no sirve)

In [71]:
partners_df = partners_df.drop(columns=['email', 'phone', 'category_id', 'company_id', 'industry_id', 'unpaid_invoice_ids', 'supplier_rank'])
partners_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1631 entries, 0 to 1630
Data columns (total 18 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   id                     1631 non-null   int64  
 1   name                   1630 non-null   object 
 2   street                 1567 non-null   object 
 3   city                   1549 non-null   object 
 4   zip                    1551 non-null   object 
 5   country_id             1616 non-null   object 
 6   customer_rank          1631 non-null   int64  
 7   is_company             1631 non-null   bool   
 8   company_type           1631 non-null   object 
 9   credit                 1631 non-null   float64
 10  credit_limit           1631 non-null   float64
 11  vat                    1461 non-null   object 
 12  invoice_ids            1551 non-null   object 
 13  total_due              1631 non-null   float64
 14  total_invoiced         1631 non-null   float64
 15  tota

In [72]:
partners_df[['credit', 'credit_limit', 'total_due', 'total_invoiced', 'total_overdue', 'unpaid_invoices_count']].describe()

Unnamed: 0,credit,credit_limit,total_due,total_invoiced,total_overdue,unpaid_invoices_count
count,1631.0,1631.0,1631.0,1631.0,1631.0,1631.0
mean,1248.749,0.0,1248.749,30794.313,1248.749,0.302
std,9944.027,0.0,9944.027,154444.803,9944.027,2.618
min,-12507.0,0.0,-12507.0,-19.5,-12507.0,0.0
25%,0.0,0.0,0.0,39.98,0.0,0.0
50%,0.0,0.0,0.0,800.0,0.0,0.0
75%,0.0,0.0,0.0,7714.865,0.0,0.0
max,283217.33,0.0,283217.33,3351155.51,283217.33,84.0


credit_limit y unpaid_invoices_count no sirven

In [73]:
partners_df = partners_df.drop(columns=['credit_limit', 'unpaid_invoices_count'])
partners_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1631 entries, 0 to 1630
Data columns (total 16 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   id              1631 non-null   int64  
 1   name            1630 non-null   object 
 2   street          1567 non-null   object 
 3   city            1549 non-null   object 
 4   zip             1551 non-null   object 
 5   country_id      1616 non-null   object 
 6   customer_rank   1631 non-null   int64  
 7   is_company      1631 non-null   bool   
 8   company_type    1631 non-null   object 
 9   credit          1631 non-null   float64
 10  vat             1461 non-null   object 
 11  invoice_ids     1551 non-null   object 
 12  total_due       1631 non-null   float64
 13  total_invoiced  1631 non-null   float64
 14  total_overdue   1631 non-null   float64
 15  trust           1631 non-null   object 
dtypes: bool(1), float64(4), int64(2), object(9)
memory usage: 192.9+ KB


In [74]:
partners_df[~partners_df['is_company'] & partners_df['vat'].isna()]


Unnamed: 0,id,name,street,city,zip,country_id,customer_rank,is_company,company_type,credit,vat,invoice_ids,total_due,total_invoiced,total_overdue,trust
8,10970,Aaron Escobar,Cuauhtemoc 123,Ciudad de México,16090,"[156, Mexico]",2,False,person,0.000,,"[63361, 63691, 55911]",0.000,39.980,0.000,normal
12,12023,Abel Hernández,Caldas Da Raihna 6,Badajoz,6011,"[68, Spain]",1,False,person,0.000,,"[77026, 93888]",0.000,199.000,0.000,normal
35,9875,Adolfo Arana,Calle de Isabel Colbrand 18 Portal B 3º2,Madrid,28050,"[68, Spain]",6,False,person,0.000,,"[103699, 44931, 31896, 24444, 24406]",0.000,36.000,0.000,normal
36,9876,Adolfo Téllez,Calle General Álvarez de Castro 36 2ºA,Madrid,28010,"[68, Spain]",3,False,person,0.000,,"[24470, 24432, 24392]",0.000,119.970,0.000,normal
61,11311,Alba Castellet,Gran Vía Corts Catalanes 672,Barcelona,08010,"[68, Spain]",1,False,person,0.000,,"[63856, 70691]",0.000,90.000,0.000,normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1537,12699,Upf,,,,,1,False,person,0.000,,,0.000,0.000,0.000,normal
1538,10979,uping.alejandro@gmail.com,Cumbres francesas 112,Monterrey,64820,"[156, Mexico]",2,False,person,0.000,,"[63345, 63684, 55930]",0.000,39.980,0.000,normal
1543,9917,Valentin Salas,Montsant nave 6,Sant Fruitós de Bages,08272,"[68, Spain]",1,False,person,0.000,,[24391],0.000,39.990,0.000,normal
1569,13577,"Vitola Marketing, S.L.",,,,,2,False,person,0.000,,,0.000,0.000,0.000,normal


Tenemos personas físicas y jurídicas, como trabajamos con operaciones B2B y por alguna razón en partners tienen contactos de personas trabajando en empresas que también estan ya incluidas, eliminaré todo lo que no sea company_type = company

In [75]:
partners_df['company_type'].value_counts()

company_type
company    1370
person      261
Name: count, dtype: int64

In [76]:
partners_df = partners_df.drop(partners_df[partners_df['company_type'] != 'company'].index)
partners_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1370 entries, 0 to 1630
Data columns (total 16 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   id              1370 non-null   int64  
 1   name            1370 non-null   object 
 2   street          1367 non-null   object 
 3   city            1346 non-null   object 
 4   zip             1347 non-null   object 
 5   country_id      1366 non-null   object 
 6   customer_rank   1370 non-null   int64  
 7   is_company      1370 non-null   bool   
 8   company_type    1370 non-null   object 
 9   credit          1370 non-null   float64
 10  vat             1370 non-null   object 
 11  invoice_ids     1365 non-null   object 
 12  total_due       1370 non-null   float64
 13  total_invoiced  1370 non-null   float64
 14  total_overdue   1370 non-null   float64
 15  trust           1370 non-null   object 
dtypes: bool(1), float64(4), int64(2), object(9)
memory usage: 172.6+ KB


customer_rank tampoco sirve, todos son customers por como se extraen

In [77]:
partners_df = partners_df.drop('customer_rank', axis=1)

is_company tampoco ya hemos filtrado por tipo company

In [78]:
partners_df['is_company'].value_counts()

is_company
True    1370
Name: count, dtype: int64

In [79]:
partners_df = partners_df.drop(columns=['is_company'])

#### trust

Demasiada poca información, no sirve

In [80]:
partners_df['trust'].value_counts()

trust
normal    1368
good         1
bad          1
Name: count, dtype: int64

In [81]:
partners_df = partners_df.drop(columns=['trust'])


In [82]:
partners_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1370 entries, 0 to 1630
Data columns (total 13 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   id              1370 non-null   int64  
 1   name            1370 non-null   object 
 2   street          1367 non-null   object 
 3   city            1346 non-null   object 
 4   zip             1347 non-null   object 
 5   country_id      1366 non-null   object 
 6   company_type    1370 non-null   object 
 7   credit          1370 non-null   float64
 8   vat             1370 non-null   object 
 9   invoice_ids     1365 non-null   object 
 10  total_due       1370 non-null   float64
 11  total_invoiced  1370 non-null   float64
 12  total_overdue   1370 non-null   float64
dtypes: float64(4), int64(1), object(8)
memory usage: 149.8+ KB


#### name and vat

In [83]:
partners_identification_df = partners_df[['name', 'vat']]

In [84]:
partners_identification_df[partners_identification_df['name'].str.contains('Hostinger')]

Unnamed: 0,name,vat
731,Hostinger International Limited,10301365E
732,Hostinger International Limited,CY10301365E


In [86]:
partners_df.to_pickle("partners_cleanedv2.pkl")

Un duplicado

In [87]:
partners_df = partners_df.drop(partners_df[partners_df['id'] == 731].index)

In [88]:
partners_df

Unnamed: 0,id,name,street,city,zip,country_id,company_type,credit,vat,invoice_ids,total_due,total_invoiced,total_overdue
0,14516,200 Labs Inc,"490 Post St, Ste 526",San Francisco,94102,"[233, United States]",company,0.000,46-4595829,"[196646, 196260, 197176, 196645, 196263, 19664...",0.000,1808.500,0.000
1,12500,"202 Digital Reputation, S.L.","C/ Tuset 19, entresuelo",Barcelona,08006,"[68, Spain]",company,0.000,B06906226,"[101525, 100842, 100844, 100079]",0.000,600.000,0.000
2,10577,"2Be Confirmed Events, S.L.","C/ Doctor Fleming, 36",Madrid,28036,"[68, Spain]",company,0.000,B86436243,[46745],0.000,500.000,0.000
3,12233,"2BeGroup&Partners, S.L.","Rua Das Baleras, 13 - 4 Oficina 5",Santiago de Compostela,15705,"[68, Spain]",company,0.000,B70344619,"[162545, 150132, 83140, 83139]",0.000,1871.900,0.000
4,14539,"2 Open EU CN, S.L.U.","C/Sierpes, 3",Cáceres,10003,"[68, Spain]",company,0.000,B10444545,"[166755, 166006]",0.000,600.000,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1626,14891,ZENVIA MEXICO,LAS PRADERAS 12 - PISO 1 CUBICULO A,Coyoacán,04500,"[156, Mexico]",company,0.000,ZME201027JTA,"[185677, 185678, 185676]",0.000,5264.020,0.000
1627,9285,Zippy - Comercio e Distribuição SA,"Rua João Mendonça, nº 529",Senhora da Hora,4464-503,"[183, Portugal]",company,0.000,PT503226696,"[13463, 11873]",0.000,2650.000,0.000
1628,11067,"Zoconet, S.L.","Avd. Juan López Peñalver, 17",Málaga,29590,"[68, Spain]",company,0.000,B29853264,[58839],0.000,25.000,0.000
1629,14429,"Zumitow, S.L.","C/Pensamiento 27, Pta. 3, Esc. Izq.. Plt. 3",Madrid,28020,"[68, Spain]",company,0.000,B56156581,"[160887, 155371]",0.000,1000.000,0.000


#### invoice_ids

In [10]:
all_invoices_df = pd.read_pickle("all_invoices.pkl")
all_invoices_df['move_type'].value_counts() 

move_type
entry          111574
out_invoice     24293
in_invoice      22930
in_refund         541
out_refund        467
in_receipt         26
Name: count, dtype: int64

In [11]:
original_partners_df = pd.read_pickle("partners.pkl")
invoice_sum = 0
for row in original_partners_df['invoice_ids']:
    if isinstance(row, list):
        invoice_sum += len(row)
    else:
        print(row)
print(f"Total invoices linked to partners: {invoice_sum}")

Total invoices linked to partners: 64298


In [53]:
invoice_sum = 0
for row in partners_df['invoice_ids']:
    if isinstance(row, list):
        invoice_sum += len(row)
    else:
        print(row)
print(f"Total invoices linked to partners: {invoice_sum}")

nan
nan
nan
nan
nan
Total invoices linked to partners: 41435


Viendo que 64298 > 47223 (total de facturas out o in), entiendo que también tienen entrys en invoice_ids, por tanto, no me sirve, cogeré la lista final de facturas y lo haré manualmente

In [None]:
partners_df = pd.read_pickle("partners_cleanedv2.pkl")
invoices_df = pd.read_pickle("invoices_cleanedv7.pkl")
all_invoices_df = pd.read_pickle("all_invoices.pkl")

In [44]:
partners_df

Unnamed: 0,id,name,street,city,zip,country_id,company_type,credit,vat,invoice_ids,total_due,total_invoiced,total_overdue
0,14516,200 Labs Inc,"490 Post St, Ste 526",San Francisco,94102,"[233, United States]",company,0.000,46-4595829,"[196646, 196260, 197176, 196645, 196263, 19664...",0.000,1808.500,0.000
1,12500,"202 Digital Reputation, S.L.","C/ Tuset 19, entresuelo",Barcelona,08006,"[68, Spain]",company,0.000,B06906226,"[101525, 100842, 100844, 100079]",0.000,600.000,0.000
2,10577,"2Be Confirmed Events, S.L.","C/ Doctor Fleming, 36",Madrid,28036,"[68, Spain]",company,0.000,B86436243,[46745],0.000,500.000,0.000
3,12233,"2BeGroup&Partners, S.L.","Rua Das Baleras, 13 - 4 Oficina 5",Santiago de Compostela,15705,"[68, Spain]",company,0.000,B70344619,"[162545, 150132, 83140, 83139]",0.000,1871.900,0.000
4,14539,"2 Open EU CN, S.L.U.","C/Sierpes, 3",Cáceres,10003,"[68, Spain]",company,0.000,B10444545,"[166755, 166006]",0.000,600.000,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1626,14891,ZENVIA MEXICO,LAS PRADERAS 12 - PISO 1 CUBICULO A,Coyoacán,04500,"[156, Mexico]",company,0.000,ZME201027JTA,"[185677, 185678, 185676]",0.000,5264.020,0.000
1627,9285,Zippy - Comercio e Distribuição SA,"Rua João Mendonça, nº 529",Senhora da Hora,4464-503,"[183, Portugal]",company,0.000,PT503226696,"[13463, 11873]",0.000,2650.000,0.000
1628,11067,"Zoconet, S.L.","Avd. Juan López Peñalver, 17",Málaga,29590,"[68, Spain]",company,0.000,B29853264,[58839],0.000,25.000,0.000
1629,14429,"Zumitow, S.L.","C/Pensamiento 27, Pta. 3, Esc. Izq.. Plt. 3",Madrid,28020,"[68, Spain]",company,0.000,B56156581,"[160887, 155371]",0.000,1000.000,0.000


In [49]:
original_partners_df[original_partners_df['id'] == 15076]

Unnamed: 0,id,name,email,phone,street,city,zip,country_id,customer_rank,supplier_rank,...,credit_limit,industry_id,vat,invoice_ids,total_due,total_invoiced,total_overdue,trust,unpaid_invoice_ids,unpaid_invoices_count
1266,15076,pruebas-cif-12345,False,False,False,False,False,"[251, Españaç]",190,0,...,0.0,False,PRuebas-cif-12345,"[197554, 197553, 197552, 197551, 197548, 19754...",0.0,2709.28,0.0,normal,[],0


Añado las facturas de cada cliente

In [51]:
partners_df["invoice_ids"] = [[] for _ in range(len(partners_df))]
not_found_partners = []
for invoice_id, partner_id in zip(invoices_df['id'], invoices_df['partner_id']):
    print(partner_id)
    if partners_df[partners_df['id'] == partner_id].empty:
        print(f"Partner ID {partner_id} not found in partners_df.")
        not_found_partners.append(partner_id)
        continue
    partners_df.loc[partners_df['id'] == partner_id, 'invoice_ids'].iloc[0].append(invoice_id)  

1
14913
1
1
8870
1
1
8527
1
1
1
15076
Partner ID 15076 not found in partners_df.
15076
Partner ID 15076 not found in partners_df.
15076
Partner ID 15076 not found in partners_df.
15076
Partner ID 15076 not found in partners_df.
15076
Partner ID 15076 not found in partners_df.
15076
Partner ID 15076 not found in partners_df.
15076
Partner ID 15076 not found in partners_df.
15076
Partner ID 15076 not found in partners_df.
15076
Partner ID 15076 not found in partners_df.
15076
Partner ID 15076 not found in partners_df.
15076
Partner ID 15076 not found in partners_df.
15076
Partner ID 15076 not found in partners_df.
15076
Partner ID 15076 not found in partners_df.
15076
Partner ID 15076 not found in partners_df.
15076
Partner ID 15076 not found in partners_df.
15076
Partner ID 15076 not found in partners_df.
15076
Partner ID 15076 not found in partners_df.
15076
Partner ID 15076 not found in partners_df.
15076
Partner ID 15076 not found in partners_df.
15076
Partner ID 15076 not found in p

In [53]:
len(not_found_partners)
invoice_sum = 0
for row in partners_df['invoice_ids']:
    if isinstance(row, list):
        invoice_sum += len(row)
    else:
        print(row)
print(f"Total invoices linked to partners: {invoice_sum}")

Total invoices linked to partners: 11793


# Other

In [61]:
invoices_df[['amount_total', 'amount_residual']].describe()

Unnamed: 0,amount_total,amount_residual
count,24288.0,24288.0
mean,18940.951,8768.149
std,1159128.047,1140423.34
min,0.0,0.0
25%,12.0,0.0
50%,41.79,0.0
75%,2389.75,0.0
max,177676632.0,177676632.0


Pesos mexicanos, no tiene sentido igualmente factura de casi 8,5 millones de euros

In [62]:
invoices_df[invoices_df['amount_total'] < invoices_df['amount_total'].quantile(0.001)]

Unnamed: 0,id,name,move_type,payment_state,company_id,partner_id,currency_id,amount_total,amount_residual,invoice_date,invoice_date_due,payment_dates,company_name,partner_name,currency_name
1102,197237,FVT/2025/00683,out_invoice,not_paid,7,10892.0,1,1.21,1.21,2025-07-29,2025-08-13,,Tandem Trade Marketing SL,Marketplaces España,EUR
1103,197235,FVT/2025/00682,out_invoice,not_paid,7,10892.0,1,1.21,1.21,2025-07-29,2025-08-13,,Tandem Trade Marketing SL,Marketplaces España,EUR
1105,197233,FVT/2025/00680,out_invoice,not_paid,7,10892.0,1,1.21,1.21,2025-07-29,2025-08-13,,Tandem Trade Marketing SL,Marketplaces España,EUR
1134,197217,,out_invoice,not_paid,7,10892.0,1,1.0,1.0,2025-07-28,2025-08-12,,Tandem Trade Marketing SL,Marketplaces España,EUR
1164,197175,FACT-2025-XYZ,out_invoice,not_paid,7,,1,0.0,0.0,,2025-07-23,,Tandem Trade Marketing SL,,EUR
1165,197174,FACT-2025-XYZ,out_invoice,not_paid,1,,1,0.0,0.0,,2025-07-23,,"Grupo Viko Digital Marketing, S.A.",,EUR
1166,197173,FACT-2025-XYZ,out_invoice,not_paid,1,,1,0.0,0.0,,2025-07-23,,"Grupo Viko Digital Marketing, S.A.",,EUR
2057,196186,,out_invoice,not_paid,3,8996.0,1,0.0,0.0,,2025-05-09,,Elogia Media S.L.,"Aldi Masquefa Supermercados, S.L.U.",EUR
2555,195207,FVC/2025/00005,out_invoice,paid,1,13926.0,1,0.0,0.0,2025-03-26,2025-03-26,,"Grupo Viko Digital Marketing, S.A.",Marketplaces Italia,EUR
2556,195204,FVC/2025/00004,out_invoice,paid,1,14.0,1,0.0,0.0,2025-03-26,2025-03-26,,"Grupo Viko Digital Marketing, S.A.",Tandem Trade Marketing SL,EUR


In [139]:
invoices_df['currency_name'].value_counts()

currency_name
EUR    22405
MXN     1628
USD      243
SEK        9
COP        2
GBP        1
Name: count, dtype: int64

In [136]:
invoices_df.head()

Unnamed: 0,id,name,move_type,payment_state,company_id,partner_id,currency_id,amount_total,amount_residual,invoice_date,invoice_date_due,payment_dates,company_name,partner_name,currency_name
0,198507,403-0199881-4444363,out_invoice,not_paid,7,1,1,43.56,43.56,2025-11-10,2025-11-06,,Tandem Trade Marketing SL,"Grupo Viko Digital Marketing, S.A.",EUR
1,198549,FVM/2025/00068,out_invoice,not_paid,6,14913,1,121.0,121.0,2025-11-07,2025-12-07,,Marketing4ecommerce Digital Content SL,"Ser Sport, S.L.",EUR
2,198548,,out_invoice,not_paid,6,14913,1,726.0,726.0,,2025-12-07,,Marketing4ecommerce Digital Content SL,"Ser Sport, S.L.",EUR
3,198522,ES501155NOOJRS,out_invoice,not_paid,7,1,1,116.16,116.16,2025-11-06,2025-11-06,,Tandem Trade Marketing SL,"Grupo Viko Digital Marketing, S.A.",EUR
4,198516,ES501152NOOJRS,out_invoice,not_paid,7,1,1,43.56,43.56,2025-11-06,2025-11-06,,Tandem Trade Marketing SL,"Grupo Viko Digital Marketing, S.A.",EUR


No parece que hayan muchos campos con valores null:
- **payment_dates**: principalmente de facturas impagadas
- **invoice_date**: algunas facturas sin fecha
- **payment_ids**: no hay ningún dato útil

In [22]:
differences = invoices_df[invoices_df['invoice_date'] != invoices_df['date']]
differences.tail(3)

Unnamed: 0,id,name,move_type,payment_state,company_id,partner_id,currency_id,amount_total,amount_paid,amount_residual,invoice_date,invoice_date_due,payment_dates,date,create_date,payment_id,payment_ids,days_late
24263,13080,INV1/2022/00002,out_invoice,paid,"[8, INICIATIVAS VIRTUALES DE MEXICO]","[8961, ODEM INTERNACIONAL]","[33, MXN]",95120.0,0.0,0.0,2022-12-21,2023-02-19,2023-02-14,2022-12-31,2023-01-27 13:52:02,False,[],-5.0
24269,13244,INV1/2022/00001,out_invoice,reversed,"[9, MITTUM MARKETING RELACIONAL]","[8994, Pagos y Servicios S.A.]","[2, USD]",5411.25,0.0,0.0,2022-12-30,2023-01-29,NaT,2022-12-31,2023-01-27 14:38:02,False,[],
24270,13079,INV1/2022/00001,out_invoice,reversed,"[8, INICIATIVAS VIRTUALES DE MEXICO]","[9392, AXEL DEMB]","[33, MXN]",800.0,0.0,0.0,2022-12-29,2023-02-15,NaT,2022-12-31,2023-01-27 13:52:02,False,[],


No son iguales

In [26]:
n_nulls = invoices_df['payment_dates'].isnull().sum()
n_not_paid = (invoices_df['is_paid'] == False).sum()
print(f"Nulls: {n_nulls} / Not paid: {n_not_paid}")


Nulls: 5350 / Not paid: 5357


In [32]:
not_null_and_not_paid = invoices_df[invoices_df['payment_dates'].notnull() & (invoices_df['is_paid'] == False)]
not_null_and_not_paid['payment_state'].value_counts()

payment_state
in_payment    139
partial        12
Name: count, dtype: int64

In [None]:
null_and_paid = invoices_df[invoices_df['payment_dates'].notnull() & (invoices_df['is_paid'] == False)]
not_null_and_not_paid

Convierto fechas y creo columna de días de pago tarde:

In [23]:
invoices_df['invoice_date_due'] = pd.to_datetime(invoices_df['invoice_date_due'], errors='coerce', format='%Y-%m-%d')
invoices_df['invoice_date'] = pd.to_datetime(invoices_df['invoice_date'], errors='coerce')
invoices_df['payment_dates'] = pd.to_datetime(invoices_df['payment_dates'], errors='coerce', format='%d/%m/%Y')
invoices_df['date'] = pd.to_datetime(invoices_df['date'], errors='coerce', format='%Y-%m-%d')
invoices_df['create_date'] = pd.to_datetime(invoices_df['create_date'], errors='coerce')
invoices_df['days_late'] = (invoices_df['payment_dates'] - invoices_df['invoice_date_due']).dt.days
invoices_df['is_paid'] = invoices_df['payment_state'] == "paid"

Selecciono los campos relevantes para el análisis:

In [6]:
numerical_cols = invoices_df.select_dtypes(include=['number']).columns.tolist()
categorical_cols = invoices_df.select_dtypes(include=['object', 'category', 'bool']).columns.tolist()
date_cols = invoices_df.select_dtypes(include=['datetime64']).columns.tolist()

In [7]:
print(f"Tipo fecha: {date_cols}")
print(f"Categóricas: {categorical_cols}")
print(f"Numéricas: {numerical_cols}")

Tipo fecha: ['invoice_date', 'invoice_date_due', 'payment_dates', 'date', 'create_date']
Categóricas: ['name', 'move_type', 'payment_state', 'company_id', 'partner_id', 'currency_id', 'payment_id', 'payment_ids']
Numéricas: ['id', 'amount_total', 'amount_paid', 'amount_residual', 'days_late']


Separo por empresa:

In [59]:
invoices_by_company = {}
for c in company_ids:
    invoices_by_company[c] = invoices_df[invoices_df['company_id'].str[0] == c]

Grupo Viko Digital Marketing, S.A. (1)

In [60]:
invoices_by_company[1]

Unnamed: 0,id,name,move_type,payment_state,company_id,partner_id,currency_id,amount_total,amount_paid,amount_residual,invoice_date,invoice_date_due,payment_dates,date,create_date,payment_id,payment_ids,company_id_id,partner_id_id,days_late
1165,197174,FACT-2025-XYZ,out_invoice,not_paid,"[1, Grupo Viko Digital Marketing, S.A.]",False,"[1, EUR]",0.00,0.0,0.0,False,2025-07-23,NaT,2025-07-23,2025-07-23 08:54:56,False,[],1,,
1166,197173,FACT-2025-XYZ,out_invoice,not_paid,"[1, Grupo Viko Digital Marketing, S.A.]",False,"[1, EUR]",0.00,0.0,0.0,False,2025-07-23,NaT,2025-07-23,2025-07-23 08:54:47,False,[],1,,
1167,197172,FACT-2025-XYZ,out_invoice,not_paid,"[1, Grupo Viko Digital Marketing, S.A.]","[123, María Sieiro Alfonsin]","[1, EUR]",484.00,0.0,484.0,2025-07-23,2025-07-23,NaT,2025-07-23,2025-07-23 08:50:42,False,[],1,123.0,
1168,197171,FACT-2025-XYZ,out_invoice,not_paid,"[1, Grupo Viko Digital Marketing, S.A.]","[123, María Sieiro Alfonsin]","[1, EUR]",484.00,0.0,484.0,2025-07-23,2025-07-23,NaT,2025-07-23,2025-07-23 08:50:04,False,[],1,123.0,
1765,196522,FVV/2025/00001,out_invoice,not_paid,"[1, Grupo Viko Digital Marketing, S.A.]","[8571, Google Ireland Limited]","[1, EUR]",10.00,0.0,10.0,False,2025-06-05,NaT,2025-06-05,2025-06-05 13:56:40,False,[],1,8571.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24246,12479,INV1/2022/00005,out_invoice,reversed,"[1, Grupo Viko Digital Marketing, S.A.]","[15, INICIATIVAS VIRTUALES DE MEXICO]","[1, EUR]",2122.40,0.0,0.0,2022-11-30,2023-01-29,NaT,2022-12-31,2023-01-26 15:29:31,False,[],1,15.0,
24252,12478,INV1/2022/00004,out_invoice,paid,"[1, Grupo Viko Digital Marketing, S.A.]","[7749, Property Technology Services, S.L.]","[1, EUR]",544.50,0.0,0.0,2022-12-31,2023-01-22,2023-01-25,2022-12-31,2023-01-26 15:29:31,False,[],1,7749.0,3.0
24258,12477,INV1/2022/00003,out_invoice,paid,"[1, Grupo Viko Digital Marketing, S.A.]","[9250, Equipzilla, S.L.]","[1, EUR]",181.50,0.0,0.0,2022-12-31,2023-01-22,2023-01-13,2022-12-31,2023-01-26 15:29:31,False,[],1,9250.0,-9.0
24264,12476,INV1/2022/00002,out_invoice,paid,"[1, Grupo Viko Digital Marketing, S.A.]","[8048, Lanai Capital Partners, S.L.]","[1, EUR]",145.03,0.0,0.0,2022-12-31,2023-01-30,2023-06-06,2022-12-31,2023-01-26 15:29:31,False,[],1,8048.0,127.0


Elogia Media S.L. (3)

Ibrands Medios Interactivos SL (2)

Kraz Data Solutions SL (5)

Marketing4ecommerce Digital Content SL (6)

Octoplus Digital Shelf Optimization SL (13)

Tandem Trade Marketing SL (7)

DigitalPla2021, S.L. (11)

Ideas y Estrategia Digital SL (14)

INICIATIVAS VIRTUALES DE MEXICO	(8)

IBRANDS MEDIOS INTERACTIVOS DE MEXICO (12)

Creo dos columnas nuevas con únicamente el id de la empresa y del partner en la factura

Filtro las facturas por empresa:

In [1]:
partners_df['id']

NameError: name 'partners_df' is not defined

In [12]:
invoices_df['company_id'].value_counts()

company_id
[7, Tandem Trade Marketing SL]                  12429
[3, Elogia Media S.L.]                           6348
[8, INICIATIVAS VIRTUALES DE MEXICO]             1617
[6, Marketing4ecommerce Digital Content SL]      1184
[11, DigitalPla2021, S.L.]                        785
[2, Ibrands Medios Interactivos SL]               724
[14, Ideas y Estrategia Digital SL]               595
[13, Octoplus Digital Shelf Optimization SL]      194
[1, Grupo Viko Digital Marketing, S.A.]           140
[9, MITTUM MARKETING RELACIONAL]                  115
[5, Kraz Data Solutions SL]                       114
[12, IBRANDS MEDIOS INTERACTIVOS DE MEXICO]        43
Name: count, dtype: int64

#### 1.2.2. res.partner

In [33]:
partners_df = pd.DataFrame(asyncio.run(data_retriever.get_all_customer_partners()))


Recuperadas 500 facturas, total: 500
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1631 entries, 0 to 1630
Data columns (total 26 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   id                     1631 non-null   int64  
 1   name                   1631 non-null   object 
 2   email                  1631 non-null   object 
 3   phone                  1631 non-null   object 
 4   street                 1631 non-null   object 
 5   city                   1631 non-null   object 
 6   zip                    1631 non-null   object 
 7   country_id             1631 non-null   object 
 8   customer_rank          1631 non-null   int64  
 9   supplier_rank          1631 non-null   int64  
 10  category_id            1631 non-null   object 
 11  is_company             1631 non-null   bool   
 12  company_type           1631 non-null   object 
 13  company_id             1631 non-null   object 
 14  credit             

In [35]:
partners_df_original = partners_df.copy()

In [51]:
partners_df = partners_df_original.copy()

In [66]:
invoices_df_original.dtypes

id                    int64
name                 object
move_type            object
payment_state        object
company_id           object
partner_id           object
currency_id          object
amount_total        float64
amount_paid         float64
amount_residual     float64
invoice_date         object
invoice_date_due     object
payment_dates        object
date                 object
create_date          object
payment_id             bool
payment_ids          object
dtype: object

In [54]:
object_cols = partners_df.select_dtypes(include='object').columns
partners_df[object_cols] = (partners_df[object_cols].replace({False: pd.NA}))

In [52]:
partners_df[object_cols] = partners_df[object_cols].map(lambda x: np.nan if x == [] else x)

In [55]:
partners_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1631 entries, 0 to 1630
Data columns (total 26 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   id                     1631 non-null   int64  
 1   name                   1630 non-null   object 
 2   email                  281 non-null    object 
 3   phone                  12 non-null     object 
 4   street                 1568 non-null   object 
 5   city                   1549 non-null   object 
 6   zip                    1551 non-null   object 
 7   country_id             1616 non-null   object 
 8   customer_rank          1631 non-null   int64  
 9   supplier_rank          1631 non-null   int64  
 10  category_id            284 non-null    object 
 11  is_company             1631 non-null   bool   
 12  company_type           1631 non-null   object 
 13  company_id             569 non-null    object 
 14  credit                 1631 non-null   float64
 15  cred

In [61]:
partners_df[partners_df['is_company'] == False]

Unnamed: 0,id,name,email,phone,street,city,zip,country_id,customer_rank,supplier_rank,...,debit,debit_limit,industry_id,invoice_ids,total_due,total_invoiced,total_overdue,trust,unpaid_invoice_ids,unpaid_invoices_count
8,10970,Aaron Escobar,esaaroleesco@icloud.com,,Cuauhtemoc 123,Ciudad de México,16090,"[156, Mexico]",2,0,...,0.0,0.0,,"[63361, 63691, 55911]",0.0,39.98,0.0,normal,,0
12,12023,Abel Hernández,abel@congresomarketingdigital.com,,Caldas Da Raihna 6,Badajoz,6011,"[68, Spain]",1,0,...,0.0,0.0,,"[77026, 93888]",0.0,199.00,0.0,normal,,0
15,9405,Facturacion,facturacion@aby.group,,"Hijas de la Caridad, 108",Bilbao,48009,"[68, Spain]",2,0,...,0.0,0.0,,,0.0,0.00,0.0,normal,,0
20,10178,Luís Granados,lgranados@acesur.com,,Carretera de la carolina,Vilches,23220,"[68, Spain]",1,0,...,0.0,0.0,,,0.0,0.00,0.0,normal,,0
34,13148,Invoice,invoice@adock.io,,"Calle Cardenal Vives i Tutó, 65",Barcelona,08034,"[68, Spain]",1,0,...,0.0,0.0,,,0.0,0.00,0.0,normal,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1568,13665,Teresa,teresa@growwer.com,,"Carretera d'Esplugues 47, Esc. D, 5 - 1",Cornellà del Llobregat,08940,"[68, Spain]",1,0,...,0.0,0.0,,,0.0,0.00,0.0,normal,,0
1569,13577,"Vitola Marketing, S.L.",,,,,,,2,0,...,0.0,0.0,,,0.0,0.00,0.0,normal,,0
1580,13664,Celeste Romero,celeste.arias@vtex.com,,"WeWork Aviation House, 125 Kingsway",London,WC2B 6NH,"[231, United Kingdom]",1,0,...,0.0,0.0,,,0.0,0.00,0.0,normal,,0
1604,13788,Sebastián Díaz,sebastiandiaz@wix.com,,"40 Hanamal Tel Aviv, Beit Yoel",Tel Aviv,6350671,"[102, Israel]",1,0,...,0.0,0.0,,,0.0,0.00,0.0,normal,,0


In [56]:
partners_df

Unnamed: 0,id,name,email,phone,street,city,zip,country_id,customer_rank,supplier_rank,...,debit,debit_limit,industry_id,invoice_ids,total_due,total_invoiced,total_overdue,trust,unpaid_invoice_ids,unpaid_invoices_count
0,14516,200 Labs Inc,,,"490 Post St, Ste 526",San Francisco,94102,"[233, United States]",1,5,...,600.0,0.0,,"[196646, 196260, 197176, 196645, 196263, 19664...",0.0,1808.50,0.0,normal,,0
1,12500,"202 Digital Reputation, S.L.",,,"C/ Tuset 19, entresuelo",Barcelona,08006,"[68, Spain]",3,0,...,0.0,0.0,,"[101525, 100842, 100844, 100079]",0.0,600.00,0.0,normal,,0
2,10577,"2Be Confirmed Events, S.L.",,,"C/ Doctor Fleming, 36",Madrid,28036,"[68, Spain]",1,0,...,0.0,0.0,,[46745],0.0,500.00,0.0,normal,,0
3,12233,"2BeGroup&Partners, S.L.",,,"Rua Das Baleras, 13 - 4 Oficina 5",Santiago de Compostela,15705,"[68, Spain]",3,0,...,0.0,0.0,,"[162545, 150132, 83140, 83139]",0.0,1871.90,0.0,normal,,0
4,14539,"2 Open EU CN, S.L.U.",,,"C/Sierpes, 3",Cáceres,10003,"[68, Spain]",1,0,...,0.0,0.0,,"[166755, 166006]",0.0,600.00,0.0,normal,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1626,14891,ZENVIA MEXICO,,,LAS PRADERAS 12 - PISO 1 CUBICULO A,Coyoacán,04500,"[156, Mexico]",1,0,...,0.0,0.0,,"[185677, 185678, 185676]",0.0,5264.02,0.0,normal,,0
1627,9285,Zippy - Comercio e Distribuição SA,,,"Rua João Mendonça, nº 529",Senhora da Hora,4464-503,"[183, Portugal]",1,0,...,0.0,0.0,,"[13463, 11873]",0.0,2650.00,0.0,normal,,0
1628,11067,"Zoconet, S.L.",,,"Avd. Juan López Peñalver, 17",Málaga,29590,"[68, Spain]",1,0,...,0.0,0.0,,[58839],0.0,25.00,0.0,normal,,0
1629,14429,"Zumitow, S.L.",,,"C/Pensamiento 27, Pta. 3, Esc. Izq.. Plt. 3",Madrid,28020,"[68, Spain]",1,0,...,0.0,0.0,,"[160887, 155371]",0.0,1000.00,0.0,normal,,0


A simple vista, se puede observar que hay muchos valores vacíos (False en Odoo) y campos vacíos ([] en category_id).

Analizaré los clientes de cada empresa:

In [6]:
partners_df['company_id'].value_counts()

company_id
False                                           1062
[7, Tandem Trade Marketing SL]                   179
[11, DigitalPla2021, S.L.]                       154
[8, INICIATIVAS VIRTUALES DE MEXICO]             153
[6, Marketing4ecommerce Digital Content SL]       29
[14, Ideas y Estrategia Digital SL]               18
[3, Elogia Media S.L.]                            17
[12, IBRANDS MEDIOS INTERACTIVOS DE MEXICO]        6
[13, Octoplus Digital Shelf Optimization SL]       6
[9, MITTUM MARKETING RELACIONAL]                   4
[2, Ibrands Medios Interactivos SL]                3
Name: count, dtype: int64

La mayoría de partners no tienen asociadas las empresas con las que han hecho transacciones...

Lo sacaré de account.move

In [28]:
company_ids = [1,2,3,5,6,7,8,9,11,12,13,14]
def get_partners_by_company(company_id):
    invoices_id = invoices_df[invoices_df['company_id_id'] == 1]
    partners_id = invoices_id['partner_id_id'].unique()
    return partners_df[partners_df['id'].isin(partners_id)]

In [29]:
partners_by_company = []
for c in company_ids:
    partners_by_company.append({c : get_partners_by_company(c)})

In [None]:
invoices_df['company_id_id'] = invoices_df['company_id'].str[0]
invoices_df['partner_id_id'] = invoices_df['partner_id'].str[0]

In [38]:
company_ids = [1,2,3,5,6,7,8,9,11,12,13,14]
invoices_by_company = {}
for c in company_ids:
    invoices_by_company[c] = invoices_df[invoices_df['company_id'].str[0] == c]

Grupo Viko Digital Marketing, S.A. (1)

In [39]:
invoices_by_company[1]['partner_id'].value_counts()

partner_id
[9250, Equipzilla, S.L.]                           18
[10, Elogia Media S.L.]                            17
[11527, The Tropicfeel S.L.]                       12
[7749, Property Technology Services, S.L.]         10
[13, Marketing4ecommerce Digital Content SL]        6
[8527, Fundació Pasqual Maragall]                   6
[12, Kraz Data Solutions SL]                        6
[16, MITTUM MARKETING RELACIONAL]                   5
[10436, Octoplus Digital Shelf Optimization SL]     5
[12322, Byfacility, S.L.]                           5
[15, INICIATIVAS VIRTUALES DE MEXICO]               4
[13260, Ideas y Estrategia Digital SL]              3
[8307, Carglass, S.L.U.]                            3
[18, DigitalPla2021, S.L.]                          3
[12548, Barkibu S.L.]                               2
[12867, Ufinet Latam, S.L.U.]                       2
False                                               2
[123, María Sieiro Alfonsin]                        2
[14, Tandem Trade

Elogia Media S.L. (3)

In [54]:
print(invoices_by_company[3]['partner_id'].count())
invoices_by_company[3]['partner_id'].value_counts()

6348


partner_id
[8920, Boehringer Ingelheim España, S.A.]       573
[8679, Fira Internacional de Barcelona (ES)]    351
[8527, Fundació Pasqual Maragall]               231
[9308, AMAZON INVOICING SPAIN]                  201
[8870, Hero España SA]                          190
                                               ... 
[7758, Laboratorios Niam, S.L.]                   1
[8170, Bcnscience, S.L.]                          1
[7645, Galicia Sport 360 SLU]                     1
[7432, Naturgy Iberia, S.A.]                      1
[8003, Gree Products, S.L.]                       1
Name: count, Length: 296, dtype: int64

Ibrands Medios Interactivos SL (2)

In [42]:
invoices_by_company[2]['partner_id'].value_counts()

partner_id
[10, Elogia Media S.L.]                      72
[7907, Aby Marketing Dreams, S.L.]           35
[9311, Cint AB]                              30
[7854, Merkal Calzados S.L.]                 28
[7654, Feebbo Solutions, S.L.]               25
                                             ..
[8415, Kokoen GmbH]                           1
[8895, Datawork Marketing SL]                 1
[7481, Prosegur Compañía de Seguridad SA]     1
[7519, Roman y Asociados S.A.]                1
[9309, Diario ABC, S.L.]                      1
Name: count, Length: 96, dtype: int64

Kraz Data Solutions SL (5)

In [43]:
invoices_by_company[5]['partner_id'].value_counts()

partner_id
[10, Elogia Media S.L.]                                       16
[7812, Salvetti & Llombart, S.L.]                              9
[7773, Kave Home S.L.]                                         8
[7485, Unilever España, S.A.]                                  8
[1, Grupo Viko Digital Marketing, S.A.]                        7
[7419, Editorial Planeta, S.A.U.]                              7
[7461, Joyeria Tous S.A]                                       7
[7479, Vinoselección, S.A.]                                    6
[12541, SEAT, S.A.]                                            6
[10993, Pikostore, S.L.U.]                                     5
[10994, Pikolinos Intercontinental, S.A.]                      5
[8679, Fira Internacional de Barcelona (ES)]                   4
[10436, Octoplus Digital Shelf Optimization SL]                4
[13766, RuralMed, S.L.]                                        3
[7409, Ferrer Internacional, S.A.]                             3
[7421, Saba Ap

Marketing4ecommerce Digital Content SL (6)

In [44]:
invoices_by_company[6]['partner_id'].value_counts()

partner_id
[8571, Google Ireland Limited]            39
[9277, Pipedrive Inc]                     25
[8012, Virality Media, S.L.]              23
[9665, Pixel Labs LLC]                    22
[9605, Getlinko International, S.L.]      21
                                          ..
[14923, Ceramic Connection Shop, S.L.]     1
[14912, Santafixie Group, S.L.]            1
[14911, Channelbook, S.L.U.]               1
[15057, Flyeralarm, S.L.]                  1
[8527, Fundació Pasqual Maragall]          1
Name: count, Length: 472, dtype: int64

Octoplus Digital Shelf Optimization SL (13)

In [45]:
invoices_by_company[13]['partner_id'].value_counts()

partner_id
[10590, Beam Suntory Distribution SL]                    98
[10587, Beam Inc. Global Business Services]              62
[7403, Nestlé España S.A.]                                5
[10598, Medios Activos y Aplicacion de Servicios S.L]     4
[10595, Ceva Sante Animale]                               4
[12060, Beam Suntory España Beverages, S.L.U.]            3
[10616, Beautyge, S.L.]                                   3
[8062, Optopus Optimisation, S.L.]                        2
[14768, Beam Suntory Asia Pte. Ltd.]                      2
[10858, Beam Canada Inc.]                                 2
[14722, Colgate-Palmolive España, S.A.]                   2
[10594, Beam Suntory Australia Pty Ltd]                   2
[13260, Ideas y Estrategia Digital SL]                    1
[10592, Beam Suntory Germany GmbH]                        1
[11484, Casa Santiveri, S.L.]                             1
[10589, Beam Suntory Spain S.L]                           1
[10588, Cuetara S.L.U]       

Tandem Trade Marketing SL (7)

In [46]:
invoices_by_company[7]['partner_id'].value_counts()

partner_id
[10892, Marketplaces España]                 7606
[13926, Marketplaces Italia]                 2742
[13924, Marketplaces Francia]                 499
[14, Tandem Trade Marketing SL]               326
[15076, pruebas-cif-12345]                    190
                                             ... 
[10087, José Carlos Rodríguez Diago]            1
[10331, Inmopanta. S.L.]                        1
[10332, TecniOrganic, S.L.U.]                   1
[10333, La Costanera Santa Eulalia, S.L.]       1
[15041, Nicolás Sánchez-Biezma]                 1
Name: count, Length: 296, dtype: int64

DigitalPla2021, S.L. (11)

In [47]:
invoices_by_company[11]['partner_id'].value_counts()

partner_id
[9902, Manuel Alejandro Mesa Sánchez]    52
[9913, Sara Carbajo]                     26
[9881, Betty Lepina]                     26
[9916, Tomasz Smardzewski]               26
[9901, Paula Garcia Bustos]              26
                                         ..
[9887, Lola Garau]                        1
[9896, ISABEL ORGAZ TARAVILLA]            1
[9886, Daniel Lopez]                      1
[9883, Carmen Luz Zarrías Villena]        1
[9917, Valentin Salas]                    1
Name: count, Length: 233, dtype: int64

Ideas y Estrategia Digital SL (14)

In [48]:
invoices_by_company[14]['partner_id'].value_counts()

partner_id
[13557, Chiesi España, S.A]                          124
[13548, Esteve Pharmaceuticals SA]                   113
[13558, Elanco Spain S.L.]                           106
[8933, Zambon, S.A.U.]                                66
[13559, Kern Pharma S.L.]                             47
[13549, Swedish Orphan Biovitrum S.L.]                34
[13542, Laboratorio Reig Jofre, S.A.]                 21
[13547, Alexion Pharma Nordics AB]                    18
[13543, Alexion Pharma GMBH]                          11
[14595, Swedish Orphan Biovitrum AB (publ)]            8
[7583, Reckitt Benckiser Healthcare, S.A.]             8
[14476, Faes Farma S.A.]                               7
[13654, Atika Pharma S.L.]                             7
[13545, Elanco AH Portugal Unipessoal Lda.]            6
[13544, Angelini Pharma Portugal, Unipessoal Lda]      3
[13553, AdSalutem Lullaai S.L]                         2
[14627, Asociación Española Contra el Cáncer]          2
[10, Elogia Media S.

INICIATIVAS VIRTUALES DE MEXICO	(8)

In [49]:
invoices_by_company[8]['partner_id'].value_counts()

partner_id
[12056, COMERCIALIZADORA ALMACENES GARCIA DE MEXICO]    69
[9678, DAWN-MIXCO INTERNACIONAL]                        66
[8960, SEPHORA MEXICO]                                  60
[11041, TOYOTA TSUSHO CORPORATION DE MEXICO]            47
[9759, VCD CONSTRUCCION Y DESARROLLO]                   44
                                                        ..
[9391, RAGS]                                             1
[9390, SERVICIOS DE CAPITAL HUMANO AXO]                  1
[9389, MR BON MEXICO]                                    1
[9388, PULQUEDIGITAL]                                    1
[9392, AXEL DEMB]                                        1
Name: count, Length: 172, dtype: int64

IBRANDS MEDIOS INTERACTIVOS DE MEXICO (12)

In [50]:
invoices_by_company[12]['partner_id'].value_counts()

partner_id
[8987, ASALES, SERVICIOS DE MARKETING EN INTERNET]    30
[9387, TIENDAS SORIANA]                                5
[11708, ANTEVENIO MEXICO]                              4
[13917, KOVAFINANSI AMERICAS CORPORATE]                1
[16, MITTUM MARKETING RELACIONAL]                      1
[9427, SILVERSPRINGS SERVICIOS CORPORATIVOS]           1
[9386, INDUSTRIAS TUK]                                 1
Name: count, dtype: int64

MITTUM MARKETING RELACIONAL (9)

In [51]:
invoices_by_company[9]['partner_id'].value_counts()

partner_id
[8991, SEGUROS BANAMEX]                           30
[8989, TARJETAS BANAMEX]                          28
[8993, BANCO NACIONAL DE MEXICO]                  26
[9797, SOCIEDAD COOPERATIVA DE CONSUMO PEMEX,]    20
[8994, Pagos y Servicios S.A.]                     6
[8992, EVO PAYMENTS MEXICO.]                       5
Name: count, dtype: int64

##### Conclusiones

Hay diferencias significativas entre cada empresa:
- **Tandem Trade Marketing**: Tiene el mayor nombre de facturas (más de 10.000), la mayoría concentradas en Markeplace. Son de importe bajo y de alta frecuencia y se cobran inmediatamente, por tanto, tienen un riesgo de impago prácticamente nulo y pueden afectar negativamente al modelo de predicción aportando un sesgo positivo.
- **Elogia Media**: facturación B2B con importes medio/altos

# MVP

Comenzaré realizando una versión mínima viable del modelo de predicción de impagos.

Para facilitar el proceso, este modelo entrenará a partir de las facturas y podrá predecir si una factura va a ser pagada a tiempo o tarde.

In [None]:
!pip install pydantic[email]

In [None]:
pip install mcp_odoo/.

In [7]:
pip install nest_asyncio

Note: you may need to restart the kernel to use updated packages.


Connected to Odoo as albert.gil@yourtechtribe.com (uid: 430)
Odoo server version: {'server_version': '16.0+e-20250313', 'server_version_info': [16, 0, 0, 'final', 0, 'e'], 'server_serie': '16.0', 'protocol_version': 1}


# Datos empresas

In [15]:
asyncio.run(data_retriever.get_all_companies())

[Company(id=1, name='Grupo Viko Digital Marketing, S.A.', currency_id=(1, 'EUR')),
 Company(id=3, name='Elogia Media S.L.', currency_id=(1, 'EUR')),
 Company(id=2, name='Ibrands Medios Interactivos SL', currency_id=(1, 'EUR')),
 Company(id=5, name='Kraz Data Solutions SL', currency_id=(1, 'EUR')),
 Company(id=6, name='Marketing4ecommerce Digital Content SL', currency_id=(1, 'EUR')),
 Company(id=13, name='Octoplus Digital Shelf Optimization SL', currency_id=(1, 'EUR')),
 Company(id=7, name='Tandem Trade Marketing SL', currency_id=(1, 'EUR')),
 Company(id=11, name='DigitalPla2021, S.L.', currency_id=(1, 'EUR')),
 Company(id=14, name='Ideas y Estrategia Digital SL', currency_id=(1, 'EUR')),
 Company(id=8, name='INICIATIVAS VIRTUALES DE MEXICO', currency_id=(33, 'MXN')),
 Company(id=12, name='IBRANDS MEDIOS INTERACTIVOS DE MEXICO', currency_id=(33, 'MXN')),
 Company(id=9, name='MITTUM MARKETING RELACIONAL', currency_id=(33, 'MXN'))]

In [12]:
fields = [
        "id",
        "name",
        "email",
        "phone",
        "street",
        "city",
        "zip",
        "country_id",
        "customer_rank",
        "supplier_rank",
        "category_id",
        "company_type",
        "credit",
        "credit_limit",
        "debit",
        "debit_limit",
        "industry_id",
        "invoice_ids",
        "total_due",
        "total_invoiced",
        "total_overdue",
        "trust",
        "unpaid_invoice_ids",
        "unpaid_invoices_count",
    ]

partners = await odoo_connection.search_read(
        model="res.partner",
        domain=[("customer_rank", ">", 0)],
        fields=fields,
        limit=1000,
        offset=0
    )

In [24]:
payments = await odoo_connection.search_read(
        model="res.partner",
        domain=[("payment_type", "=", "inbound")],
        fields=[],
        limit=1000,
        offset=0
    )

OdooConnectionError: Error executing search_read on res.partner: <Fault 1: 'Traceback (most recent call last):\n  File "/opt/odoo/odoo/odoo/addons/base/controllers/rpc.py", line 151, in xmlrpc_2\n    response = self._xmlrpc(service)\n               ^^^^^^^^^^^^^^^^^^^^^\n  File "/opt/odoo/odoo/odoo/addons/base/controllers/rpc.py", line 127, in _xmlrpc\n    result = dispatch_rpc(service, method, params)\n             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File "/opt/odoo/odoo/odoo/http.py", line 369, in dispatch_rpc\n    return dispatch(method, params)\n           ^^^^^^^^^^^^^^^^^^^^^^^^\n  File "/opt/odoo/odoo/odoo/service/model.py", line 56, in dispatch\n    res = execute_kw(db, uid, *params[3:])\n          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File "/opt/odoo/odoo/odoo/service/model.py", line 79, in execute_kw\n    return execute(db, uid, obj, method, *args, **kw or {})\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File "/opt/odoo/odoo/odoo/service/model.py", line 84, in execute\n    res = execute_cr(cr, uid, obj, method, *args, **kw)\n          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File "/opt/odoo/odoo/odoo/service/model.py", line 70, in execute_cr\n    result = retrying(partial(odoo.api.call_kw, recs, method, args, kw), env)\n             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File "/opt/odoo/odoo/odoo/service/model.py", line 152, in retrying\n    result = func()\n             ^^^^^^\n  File "/opt/odoo/odoo/odoo/api.py", line 480, in call_kw\n    result = _call_kw_model(method, model, args, kwargs)\n             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File "/opt/odoo/odoo/odoo/api.py", line 451, in _call_kw_model\n    result = method(recs, *args, **kwargs)\n             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File "/opt/odoo/odoo/odoo/models.py", line 5048, in search_read\n    records = self.search(domain or [], offset=offset, limit=limit, order=order)\n              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File "/opt/odoo/odoo/odoo/models.py", line 1533, in search\n    res = self._search(domain, offset=offset, limit=limit, order=order, count=count)\n          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File "/opt/odoo/odoo/odoo/addons/base/models/res_partner.py", line 944, in _search\n    return super(Partner, self)._search(args, offset=offset, limit=limit, order=order,\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File "/opt/odoo/odoo/odoo/models.py", line 4717, in _search\n    query = self._where_calc(domain)\n            ^^^^^^^^^^^^^^^^^^^^^^^^\n  File "/opt/odoo/odoo/odoo/models.py", line 4482, in _where_calc\n    return expression.expression(domain, self).query\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File "/opt/odoo/odoo/odoo/osv/expression.py", line 447, in __init__\n    self.parse()\n  File "/opt/odoo/odoo/odoo/osv/expression.py", line 674, in parse\n    raise ValueError("Invalid field %s.%s in leaf %s" % (model._name, path[0], str(leaf)))\nValueError: Invalid field res.partner.payment_type in leaf (\'payment_type\', \'=\', \'inbound\')\n'>

In [20]:
count_inbound = await odoo_connection.execute_kw(
    model="account.payment",
    method="search_count",
    args=[[("payment_type", "=", "outbound")]]
)

# Datos facturas

In [32]:
company_id = 14
invoices = asyncio.run(data_retriever.get_all_outbound_invoices(company_id))
invoices_df = pd.DataFrame([i.to_dict() for i in invoices])

Recuperadas 500 facturas, total: 500
Recuperadas 95 facturas, total: 595


In [33]:
invoices_df.describe()

Unnamed: 0,id,amount_total,amount_residual,days_overdue
count,595.0,595.0,595.0,595.0
mean,143420.842017,5338.248672,912.600571,7.858824
std,21899.350306,8546.715734,4151.656529,13.038414
min,121169.0,108.73,0.0,-1.0
25%,130328.5,955.9,0.0,0.0
50%,130477.0,2178.0,0.0,4.0
75%,157948.5,6198.225,0.0,11.5
max,194734.0,90096.6,41964.87,103.0


In [34]:
invoices_df.columns.values

array(['id', 'name', 'move_type', 'payment_state', 'company_id',
       'partner_id', 'currency_id', 'amount_total', 'amount_residual',
       'invoice_date', 'invoice_date_due', 'journal_id', 'payment_dates',
       'paid_late', 'days_overdue'], dtype=object)

In [35]:
invoices_df["payment_dates"].dtypes

dtype('O')

In [36]:
invoices_df["payment_state"].value_counts()

payment_state
paid        515
not_paid     80
Name: count, dtype: int64

In [37]:
invoices_df["paid_late"].value_counts()

paid_late
True     365
False    150
Name: count, dtype: int64

In [38]:
invoices_df["partner_id"].value_counts()

partner_id
(13557, Chiesi España, S.A)                          124
(13548, Esteve Pharmaceuticals SA)                   113
(13558, Elanco Spain S.L.)                           106
(8933, Zambon, S.A.U.)                                66
(13559, Kern Pharma S.L.)                             47
(13549, Swedish Orphan Biovitrum S.L.)                34
(13542, Laboratorio Reig Jofre, S.A.)                 21
(13547, Alexion Pharma Nordics AB)                    18
(13543, Alexion Pharma GMBH)                          11
(14595, Swedish Orphan Biovitrum AB (publ))            8
(7583, Reckitt Benckiser Healthcare, S.A.)             8
(14476, Faes Farma S.A.)                               7
(13654, Atika Pharma S.L.)                             7
(13545, Elanco AH Portugal Unipessoal Lda.)            6
(13544, Angelini Pharma Portugal, Unipessoal Lda)      3
(13553, AdSalutem Lullaai S.L)                         2
(14627, Asociación Española Contra el Cáncer)          2
(10, Elogia Media S.

In [16]:
invoices_df["currency_id"].value_counts()

currency_id
(1, EUR)      12388
(18, SEK)         9
(142, GBP)        1
Name: count, dtype: int64

In [10]:
invoices_df

Unnamed: 0,id,name,move_type,payment_state,company_id,partner_id,currency_id,amount_total,amount_residual,invoice_date,invoice_date_due,journal_id,payment_dates,paid_late,days_overdue
0,196743,FVE/2025/00376,out_invoice,not_paid,"(3, Elogia Media S.L.)","(8920, Boehringer Ingelheim España, S.A.)","(1, EUR)",242.00,242.00,2025-06-17,2025-08-16,"(28, Facturas ventas Elogia)",,,-1
1,196654,FVE/2025/00375,out_invoice,not_paid,"(3, Elogia Media S.L.)","(14945, Olistic Research Labs, S.L.)","(1, EUR)",453.75,453.75,2025-06-12,2025-08-11,"(28, Facturas ventas Elogia)",,,-1
2,196649,FVE/2025/00374,out_invoice,not_paid,"(3, Elogia Media S.L.)","(14945, Olistic Research Labs, S.L.)","(1, EUR)",907.50,907.50,2025-06-12,2025-08-11,"(28, Facturas ventas Elogia)",,,-1
3,196631,FVE/2025/00373,out_invoice,not_paid,"(3, Elogia Media S.L.)","(14945, Olistic Research Labs, S.L.)","(1, EUR)",1210.00,1210.00,2025-06-11,2025-08-10,"(28, Facturas ventas Elogia)",,,-1
4,196628,FVE/2025/00372,out_invoice,not_paid,"(3, Elogia Media S.L.)","(14945, Olistic Research Labs, S.L.)","(1, EUR)",1210.00,1210.00,2025-06-11,2025-08-10,"(28, Facturas ventas Elogia)",,,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6343,9691,INV1/2022/00005,out_invoice,paid,"(3, Elogia Media S.L.)","(9247, Apoteca Natura Spa)","(1, EUR)",2250.00,0.00,2022-12-31,2023-01-30,"(340, Carga facturas venta Elogia)",2023-02-01,True,2
6344,9690,INV1/2022/00004,out_invoice,paid,"(3, Elogia Media S.L.)","(8301, Goiko Grill Group SL.)","(1, EUR)",11555.50,0.00,2022-12-31,2023-01-29,"(340, Carga facturas venta Elogia)",2023-02-17,True,19
6345,9689,INV1/2022/00003,out_invoice,paid,"(3, Elogia Media S.L.)","(7439, Boehringer Ingelheim Animal Health Espa...","(1, EUR)",1597.20,0.00,2022-12-31,2023-02-05,"(340, Carga facturas venta Elogia)",2023-03-01,True,24
6346,9688,INV1/2022/00002,out_invoice,paid,"(3, Elogia Media S.L.)","(8663, Irmaos Vila Nova SA)","(1, EUR)",12195.41,0.00,2022-12-31,2023-02-18,"(340, Carga facturas venta Elogia)",2023-02-21,True,3
