In [77]:
import pandas as pd
import xml.etree.ElementTree as ET
from sqlalchemy import text
from connection import connect
from to_spanish import convert_language
from transformers import MarianMTModel, MarianTokenizer
import numpy as np

In [78]:
# Conexión
co_oltp,etl_conn,etl_conn_or= connect()

# Data exploration

In [79]:
pd.set_option('display.max_columns', None)

# Lista de tablas que queremos inspeccionar
candidate_tables = [
    ('person', 'person'),
    ('sales', 'customer'),
    ('person', 'address'),
    ('person', 'state_province'),
    ('person', 'email_address'),
    ('person', 'person_phone'),
    ('sales', 'sales_territory'),
    ('sales', 'sales_order_header')
]

for schema, table in candidate_tables:
    try:
        cols = inspector_co.get_columns(table, schema=schema)
        print(f"\n{schema}.{table} -> {len(cols)} columnas")
        for c in cols:
            print(f"  - {c['name']:30} {c['type']}")
    except Exception as e:
        print(f"\nNo se pudo obtener columnas de {schema}.{table}: {e}")



No se pudo obtener columnas de person.person: name 'inspector_co' is not defined

No se pudo obtener columnas de sales.customer: name 'inspector_co' is not defined

No se pudo obtener columnas de person.address: name 'inspector_co' is not defined

No se pudo obtener columnas de person.state_province: name 'inspector_co' is not defined

No se pudo obtener columnas de person.email_address: name 'inspector_co' is not defined

No se pudo obtener columnas de person.person_phone: name 'inspector_co' is not defined

No se pudo obtener columnas de sales.sales_territory: name 'inspector_co' is not defined

No se pudo obtener columnas de sales.sales_order_header: name 'inspector_co' is not defined


# Extract

In [80]:
# Recuperando dimension de geografía

df_geo_with_keys = pd.read_sql(
    text("""
        SELECT geography_key, city, postal_code, state_province_code, country_region_code
    FROM dim_geography;
    """),
    etl_conn
)

print(f"Se recuperaron {len(df_geo_with_keys)} registros de geografía.")
print(df_geo_with_keys.head())


Se recuperaron 655 registros de geografía.
   geography_key          city postal_code state_province_code  \
0            656   Lake George       12845                 NY    
1            657    North Ryde        2113                 NSW   
2            658          Clay       13041                 NY    
3            659  Bell Gardens       90201                 CA    
4            660       Burbank       91502                 CA    

  country_region_code  
0                  US  
1                  AU  
2                  US  
3                  US  
4                  US  


In [81]:
query_dimension = text("""

--- SOLO COMPRAS HECHAS DESDE INTERNET
WITH internet_customers AS (
    SELECT DISTINCT customer_id
    FROM sales.sales_order_header
    WHERE online_order_flag = true
)


SELECT
    c.account_number AS customer_alternate_key,
    p.title,
    p.first_name,
    p.middle_name,
    p.last_name,
    p.suffix,
    p.name_style,
    p.demographics,
    e.email_address,
    rp.phone_number AS phone,
    a.address_line_1 AS address_line1,
    a.address_line_2 AS address_line2,

    a.city,
    a.postal_code,
    sp.state_province_code,
    cr.country_region_code
FROM
    sales.customer AS c
    INNER JOIN internet_customers AS ic
        ON c.customer_id = ic.customer_id
    INNER JOIN person.person AS p
        ON c.person_id = p.business_entity_id
    INNER JOIN person.email_address AS e
        ON p.business_entity_id = e.business_entity_id
    LEFT JOIN person.person_phone AS rp
        ON p.business_entity_id = rp.business_entity_id
    INNER JOIN person.business_entity_address AS bea
        ON p.business_entity_id = bea.business_entity_id
    INNER JOIN person.address AS a
        ON bea.address_id = a.address_id
    INNER JOIN person.state_province AS sp
        ON a.state_province_id = sp.state_province_id
    INNER JOIN person.country_region AS cr
        ON sp.country_region_code = cr.country_region_code
    INNER JOIN person.address_type AS at
        ON bea.address_type_id = at.address_type_id
WHERE
    at.name = 'Home';
""")

df_ctm = pd.read_sql(query_dimension, co_oltp)


In [82]:
print(df_ctm.columns)
print(df_ctm.shape)

Index(['customer_alternate_key', 'title', 'first_name', 'middle_name',
       'last_name', 'suffix', 'name_style', 'demographics', 'email_address',
       'phone', 'address_line1', 'address_line2', 'city', 'postal_code',
       'state_province_code', 'country_region_code'],
      dtype='object')
(18484, 16)


# Transform

In [83]:
print(df_ctm['demographics'].apply(type).value_counts())

demographics
<class 'str'>    18484
Name: count, dtype: int64


In [84]:
# Extraer elementos del xml demographics

def extract_text_or_none(element):
    return element.text if element is not None else None

none_xml_columns_dict = {
            'birth_date': None,
            'gender' : None,
            'marital_status' : None,
            'yearly_income' : None,
            'education' : None,
            'occupation' : None,
            'house_owner_flag' : None,
            'number_cars_owned' : None,
            'date_first_purchase' : None,
            'commute_distance' : None,
        }

def extract_demographics(xml_string):
    if pd.isna(xml_string):
        return none_xml_columns_dict

    try:
        root = ET.fromstring(xml_string)
        ns = {'ns': root.tag.split('}')[0].strip('{')}

        def namespace_find(tag):
            el = root.find(f'ns:{tag}', ns)
            return el.text if el is not None else None

        return {
            'birth_date': namespace_find('BirthDate'),
            'gender': namespace_find('Gender'),
            'marital_status': namespace_find('MaritalStatus'),
            'yearly_income': namespace_find('YearlyIncome'),
            'total_children': namespace_find('TotalChildren'),
            'number_children_at_home': namespace_find('NumberChildrenAtHome'),
            'education': namespace_find('Education'),
            'occupation': namespace_find('Occupation'),
            'house_owner_flag': namespace_find('HomeOwnerFlag'),
            'number_cars_owned': namespace_find('NumberCarsOwned'),
            'date_first_purchase': namespace_find('DateFirstPurchase'),
            'commute_distance': namespace_find('CommuteDistance'),
        }

    except Exception:
        return none_xml_columns_dict

demographics_df = df_ctm['demographics'].apply(extract_demographics).apply(pd.Series)
df_ctm = pd.concat([df_ctm.drop('demographics', axis=1), demographics_df], axis=1)


In [85]:
# Pasando valores a booleanos
df_ctm['house_owner_flag'] = df_ctm['house_owner_flag'].astype(bool)

df_ctm.tail()

Unnamed: 0,customer_alternate_key,title,first_name,middle_name,last_name,suffix,name_style,email_address,phone,address_line1,address_line2,city,postal_code,state_province_code,country_region_code,birth_date,gender,marital_status,yearly_income,total_children,number_children_at_home,education,occupation,house_owner_flag,number_cars_owned,date_first_purchase,commute_distance
18479,AW00019379,,Crystal,,Guo,,False,crystal18@adventure-works.com,1 (11) 500 555-0171,988 Mt. Everest Court,,W. York,BD1 4SJ,ENG,GB,1974-11-23Z,F,S,0-25000,0,0,Partial High School,Manual,True,2,2004-04-19Z,0-1 Miles
18480,AW00013933,,Isabella,F,Richardson,,False,isabella91@adventure-works.com,910-555-0166,7413 Alpine Drive,,Torrance,90505,CA,US,1961-06-12Z,F,M,50001-75000,1,0,Partial College,Skilled Manual,True,1,2003-08-30Z,2-5 Miles
18481,AW00024634,,Crystal,S,He,,False,crystal19@adventure-works.com,813-555-0148,4764 East Avenue,,Bremerton,98312,WA,US,1940-04-05Z,F,M,50001-75000,3,0,Bachelors,Management,True,2,2004-04-12Z,10+ Miles
18482,AW00021127,,Crystal,,Zheng,,False,crystal20@adventure-works.com,1 (11) 500 555-0171,"34334, rue Jean Mermoz",,Versailles,78000,78,FR,1975-07-25Z,F,S,25001-50000,0,0,Partial College,Clerical,True,1,2004-02-15Z,2-5 Miles
18483,AW00027980,,Crystal,,Hu,,False,crystal21@adventure-works.com,1 (11) 500 555-0126,6022 La Salle Ct.,,Darlinghurst,2010,NSW,AU,1971-05-05Z,F,M,75001-100000,0,0,Bachelors,Professional,True,4,2003-11-17Z,10+ Miles


In [86]:
# Verificando valores nulos
columns_with_nulls = df_ctm.columns[df_ctm.isnull().any()].tolist()
print("Columnas con valores nulos")
print(columns_with_nulls)

Columnas con valores nulos
['title', 'middle_name', 'suffix', 'address_line2']


In [87]:
df_ctm.rename(columns={'occupation':'english_occupation','education':'english_education'},inplace=True)

df_ctm=convert_language('en','fr','english_education','french_education',df_ctm)
df_ctm=convert_language('en','es','english_education','spanish_education',df_ctm)
df_ctm=convert_language('en','es','english_occupation','spanish_occupation',df_ctm)
df_ctm=convert_language('en','fr','english_occupation','french_occupation',df_ctm)

df_ctm

Found 18484 total rows, but only 5 unique values to translate.
Translation complete.
Found 18484 total rows, but only 5 unique values to translate.
Translation complete.
Found 18484 total rows, but only 5 unique values to translate.
Translation complete.
Found 18484 total rows, but only 5 unique values to translate.
Translation complete.


Unnamed: 0,customer_alternate_key,title,first_name,middle_name,last_name,suffix,name_style,email_address,phone,address_line1,address_line2,city,postal_code,state_province_code,country_region_code,birth_date,gender,marital_status,yearly_income,total_children,number_children_at_home,english_education,english_occupation,house_owner_flag,number_cars_owned,date_first_purchase,commute_distance,french_education,spanish_education,spanish_occupation,french_occupation
0,AW00011377,Mr.,David,R.,Robinett,,False,david22@adventure-works.com,238-555-0100,Pappelallee 6667,,Solingen,42651,NW,DE,1961-02-23Z,M,M,25001-50000,4,0,Graduate Degree,Clerical,True,0,2003-09-01Z,0-1 Miles,Diplôme d'études supérieures,Graduado,Clerical,Clercs
1,AW00011913,Ms.,Rebecca,A.,Robinson,,False,rebecca3@adventure-works.com,648-555-0100,1861 Chinquapin Ct,,Seaford,3198,VIC,AU,1965-06-11Z,F,M,50001-75000,3,3,Bachelors,Professional,True,1,2004-06-05Z,5-10 Miles,Baccalauréat,Bachelors,Cuadro orgánico,Professionnel
2,AW00011952,Ms.,Dorothy,B.,Robinson,,False,dorothy3@adventure-works.com,423-555-0100,4693 Mills Dr.,,Geelong,3220,VIC,AU,1954-09-23Z,M,S,75001-100000,2,0,Partial College,Skilled Manual,True,2,2002-04-07Z,1-2 Miles,Collège partiel,Colegio parcial,Manual especializado,Manuel qualifié
3,AW00020164,Ms.,Carol Ann,F.,Rockne,,False,carolann0@adventure-works.com,439-555-0100,1312 Skycrest Drive,,Lancaster,LA1 1LN,ENG,GB,1943-07-15Z,M,M,25001-50000,1,0,Bachelors,Clerical,True,0,2001-10-27Z,0-1 Miles,Baccalauréat,Bachelors,Clerical,Clercs
4,AW00020211,Mr.,Scott,M.,Rodgers,,False,scott10@adventure-works.com,989-555-0100,9860 Brookview Drive,,East Brisbane,4169,QLD,AU,1968-05-15Z,M,M,50001-75000,2,2,Bachelors,Professional,True,1,2002-04-18Z,2-5 Miles,Baccalauréat,Bachelors,Cuadro orgánico,Professionnel
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18479,AW00019379,,Crystal,,Guo,,False,crystal18@adventure-works.com,1 (11) 500 555-0171,988 Mt. Everest Court,,W. York,BD1 4SJ,ENG,GB,1974-11-23Z,F,S,0-25000,0,0,Partial High School,Manual,True,2,2004-04-19Z,0-1 Miles,École secondaire partielle,Escuela secundaria parcial,Manual,Manuel
18480,AW00013933,,Isabella,F,Richardson,,False,isabella91@adventure-works.com,910-555-0166,7413 Alpine Drive,,Torrance,90505,CA,US,1961-06-12Z,F,M,50001-75000,1,0,Partial College,Skilled Manual,True,1,2003-08-30Z,2-5 Miles,Collège partiel,Colegio parcial,Manual especializado,Manuel qualifié
18481,AW00024634,,Crystal,S,He,,False,crystal19@adventure-works.com,813-555-0148,4764 East Avenue,,Bremerton,98312,WA,US,1940-04-05Z,F,M,50001-75000,3,0,Bachelors,Management,True,2,2004-04-12Z,10+ Miles,Baccalauréat,Bachelors,Gestión,Gestion
18482,AW00021127,,Crystal,,Zheng,,False,crystal20@adventure-works.com,1 (11) 500 555-0171,"34334, rue Jean Mermoz",,Versailles,78000,78,FR,1975-07-25Z,F,S,25001-50000,0,0,Partial College,Clerical,True,1,2004-02-15Z,2-5 Miles,Collège partiel,Colegio parcial,Clerical,Clercs


In [88]:
df_ctm.head()

Unnamed: 0,customer_alternate_key,title,first_name,middle_name,last_name,suffix,name_style,email_address,phone,address_line1,address_line2,city,postal_code,state_province_code,country_region_code,birth_date,gender,marital_status,yearly_income,total_children,number_children_at_home,english_education,english_occupation,house_owner_flag,number_cars_owned,date_first_purchase,commute_distance,french_education,spanish_education,spanish_occupation,french_occupation
0,AW00011377,Mr.,David,R.,Robinett,,False,david22@adventure-works.com,238-555-0100,Pappelallee 6667,,Solingen,42651,NW,DE,1961-02-23Z,M,M,25001-50000,4,0,Graduate Degree,Clerical,True,0,2003-09-01Z,0-1 Miles,Diplôme d'études supérieures,Graduado,Clerical,Clercs
1,AW00011913,Ms.,Rebecca,A.,Robinson,,False,rebecca3@adventure-works.com,648-555-0100,1861 Chinquapin Ct,,Seaford,3198,VIC,AU,1965-06-11Z,F,M,50001-75000,3,3,Bachelors,Professional,True,1,2004-06-05Z,5-10 Miles,Baccalauréat,Bachelors,Cuadro orgánico,Professionnel
2,AW00011952,Ms.,Dorothy,B.,Robinson,,False,dorothy3@adventure-works.com,423-555-0100,4693 Mills Dr.,,Geelong,3220,VIC,AU,1954-09-23Z,M,S,75001-100000,2,0,Partial College,Skilled Manual,True,2,2002-04-07Z,1-2 Miles,Collège partiel,Colegio parcial,Manual especializado,Manuel qualifié
3,AW00020164,Ms.,Carol Ann,F.,Rockne,,False,carolann0@adventure-works.com,439-555-0100,1312 Skycrest Drive,,Lancaster,LA1 1LN,ENG,GB,1943-07-15Z,M,M,25001-50000,1,0,Bachelors,Clerical,True,0,2001-10-27Z,0-1 Miles,Baccalauréat,Bachelors,Clerical,Clercs
4,AW00020211,Mr.,Scott,M.,Rodgers,,False,scott10@adventure-works.com,989-555-0100,9860 Brookview Drive,,East Brisbane,4169,QLD,AU,1968-05-15Z,M,M,50001-75000,2,2,Bachelors,Professional,True,1,2002-04-18Z,2-5 Miles,Baccalauréat,Bachelors,Cuadro orgánico,Professionnel


In [89]:
# Vinculando dim_customer con dim_geography

linking_columns = ['city', 'postal_code', 'state_province_code', 'country_region_code']

df_customer_final = pd.merge(
    df_ctm,
    df_geo_with_keys,
    on=linking_columns,
    how='left'
)

print("Columnas después del merge:", df_customer_final.columns)
print(df_customer_final[['first_name', 'city', 'geography_key']].head())

Columnas después del merge: Index(['customer_alternate_key', 'title', 'first_name', 'middle_name',
       'last_name', 'suffix', 'name_style', 'email_address', 'phone',
       'address_line1', 'address_line2', 'city', 'postal_code',
       'state_province_code', 'country_region_code', 'birth_date', 'gender',
       'marital_status', 'yearly_income', 'total_children',
       'number_children_at_home', 'english_education', 'english_occupation',
       'house_owner_flag', 'number_cars_owned', 'date_first_purchase',
       'commute_distance', 'french_education', 'spanish_education',
       'spanish_occupation', 'french_occupation', 'geography_key'],
      dtype='object')
  first_name           city  geography_key
0      David       Solingen            905
1    Rebecca        Seaford           1277
2    Dorothy        Geelong            826
3  Carol Ann      Lancaster            955
4      Scott  East Brisbane            833


# Load

In [90]:
final_columns = [
    'customer_alternate_key', 'geography_key', 'title', 'first_name', 'middle_name',
    'last_name', 'suffix', 'name_style', 'birth_date', 'gender', 'marital_status',
    'yearly_income', 'total_children', 'number_children_at_home', 'english_education',
    'spanish_education', 'english_occupation', 'spanish_occupation', 'house_owner_flag',
    'number_cars_owned', 'email_address', 'phone', 'address_line1', 'address_line2',
    'date_first_purchase', 'commute_distance','french_occupation','french_education'
]

df_to_load = df_customer_final[final_columns]

df_to_load.head()

Unnamed: 0,customer_alternate_key,geography_key,title,first_name,middle_name,last_name,suffix,name_style,birth_date,gender,marital_status,yearly_income,total_children,number_children_at_home,english_education,spanish_education,english_occupation,spanish_occupation,house_owner_flag,number_cars_owned,email_address,phone,address_line1,address_line2,date_first_purchase,commute_distance,french_occupation,french_education
0,AW00011377,905,Mr.,David,R.,Robinett,,False,1961-02-23Z,M,M,25001-50000,4,0,Graduate Degree,Graduado,Clerical,Clerical,True,0,david22@adventure-works.com,238-555-0100,Pappelallee 6667,,2003-09-01Z,0-1 Miles,Clercs,Diplôme d'études supérieures
1,AW00011913,1277,Ms.,Rebecca,A.,Robinson,,False,1965-06-11Z,F,M,50001-75000,3,3,Bachelors,Bachelors,Professional,Cuadro orgánico,True,1,rebecca3@adventure-works.com,648-555-0100,1861 Chinquapin Ct,,2004-06-05Z,5-10 Miles,Professionnel,Baccalauréat
2,AW00011952,826,Ms.,Dorothy,B.,Robinson,,False,1954-09-23Z,M,S,75001-100000,2,0,Partial College,Colegio parcial,Skilled Manual,Manual especializado,True,2,dorothy3@adventure-works.com,423-555-0100,4693 Mills Dr.,,2002-04-07Z,1-2 Miles,Manuel qualifié,Collège partiel
3,AW00020164,955,Ms.,Carol Ann,F.,Rockne,,False,1943-07-15Z,M,M,25001-50000,1,0,Bachelors,Bachelors,Clerical,Clerical,True,0,carolann0@adventure-works.com,439-555-0100,1312 Skycrest Drive,,2001-10-27Z,0-1 Miles,Clercs,Baccalauréat
4,AW00020211,833,Mr.,Scott,M.,Rodgers,,False,1968-05-15Z,M,M,50001-75000,2,2,Bachelors,Bachelors,Professional,Cuadro orgánico,True,1,scott10@adventure-works.com,989-555-0100,9860 Brookview Drive,,2002-04-18Z,2-5 Miles,Professionnel,Baccalauréat


In [91]:
df_to_load.shape

(18484, 28)

In [92]:
def upper_income(x):
    if not isinstance(x,str):
        return np.nan
    parts=x.split('-')
    if len(parts)==2:
        try:
            return float(parts[1])
        except:
            return np.nan
    elif len(parts)==1:
        try:
            return float(parts[0])
        except:
            return np.nan
    else:
        return np.nan

 

df_to_load=df_to_load['yearly_income'].apply(upper_income)

In [93]:
round(2,4)

2

In [94]:
df_to_load.to_sql(
    'dim_customer',
    etl_conn,
    if_exists='append',
    index=False
)

IntegrityError: (psycopg2.errors.NotNullViolation) null value in column "customer_alternate_key" of relation "dim_customer" violates not-null constraint
DETAIL:  Failing row contains (1, null, null, null, null, null, null, null, null, null, null, null, null, 50000.0000, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null).

[SQL: INSERT INTO dim_customer (yearly_income) VALUES (%(yearly_income__0)s), (%(yearly_income__1)s), (%(yearly_income__2)s), (%(yearly_income__3)s), (%(yearly_income__4)s), (%(yearly_income__5)s), (%(yearly_income__6)s), (%(yearly_income__7)s), (%(yearly_ ... 25586 characters truncated ... (yearly_income__996)s), (%(yearly_income__997)s), (%(yearly_income__998)s), (%(yearly_income__999)s)]
[parameters: {'yearly_income__0': 50000.0, 'yearly_income__1': 75000.0, 'yearly_income__2': 100000.0, 'yearly_income__3': 50000.0, 'yearly_income__4': 75000.0, 'yearly_income__5': None, 'yearly_income__6': 75000.0, 'yearly_income__7': 25000.0, 'yearly_income__8': 75000.0, 'yearly_income__9': 25000.0, 'yearly_income__10': 100000.0, 'yearly_income__11': 75000.0, 'yearly_income__12': 100000.0, 'yearly_income__13': 100000.0, 'yearly_income__14': 25000.0, 'yearly_income__15': 50000.0, 'yearly_income__16': 50000.0, 'yearly_income__17': None, 'yearly_income__18': 25000.0, 'yearly_income__19': 25000.0, 'yearly_income__20': 75000.0, 'yearly_income__21': 75000.0, 'yearly_income__22': 50000.0, 'yearly_income__23': 25000.0, 'yearly_income__24': 50000.0, 'yearly_income__25': 25000.0, 'yearly_income__26': 75000.0, 'yearly_income__27': 25000.0, 'yearly_income__28': 25000.0, 'yearly_income__29': 50000.0, 'yearly_income__30': 75000.0, 'yearly_income__31': None, 'yearly_income__32': 25000.0, 'yearly_income__33': 25000.0, 'yearly_income__34': 50000.0, 'yearly_income__35': 100000.0, 'yearly_income__36': 50000.0, 'yearly_income__37': None, 'yearly_income__38': 75000.0, 'yearly_income__39': 75000.0, 'yearly_income__40': 50000.0, 'yearly_income__41': None, 'yearly_income__42': 100000.0, 'yearly_income__43': 50000.0, 'yearly_income__44': 25000.0, 'yearly_income__45': 75000.0, 'yearly_income__46': 75000.0, 'yearly_income__47': 25000.0, 'yearly_income__48': None, 'yearly_income__49': None ... 900 parameters truncated ... 'yearly_income__950': 75000.0, 'yearly_income__951': 25000.0, 'yearly_income__952': 50000.0, 'yearly_income__953': 50000.0, 'yearly_income__954': 100000.0, 'yearly_income__955': 75000.0, 'yearly_income__956': 25000.0, 'yearly_income__957': 75000.0, 'yearly_income__958': 75000.0, 'yearly_income__959': 75000.0, 'yearly_income__960': 75000.0, 'yearly_income__961': 50000.0, 'yearly_income__962': 25000.0, 'yearly_income__963': 50000.0, 'yearly_income__964': 75000.0, 'yearly_income__965': 50000.0, 'yearly_income__966': 75000.0, 'yearly_income__967': 75000.0, 'yearly_income__968': 75000.0, 'yearly_income__969': 25000.0, 'yearly_income__970': None, 'yearly_income__971': 50000.0, 'yearly_income__972': 75000.0, 'yearly_income__973': 100000.0, 'yearly_income__974': 25000.0, 'yearly_income__975': 25000.0, 'yearly_income__976': 75000.0, 'yearly_income__977': 75000.0, 'yearly_income__978': 25000.0, 'yearly_income__979': 50000.0, 'yearly_income__980': 25000.0, 'yearly_income__981': 100000.0, 'yearly_income__982': 75000.0, 'yearly_income__983': 50000.0, 'yearly_income__984': 50000.0, 'yearly_income__985': 50000.0, 'yearly_income__986': 50000.0, 'yearly_income__987': 75000.0, 'yearly_income__988': 50000.0, 'yearly_income__989': 50000.0, 'yearly_income__990': 75000.0, 'yearly_income__991': 25000.0, 'yearly_income__992': None, 'yearly_income__993': 25000.0, 'yearly_income__994': 50000.0, 'yearly_income__995': None, 'yearly_income__996': 75000.0, 'yearly_income__997': 75000.0, 'yearly_income__998': None, 'yearly_income__999': 25000.0}]
(Background on this error at: https://sqlalche.me/e/20/gkpj)