In [None]:
import polars as pl
from sqlalchemy import create_engine, text
from fh_saas.utils_polars_mapper import map_and_upsert, apply_schema

## Test Upsert with Staging Table

In [None]:
def test_map_and_upsert_insert():
    """Check that ``map_and_upsert`` inserts new rows into an empty table.

    The source frame uses upstream column names (``user_id_val``, ``ABC_1``,
    ``email_addr``) which ``column_map`` renames to the target columns. The
    frame also carries an unmapped ``extra`` column that the three-column
    ``users`` table has no place for.
    """

    # Named, shared-cache in-memory SQLite database: map_and_upsert only
    # receives the URI, so other connections must be able to see the same DB.
    db_uri = 'sqlite:///file:memdb1?mode=memory&cache=shared&uri=true'
    engine = create_engine(db_uri)

    try:
        # Create target table (drop first to ensure clean state)
        with engine.connect() as conn:
            conn.execute(text("DROP TABLE IF EXISTS users"))
            conn.execute(text("""
                CREATE TABLE users (
                    user_id INTEGER PRIMARY KEY,
                    name TEXT,
                    email TEXT
                )
            """))
            conn.commit()

        # Prepare test data
        json_data = [
            {'user_id_val': 1, 'ABC_1': 'Alice', 'email_addr': 'alice@example.com', 'extra': 'ignore'},
            {'user_id_val': 2, 'ABC_1': 'Bob', 'email_addr': 'bob@example.com', 'extra': 'ignore'}
        ]
        df = pl.DataFrame(json_data)

        # Execute upsert
        map_and_upsert(
            df=df,
            table_name='users',
            key_col='user_id',
            db_uri=db_uri,
            column_map={
                'user_id_val': 'user_id',
                'ABC_1': 'name',
                'email_addr': 'email'
            }
        )

        # Verify results: exactly the two mapped rows, in key order
        with engine.connect() as conn:
            result = conn.execute(text("SELECT * FROM users ORDER BY user_id"))
            rows = result.fetchall()

            assert len(rows) == 2
            assert rows[0] == (1, 'Alice', 'alice@example.com')
            assert rows[1] == (2, 'Bob', 'bob@example.com')
    finally:
        # Close the engine's pooled connection instead of leaking it,
        # whether the assertions passed or not.
        engine.dispose()

    print("✅ Test passed: Map and upsert (INSERT)")

test_map_and_upsert_insert()

✅ Test passed: Map and upsert (INSERT)


In [None]:
def test_map_and_upsert_update():
    """Check that ``map_and_upsert`` updates rows whose key already exists.

    The table is pre-seeded with two "OLD" rows. Upserting two frames rows
    with the same ``user_id`` values must overwrite them in place rather than
    add duplicates. No ``column_map`` is passed because the frame's columns
    already match the table's.
    """

    # Named, shared-cache in-memory SQLite database: map_and_upsert only
    # receives the URI, so other connections must be able to see the same DB.
    db_uri = 'sqlite:///file:memdb2?mode=memory&cache=shared&uri=true'
    engine = create_engine(db_uri)

    try:
        # Create target table with existing data (drop first to ensure clean state)
        with engine.connect() as conn:
            conn.execute(text("DROP TABLE IF EXISTS users"))
            conn.execute(text("""
                CREATE TABLE users (
                    user_id INTEGER PRIMARY KEY,
                    name TEXT,
                    email TEXT
                )
            """))
            conn.execute(text("""
                INSERT INTO users (user_id, name, email) VALUES
                (1, 'Alice OLD', 'alice_old@example.com'),
                (2, 'Bob OLD', 'bob_old@example.com')
            """))
            conn.commit()

        # Prepare updated data (same keys, new values)
        json_data = [
            {'user_id': 1, 'name': 'Alice NEW', 'email': 'alice_new@example.com'},
            {'user_id': 2, 'name': 'Bob NEW', 'email': 'bob_new@example.com'}
        ]
        df = pl.DataFrame(json_data)

        # Execute upsert (should UPDATE existing rows)
        map_and_upsert(
            df=df,
            table_name='users',
            key_col='user_id',
            db_uri=db_uri
        )

        # Verify rows were UPDATED (not duplicated)
        with engine.connect() as conn:
            result = conn.execute(text("SELECT * FROM users ORDER BY user_id"))
            rows = result.fetchall()

            assert len(rows) == 2  # Still only 2 rows (no duplicates)
            assert rows[0] == (1, 'Alice NEW', 'alice_new@example.com')
            assert rows[1] == (2, 'Bob NEW', 'bob_new@example.com')
    finally:
        # Close the engine's pooled connection instead of leaking it,
        # whether the assertions passed or not.
        engine.dispose()

    print("✅ Test passed: Map and upsert (UPDATE)")

test_map_and_upsert_update()

✅ Test passed: Map and upsert (UPDATE)


## Test Schema Transformations

In [None]:
def test_apply_schema_date():
    """Check that ``apply_schema`` converts ISO date strings to ``pl.Date``.

    Both the resulting dtype and the parsed values are verified, so a
    conversion that yields the right dtype but wrong or null dates still
    fails the test.
    """
    # Local import: only needed to spell out the expected values.
    from datetime import date

    df = pl.DataFrame({
        'created_at': ['2024-01-15', '2024-01-16']
    })

    df = apply_schema(df, {'created_at': pl.Date})

    assert df.schema['created_at'] == pl.Date
    # The dtype alone does not prove the strings were parsed correctly.
    assert df['created_at'].to_list() == [date(2024, 1, 15), date(2024, 1, 16)]
    print("✅ Test passed: Apply schema (Date)")

test_apply_schema_date()

✅ Test passed: Apply schema (Date)


In [None]:
def test_apply_schema_boolean():
    """Check that ``apply_schema`` parses lowercase and capitalized boolean strings."""

    raw_flags = ['true', 'false', 'True', 'False']
    expected_flags = [True, False, True, False]

    frame = pl.DataFrame({'is_active': raw_flags})
    converted = apply_schema(frame, {'is_active': pl.Boolean})

    # Column must carry the Boolean dtype and the parsed truth values.
    assert converted.schema['is_active'] == pl.Boolean
    assert converted['is_active'].to_list() == expected_flags
    print("✅ Test passed: Apply schema (Boolean)")

test_apply_schema_boolean()

✅ Test passed: Apply schema (Boolean)


In [None]:
def test_apply_schema_numeric():
    """Check that ``apply_schema`` casts numeric strings to float and integer columns."""

    target_types = {'amount': pl.Float64, 'count': pl.Int64}
    expected_values = {'amount': [123.45, 678.90], 'count': [10, 20]}

    frame = pl.DataFrame({
        'amount': ['123.45', '678.90'],
        'count': ['10', '20'],
    })
    converted = apply_schema(frame, target_types)

    # First the dtypes of both columns, then their parsed contents.
    for column, dtype in target_types.items():
        assert converted.schema[column] == dtype
    for column, values in expected_values.items():
        assert converted[column].to_list() == values
    print("✅ Test passed: Apply schema (Numeric)")

test_apply_schema_numeric()

✅ Test passed: Apply schema (Numeric)


## Test Row Count Return Value

`map_and_upsert` now returns the actual number of rows affected by the database upsert operation.

In [None]:
def test_map_and_upsert_returns_row_count():
    """Check that ``map_and_upsert`` returns the number of rows affected.

    Three new rows are upserted into an empty ``items`` table; the return
    value must be the integer 3.
    """

    # Named, shared-cache in-memory SQLite database: map_and_upsert only
    # receives the URI, so other connections must be able to see the same DB.
    db_uri = 'sqlite:///file:memdb_count?mode=memory&cache=shared&uri=true'
    engine = create_engine(db_uri)

    try:
        # Create target table (drop first to ensure clean state)
        with engine.connect() as conn:
            conn.execute(text("DROP TABLE IF EXISTS items"))
            conn.execute(text("""
                CREATE TABLE items (
                    id INTEGER PRIMARY KEY,
                    name TEXT
                )
            """))
            conn.commit()

        # Prepare test data
        json_data = [
            {'id': 1, 'name': 'Item A'},
            {'id': 2, 'name': 'Item B'},
            {'id': 3, 'name': 'Item C'}
        ]
        df = pl.DataFrame(json_data)

        # Execute upsert and capture return value
        rows_affected = map_and_upsert(
            df=df,
            table_name='items',
            key_col='id',
            db_uri=db_uri
        )

        # Verify return value is an integer representing rows affected
        assert isinstance(rows_affected, int), f"Expected int, got {type(rows_affected)}"
        assert rows_affected == 3, f"Expected 3 rows affected, got {rows_affected}"
    finally:
        # Close the engine's pooled connection instead of leaking it,
        # whether the assertions passed or not.
        engine.dispose()

    print(f"✅ Test passed: map_and_upsert returns row count ({rows_affected})")

test_map_and_upsert_returns_row_count()

✅ Test passed: map_and_upsert returns row count (3)


## Test type_map Parameter

`map_and_upsert` now accepts a `type_map` parameter for explicit column type casting before writing to the database. This prevents type mismatch errors when Polars infers incorrect types from nullable data.

In [None]:
def test_map_and_upsert_with_type_map():
    """Check that ``type_map`` casts columns explicitly and keeps NULLs intact.

    Both balance columns contain ``None`` entries and are cast to ``pl.Int64``
    through ``type_map`` before being written to INTEGER columns. The stored
    rows must keep the integers and the NULLs, and the returned row count
    must match the three rows written.
    """

    # Named, shared-cache in-memory SQLite database: map_and_upsert only
    # receives the URI, so other connections must be able to see the same DB.
    db_uri = 'sqlite:///file:memdb_typemap?mode=memory&cache=shared&uri=true'
    engine = create_engine(db_uri)

    try:
        # Create target table with INTEGER columns
        with engine.connect() as conn:
            conn.execute(text("DROP TABLE IF EXISTS accounts"))
            conn.execute(text("""
                CREATE TABLE accounts (
                    id INTEGER PRIMARY KEY,
                    balance_current INTEGER,
                    balance_available INTEGER
                )
            """))
            conn.commit()

        # Prepare test data with None values: nullable columns are the case
        # where an inferred dtype may not match the target column type.
        json_data = [
            {'id': 1, 'balance_current': 1000, 'balance_available': 800},
            {'id': 2, 'balance_current': None, 'balance_available': 500},  # None value
            {'id': 3, 'balance_current': 2000, 'balance_available': None}   # None value
        ]
        df = pl.DataFrame(json_data)

        # Use type_map to ensure correct types
        rows_affected = map_and_upsert(
            df=df,
            table_name='accounts',
            key_col='id',
            db_uri=db_uri,
            type_map={
                'balance_current': pl.Int64,
                'balance_available': pl.Int64
            }
        )

        # The returned count must agree with the three rows written.
        assert rows_affected == 3, f"Expected 3 rows affected, got {rows_affected}"

        # Verify data was inserted correctly
        with engine.connect() as conn:
            result = conn.execute(text("SELECT * FROM accounts ORDER BY id"))
            rows = result.fetchall()

            assert len(rows) == 3
            assert rows[0] == (1, 1000, 800)
            assert rows[1] == (2, None, 500)  # None preserved
            assert rows[2] == (3, 2000, None)  # None preserved
    finally:
        # Close the engine's pooled connection instead of leaking it,
        # whether the assertions passed or not.
        engine.dispose()

    print(f"✅ Test passed: map_and_upsert with type_map ({rows_affected} rows)")

test_map_and_upsert_with_type_map()

✅ Test passed: map_and_upsert with type_map (3 rows)


In [None]:
def test_map_and_upsert_type_map_invalid_column():
    """Check that unknown ``type_map`` columns are skipped without raising.

    ``type_map`` names a column that is absent from the DataFrame. The upsert
    must still succeed, write both rows unchanged, and report two rows
    affected. The "skipping" notice that ``map_and_upsert`` emits for the
    unknown column is not asserted on here.
    """

    # Named, shared-cache in-memory SQLite database: map_and_upsert only
    # receives the URI, so other connections must be able to see the same DB.
    db_uri = 'sqlite:///file:memdb_invalid?mode=memory&cache=shared&uri=true'
    engine = create_engine(db_uri)

    try:
        # Create target table (drop first to ensure clean state)
        with engine.connect() as conn:
            conn.execute(text("DROP TABLE IF EXISTS products"))
            conn.execute(text("""
                CREATE TABLE products (
                    id INTEGER PRIMARY KEY,
                    price REAL
                )
            """))
            conn.commit()

        # Prepare test data
        json_data = [
            {'id': 1, 'price': 9.99},
            {'id': 2, 'price': 19.99}
        ]
        df = pl.DataFrame(json_data)

        # type_map includes a column that doesn't exist in DataFrame
        # This should not raise an error
        rows_affected = map_and_upsert(
            df=df,
            table_name='products',
            key_col='id',
            db_uri=db_uri,
            type_map={
                'price': pl.Float64,
                'nonexistent_column': pl.Int64  # Should be skipped
            }
        )

        # Verify data was still inserted correctly: count, contents, return value
        with engine.connect() as conn:
            result = conn.execute(text("SELECT * FROM products ORDER BY id"))
            rows = result.fetchall()

            assert len(rows) == 2
            assert rows[0] == (1, 9.99)
            assert rows[1] == (2, 19.99)
            assert rows_affected == 2
    finally:
        # Close the engine's pooled connection instead of leaking it,
        # whether the assertions passed or not.
        engine.dispose()

    print("✅ Test passed: map_and_upsert handles invalid type_map columns gracefully")

test_map_and_upsert_type_map_invalid_column()

Column 'nonexistent_column' in type_map not found in DataFrame, skipping


✅ Test passed: map_and_upsert handles invalid type_map columns gracefully
