In [None]:
#| default_exp utils_sync_tests

In [None]:
import tempfile
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock, patch

import polars as pl
import pytest
import sqlalchemy as sa
from sqlalchemy.orm import sessionmaker

from fh_saas.utils_api import AsyncAPIClient
from fh_saas.utils_graphql import GraphQLClient
from fh_saas.utils_sync import sync_external_data, sync_incremental

## Test sync_external_data

In [None]:
@pytest.mark.asyncio
async def test_sync_external_data_full_pipeline():
    """Run ``sync_external_data`` over three mocked batches and verify the result.

    The mocked ``fetch_pages_generator`` yields 2 + 1 + 2 records whose keys
    are renamed through ``column_map``. The test then checks the returned
    stats and reads the ``users`` table back to confirm what was stored.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        # Use a file-backed database: every new connection to
        # 'sqlite:///:memory:' gets its own private, empty database, so a
        # table created here would not be reachable through a bare URI.
        # NOTE(review): sync_external_data only receives db_uri, so it
        # presumably opens its own connection - confirm.
        db_path = Path(tmp_dir, 'sync_test.db')
        db_uri = f'sqlite:///{db_path.as_posix()}'
        engine = sa.create_engine(db_uri)
        try:
            # Create target table
            metadata = sa.MetaData()
            users_table = sa.Table(
                'users', metadata,
                sa.Column('id', sa.Integer, primary_key=True),
                sa.Column('name', sa.String),
                sa.Column('email', sa.String),
            )
            metadata.create_all(engine)

            # GraphQL client wrapped around a mocked API client
            mock_api_client = AsyncMock(spec=AsyncAPIClient)
            client = GraphQLClient(mock_api_client)

            # Async generator standing in for three pages of results
            async def mock_generator():
                yield [
                    {'user_id': 1, 'user_name': 'Alice', 'user_email': 'alice@example.com'},
                    {'user_id': 2, 'user_name': 'Bob', 'user_email': 'bob@example.com'},
                ]
                yield [
                    {'user_id': 3, 'user_name': 'Charlie', 'user_email': 'charlie@example.com'},
                ]
                yield [
                    {'user_id': 4, 'user_name': 'Diana', 'user_email': 'diana@example.com'},
                    {'user_id': 5, 'user_name': 'Eve', 'user_email': 'eve@example.com'},
                ]

            # Patch fetch_pages_generator so no real request is made
            with patch.object(client, 'fetch_pages_generator', return_value=mock_generator()):
                stats = await sync_external_data(
                    client=client,
                    query_template='query { users { id name email } }',
                    variables={},
                    items_path=['data', 'users'],
                    cursor_path=['data', 'cursor'],
                    table_name='users',
                    key_col='id',
                    db_uri=db_uri,
                    column_map={
                        'user_id': 'id',
                        'user_name': 'name',
                        'user_email': 'email',
                    },
                )

            # Verify stats
            assert stats['total_records'] == 5
            assert stats['batches'] == 3

            # Verify data in database; the context manager closes the session
            # even when an assertion below fails.
            Session = sessionmaker(bind=engine)
            with Session() as session:
                rows = session.execute(
                    sa.select(users_table).order_by(users_table.c.id)
                ).fetchall()

            assert len(rows) == 5
            assert rows[0].name == 'Alice'
            assert rows[2].email == 'charlie@example.com'
            assert rows[4].name == 'Eve'
        finally:
            # Release pooled connections before the temporary directory is removed.
            engine.dispose()

In [None]:
@pytest.mark.asyncio
async def test_sync_external_data_with_type_conversion():
    """Run ``sync_external_data`` with a ``type_map`` and verify stored values.

    The mocked batch carries booleans and dates as strings. ``type_map`` asks
    for ``pl.Boolean`` and ``pl.Date``, and the test reads the ``events`` table
    back to check the values that were stored.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        # Use a file-backed database: every new connection to
        # 'sqlite:///:memory:' gets its own private, empty database, so a
        # table created here would not be reachable through a bare URI.
        # NOTE(review): sync_external_data only receives db_uri, so it
        # presumably opens its own connection - confirm.
        db_path = Path(tmp_dir, 'sync_test.db')
        db_uri = f'sqlite:///{db_path.as_posix()}'
        engine = sa.create_engine(db_uri)
        try:
            # Create table with boolean and date columns
            metadata = sa.MetaData()
            events_table = sa.Table(
                'events', metadata,
                sa.Column('id', sa.Integer, primary_key=True),
                sa.Column('name', sa.String),
                sa.Column('active', sa.Boolean),
                sa.Column('created_date', sa.Date),
            )
            metadata.create_all(engine)

            # GraphQL client wrapped around a mocked API client
            mock_api_client = AsyncMock(spec=AsyncAPIClient)
            client = GraphQLClient(mock_api_client)

            async def mock_generator():
                yield [
                    {'id': 1, 'name': 'Event 1', 'active': 'true', 'created_date': '2024-01-15'},
                    {'id': 2, 'name': 'Event 2', 'active': 'false', 'created_date': '2024-02-20'},
                ]

            with patch.object(client, 'fetch_pages_generator', return_value=mock_generator()):
                stats = await sync_external_data(
                    client=client,
                    query_template='query { events { id name active created_date } }',
                    variables={},
                    items_path=['data', 'events'],
                    cursor_path=['data', 'cursor'],
                    table_name='events',
                    key_col='id',
                    db_uri=db_uri,
                    type_map={
                        'active': pl.Boolean,
                        'created_date': pl.Date,
                    },
                )

            # Verify
            assert stats['total_records'] == 2

            # The context manager closes the session even when an assertion fails.
            Session = sessionmaker(bind=engine)
            with Session() as session:
                rows = session.execute(
                    sa.select(events_table).order_by(events_table.c.id)
                ).fetchall()

            # Identity checks: the sa.Boolean column should yield real bools,
            # not merely truthy/falsy values.
            assert rows[0].active is True
            assert rows[1].active is False
            assert str(rows[0].created_date) == '2024-01-15'
        finally:
            # Release pooled connections before the temporary directory is removed.
            engine.dispose()

In [None]:
@pytest.mark.asyncio
async def test_sync_external_data_upsert_updates_existing():
    """Verify that syncing a key that already exists updates the stored row.

    The ``users`` table is seeded with ids 1 and 2. The mocked batch carries
    a changed id 1 and a new id 3. Afterwards the table must hold three rows:
    id 1 updated, id 2 untouched, id 3 inserted.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        # Use a file-backed database: every new connection to
        # 'sqlite:///:memory:' gets its own private, empty database, so the
        # rows seeded here would not be reachable through a bare URI.
        # NOTE(review): sync_external_data only receives db_uri, so it
        # presumably opens its own connection - confirm.
        db_path = Path(tmp_dir, 'sync_test.db')
        db_uri = f'sqlite:///{db_path.as_posix()}'
        engine = sa.create_engine(db_uri)
        try:
            metadata = sa.MetaData()
            users_table = sa.Table(
                'users', metadata,
                sa.Column('id', sa.Integer, primary_key=True),
                sa.Column('name', sa.String),
                sa.Column('email', sa.String),
            )
            metadata.create_all(engine)

            # Insert existing data; commit and close before the sync runs.
            Session = sessionmaker(bind=engine)
            with Session() as session:
                session.execute(users_table.insert(), [
                    {'id': 1, 'name': 'Old Alice', 'email': 'old_alice@example.com'},
                    {'id': 2, 'name': 'Old Bob', 'email': 'old_bob@example.com'},
                ])
                session.commit()

            # GraphQL client wrapped around a mocked API client
            mock_api_client = AsyncMock(spec=AsyncAPIClient)
            client = GraphQLClient(mock_api_client)

            # One batch: an updated existing record plus a brand-new one
            async def mock_generator():
                yield [
                    {'id': 1, 'name': 'New Alice', 'email': 'new_alice@example.com'},
                    {'id': 3, 'name': 'Charlie', 'email': 'charlie@example.com'},
                ]

            with patch.object(client, 'fetch_pages_generator', return_value=mock_generator()):
                await sync_external_data(
                    client=client,
                    query_template='query { users { id name email } }',
                    variables={},
                    items_path=['data', 'users'],
                    cursor_path=['data', 'cursor'],
                    table_name='users',
                    key_col='id',
                    db_uri=db_uri,
                )

            # Verify update; the context manager closes the session even when
            # an assertion below fails.
            with Session() as session:
                rows = session.execute(
                    sa.select(users_table).order_by(users_table.c.id)
                ).fetchall()

            assert len(rows) == 3  # 2 existing + 1 new
            assert rows[0].name == 'New Alice'  # Updated
            assert rows[0].email == 'new_alice@example.com'
            assert rows[1].name == 'Old Bob'  # Unchanged
            assert rows[2].name == 'Charlie'  # New
        finally:
            # Release pooled connections before the temporary directory is removed.
            engine.dispose()

## Test sync_incremental

In [None]:
@pytest.mark.asyncio
async def test_sync_incremental_adds_timestamp():
    """Verify that ``sync_incremental`` reports a fresh ``last_sync_time``.

    A single mocked record is synced. The returned stats must contain the
    record count and a ``last_sync_time`` string that contains ``'T'`` and
    ends with ``'Z'``.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        # Use a file-backed database: every new connection to
        # 'sqlite:///:memory:' gets its own private, empty database, so a
        # table created here would not be reachable through a bare URI.
        # NOTE(review): sync_incremental only receives db_uri, so it
        # presumably opens its own connection - confirm.
        db_path = Path(tmp_dir, 'sync_test.db')
        db_uri = f'sqlite:///{db_path.as_posix()}'
        engine = sa.create_engine(db_uri)
        try:
            # Create target table
            metadata = sa.MetaData()
            sa.Table(
                'users', metadata,
                sa.Column('id', sa.Integer, primary_key=True),
                sa.Column('name', sa.String),
            )
            metadata.create_all(engine)

            # GraphQL client wrapped around a mocked API client
            mock_api_client = AsyncMock(spec=AsyncAPIClient)
            client = GraphQLClient(mock_api_client)

            async def mock_generator():
                yield [
                    {'id': 1, 'name': 'Alice'},
                ]

            with patch.object(client, 'fetch_pages_generator', return_value=mock_generator()):
                stats = await sync_incremental(
                    client=client,
                    query_template='query($cursor: String, $last_sync: DateTime!) { users(after: $cursor, where: {updated_at: {_gt: $last_sync}}) { id name } }',
                    last_sync_time='2024-01-15T10:00:00Z',
                    items_path=['data', 'users'],
                    cursor_path=['data', 'cursor'],
                    table_name='users',
                    key_col='id',
                    db_uri=db_uri,
                )

            # Verify stats include new timestamp
            assert 'last_sync_time' in stats
            assert stats['total_records'] == 1
            assert 'T' in stats['last_sync_time']  # ISO format
            assert stats['last_sync_time'].endswith('Z')  # UTC
        finally:
            # Release pooled connections before the temporary directory is removed.
            engine.dispose()

In [None]:
@pytest.mark.asyncio
async def test_sync_incremental_no_new_records():
    """Verify ``sync_incremental`` stats when the source yields no batches.

    The mocked ``fetch_pages_generator`` is an async generator that finishes
    without yielding. The stats must report zero records and zero batches
    while still carrying a ``last_sync_time`` entry.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        # Use a file-backed database: every new connection to
        # 'sqlite:///:memory:' gets its own private, empty database, so a
        # table created here would not be reachable through a bare URI.
        # NOTE(review): sync_incremental only receives db_uri, so it
        # presumably opens its own connection - confirm.
        db_path = Path(tmp_dir, 'sync_test.db')
        db_uri = f'sqlite:///{db_path.as_posix()}'
        engine = sa.create_engine(db_uri)
        try:
            # Create target table
            metadata = sa.MetaData()
            sa.Table(
                'users', metadata,
                sa.Column('id', sa.Integer, primary_key=True),
                sa.Column('name', sa.String),
            )
            metadata.create_all(engine)

            # GraphQL client wrapped around a mocked API client
            mock_api_client = AsyncMock(spec=AsyncAPIClient)
            client = GraphQLClient(mock_api_client)

            async def mock_generator():
                # The unreachable ``yield`` makes this an async generator
                # function; returning first means it produces no batches.
                return
                yield  # Never reached

            with patch.object(client, 'fetch_pages_generator', return_value=mock_generator()):
                stats = await sync_incremental(
                    client=client,
                    query_template='query($cursor: String, $last_sync: DateTime!) { users(after: $cursor, where: {updated_at: {_gt: $last_sync}}) { id name } }',
                    last_sync_time='2024-01-15T10:00:00Z',
                    items_path=['data', 'users'],
                    cursor_path=['data', 'cursor'],
                    table_name='users',
                    key_col='id',
                    db_uri=db_uri,
                )

            # Verify no records synced
            assert stats['total_records'] == 0
            assert stats['batches'] == 0
            assert 'last_sync_time' in stats
        finally:
            # Release pooled connections before the temporary directory is removed.
            engine.dispose()