In [None]:
#| default_exp utils_graphql

In [None]:
#| export

from __future__ import annotations
from fh_saas.utils_api import AsyncAPIClient
from typing import AsyncGenerator, Dict, Any, List, Optional
import logging

logger = logging.getLogger(__name__)

In [None]:
from nbdev.showdoc import show_doc

## GraphQLClient

GraphQL client with streaming pagination: it yields batches of data instead of loading the entire dataset into memory.

In [None]:
#| export

class GraphQLClient:
    """
    GraphQL client with streaming pagination for memory-efficient data fetching.
    
    Uses async generators to yield data page-by-page instead of accumulating
    full result sets in memory. Ideal for large datasets (millions of rows).
    
    Example:
        ```python
        from fh_saas.utils_api import AsyncAPIClient, bearer_token_auth
        
        async with AsyncAPIClient(
            'https://api.example.com/graphql',
            auth_headers=bearer_token_auth('TOKEN')
        ) as http_client:
            
            client = GraphQLClient(http_client)
            
            # Single query
            result = await client.execute_query(
                query='{ users { id name } }'
            )
            
            # Paginated streaming
            async for batch in client.fetch_pages_generator(
                query_template='''
                    query($cursor: String) {
                        users(after: $cursor) {
                            nodes { id name email }
                            pageInfo { hasNextPage endCursor }
                        }
                    }
                ''',
                variables={'cursor': None},
                items_path=['data', 'users', 'nodes'],
                cursor_path=['data', 'users', 'pageInfo', 'endCursor']
            ):
                print(f"Processing batch of {len(batch)} records")
                # Process batch immediately, never load full dataset
        ```
    """
    
    def __init__(
        self,
        api_client: AsyncAPIClient, # Initialized AsyncAPIClient instance
        endpoint: str = '' # GraphQL endpoint path (default: '' for base URL)
    ):
        """Store the HTTP client and the endpoint path used for every request."""
        self.api_client = api_client
        self.endpoint = endpoint
    
    async def execute_query(
        self,
        query: str, # GraphQL query string
        variables: Optional[dict] = None # Query variables
    ) -> Dict[str, Any]:
        """
        Execute a single GraphQL query.
        
        Args:
            query: GraphQL query string
            variables: Optional query variables (omitted from the payload when
                empty or None)
        
        Returns:
            Full JSON response from GraphQL endpoint
        
        Raises:
            ValueError: If the GraphQL response carries a non-empty 'errors'
                entry. The message of the first error is included.
        """
        payload = {'query': query}
        if variables:
            payload['variables'] = variables
        
        response = await self.api_client.request(
            method='POST',
            endpoint=self.endpoint,
            json=payload
        )
        
        data = response.json()
        
        # Check for GraphQL errors. An empty or null 'errors' entry is treated
        # as "no errors" instead of crashing on errors[0].
        errors = data.get('errors') if isinstance(data, dict) else None
        if errors:
            first_error = errors[0] if isinstance(errors, list) else errors
            if isinstance(first_error, dict):
                error_msg = first_error.get('message', 'Unknown GraphQL error')
            else:
                # Non-standard servers may send plain strings as errors
                error_msg = str(first_error)
            raise ValueError(f"GraphQL error: {error_msg}")
        
        return data
    
    async def execute_mutation(
        self,
        mutation: str, # GraphQL mutation string
        variables: Optional[dict] = None # Mutation variables
    ) -> Dict[str, Any]:
        """
        Execute a GraphQL mutation (alias for execute_query).
        
        Args:
            mutation: GraphQL mutation string
            variables: Optional mutation variables
        
        Returns:
            Full JSON response from GraphQL endpoint
        
        Raises:
            ValueError: If the GraphQL response carries a non-empty 'errors' entry
        """
        return await self.execute_query(mutation, variables)
    
    async def fetch_pages_generator(
        self,
        query_template: str, # GraphQL query with $variables placeholders
        variables: dict, # Initial variables (must include cursor key)
        items_path: list[str], # JSONPath to list of items (e.g., ['data', 'users', 'nodes'])
        cursor_path: list[str], # JSONPath to next cursor (e.g., ['data', 'users', 'pageInfo', 'endCursor'])
        has_next_path: Optional[list[str]] = None, # Optional path to hasNextPage boolean (if None, checks cursor != None)
        cursor_var: str = 'cursor' # Variable name for cursor in query (default: 'cursor')
    ) -> AsyncGenerator[List[Dict], None]:
        """
        Stream paginated GraphQL data page-by-page using async generator.
        
        CRITICAL: Yields batches of data instead of accumulating in memory.
        For large datasets (millions of rows), this prevents OOM errors.
        
        The caller's `variables` dict is never modified; the cursor is advanced
        on a private copy. Iteration stops when a page has no items, when the
        response signals there is no next page, or when the cursor fails to
        advance (which would otherwise refetch the same page forever).
        
        Args:
            query_template: GraphQL query with $cursor variable
            variables: Initial variables (e.g., {'cursor': None})
            items_path: Path to data list in response (e.g., ['data', 'users', 'nodes'])
            cursor_path: Path to next cursor (e.g., ['data', 'users', 'pageInfo', 'endCursor'])
            has_next_path: Optional path to hasNextPage flag
            cursor_var: Cursor variable name in query
        
        Yields:
            List[Dict]: One batch of records per page
        
        Raises:
            ValueError: Propagated from execute_query when a page response
                contains GraphQL errors
        
        Example:
            ```python
            async for batch in client.fetch_pages_generator(
                query_template='''
                    query($cursor: String) {
                        users(after: $cursor, first: 1000) {
                            nodes { id name }
                            pageInfo { hasNextPage endCursor }
                        }
                    }
                ''',
                variables={'cursor': None},
                items_path=['data', 'users', 'nodes'],
                cursor_path=['data', 'users', 'pageInfo', 'endCursor'],
                has_next_path=['data', 'users', 'pageInfo', 'hasNextPage']
            ):
                # Process this batch immediately
                df = pl.DataFrame(batch)
                df.write_database('staging_table', ...)
            ```
        """
        # Private copy: advancing the cursor must not leak into the caller's dict
        page_vars = dict(variables) if variables else {}
        page_count = 0
        
        while True:
            # Execute query with current cursor
            response = await self.execute_query(query_template, page_vars)
            
            # Extract items using path
            items = self._get_nested_value(response, items_path)
            if not items:
                logger.warning("No items found at path %s", items_path)
                break
            
            page_count += 1
            logger.info("Fetched page %s with %s items", page_count, len(items))
            
            # Yield batch (CRITICAL: do not accumulate)
            yield items
            
            # Single cursor lookup serves both the fallback check and the update
            next_cursor = self._get_nested_value(response, cursor_path)
            if has_next_path:
                has_next = bool(self._get_nested_value(response, has_next_path))
            else:
                # Fallback: a missing cursor means there is no further page
                has_next = next_cursor is not None
            if not has_next:
                break
            
            # A null or unchanged cursor would request the same page again;
            # stop instead of looping forever.
            if next_cursor is None or next_cursor == page_vars.get(cursor_var):
                logger.warning(
                    "Pagination cursor did not advance after page %s; stopping",
                    page_count
                )
                break
            
            # Update cursor for next iteration
            page_vars[cursor_var] = next_cursor
    
    def _get_nested_value(
        self, 
        data: dict, 
        path: list[str]
    ) -> Any:
        """
        Extract nested value from dict using path list.
        
        Returns None as soon as a key is missing or an intermediate value is
        not a dict. An empty path returns `data` itself.
        
        Example:
            _get_nested_value({'a': {'b': {'c': 123}}}, ['a', 'b', 'c']) -> 123
        """
        result = data
        for key in path:
            if isinstance(result, dict) and key in result:
                result = result[key]
            else:
                return None
        return result

In [None]:
show_doc(GraphQLClient.__init__)

---

[source](https://github.com/abhisheksreesaila/fh-saas/blob/main/fh_saas/utils_graphql.py#L57){target="_blank" style="float:right; font-size:smaller"}

### GraphQLClient.__init__

>      GraphQLClient.__init__ (api_client:fh_saas.utils_api.AsyncAPIClient,
>                              endpoint:str='')

*Initialize self.  See help(type(self)) for accurate signature.*

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| api_client | AsyncAPIClient |  | Initialized AsyncAPIClient instance |
| endpoint | str |  | GraphQL endpoint path (default: '' for base URL) |

In [None]:
show_doc(GraphQLClient.execute_query)

---

[source](https://github.com/abhisheksreesaila/fh-saas/blob/main/fh_saas/utils_graphql.py#L65){target="_blank" style="float:right; font-size:smaller"}

### GraphQLClient.execute_query

>      GraphQLClient.execute_query (query:str, variables:dict=None)

*Execute a single GraphQL query.*

Args:
    query: GraphQL query string
    variables: Optional query variables

Returns:
    Full JSON response from GraphQL endpoint

Raises:
    ValueError: If GraphQL response contains 'errors' key

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| query | str |  | GraphQL query string |
| variables | dict | None | Query variables |
| **Returns** | **Dict** |  |  |

In [None]:
show_doc(GraphQLClient.execute_mutation)

---

[source](https://github.com/abhisheksreesaila/fh-saas/blob/main/fh_saas/utils_graphql.py#L102){target="_blank" style="float:right; font-size:smaller"}

### GraphQLClient.execute_mutation

>      GraphQLClient.execute_mutation (mutation:str, variables:dict=None)

*Execute a GraphQL mutation (alias for execute_query).*

Args:
    mutation: GraphQL mutation string
    variables: Optional mutation variables

Returns:
    Full JSON response from GraphQL endpoint

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| mutation | str |  | GraphQL mutation string |
| variables | dict | None | Mutation variables |
| **Returns** | **Dict** |  |  |

In [None]:
show_doc(GraphQLClient.fetch_pages_generator)

---

[source](https://github.com/abhisheksreesaila/fh-saas/blob/main/fh_saas/utils_graphql.py#L119){target="_blank" style="float:right; font-size:smaller"}

### GraphQLClient.fetch_pages_generator

>      GraphQLClient.fetch_pages_generator (query_template:str, variables:dict,
>                                           items_path:list[str],
>                                           cursor_path:list[str],
>                                           has_next_path:list[str]=None,
>                                           cursor_var:str='cursor')

*Stream paginated GraphQL data page-by-page using async generator.*

CRITICAL: Yields batches of data instead of accumulating in memory.
For large datasets (millions of rows), this prevents OOM errors.

Args:
    query_template: GraphQL query with $cursor variable
    variables: Initial variables (e.g., {'cursor': None})
    items_path: Path to data list in response (e.g., ['data', 'users', 'nodes'])
    cursor_path: Path to next cursor (e.g., ['data', 'users', 'pageInfo', 'endCursor'])
    has_next_path: Optional path to hasNextPage flag
    cursor_var: Cursor variable name in query

Yields:
    List[Dict]: One batch of records per page

Example:
    ```python
    async for batch in client.fetch_pages_generator(
        query_template='''
            query($cursor: String) {
                users(after: $cursor, first: 1000) {
                    nodes { id name }
                    pageInfo { hasNextPage endCursor }
                }
            }
        ''',
        variables={'cursor': None},
        items_path=['data', 'users', 'nodes'],
        cursor_path=['data', 'users', 'pageInfo', 'endCursor'],
        has_next_path=['data', 'users', 'pageInfo', 'hasNextPage']
    ):
        # Process this batch immediately
        df = pl.DataFrame(batch)
        df.write_database('staging_table', ...)
    ```

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| query_template | str |  | GraphQL query with $variables placeholders |
| variables | dict |  | Initial variables (must include cursor key) |
| items_path | list |  | JSONPath to list of items (e.g., ['data', 'users', 'nodes']) |
| cursor_path | list |  | JSONPath to next cursor (e.g., ['data', 'users', 'pageInfo', 'endCursor']) |
| has_next_path | list | None | Optional path to hasNextPage boolean (if None, checks cursor != None) |
| cursor_var | str | cursor | Variable name for cursor in query (default: 'cursor') |
| **Returns** | **AsyncGenerator** |  |  |

In [None]:
#| hide

import nbdev as nb
nb.nbdev_export()