# Basalt Dataset SDK Demo

This notebook demonstrates the asynchronous functionality of the Dataset SDK in the Basalt Python SDK, including:
- Listing all datasets
- Retrieving specific datasets by slug
- Adding rows to datasets
- Working with dataset metadata

This notebook uses primarily mock data for easy execution without API keys.

In [None]:
import os
import sys

# Add the parent of the current working directory to the import path before
# importing `basalt` below (presumably so a local checkout of the SDK resolves
# when the notebook is run from its own folder — TODO confirm).
sys.path.append(
    os.path.abspath(os.path.join(os.getcwd(), ".."))
)  # Needed to make notebook work in VSCode

# Set before the `basalt` import; NOTE(review): presumably read by the SDK to
# select a development build — confirm against the SDK.
os.environ["BASALT_BUILD"] = "development"

from basalt import Basalt, TelemetryConfig

# Initialize the SDK with modern API
# Note: Replace with your actual API key for real usage
telemetry = TelemetryConfig(
    service_name="dataset-demo",
    environment="development",
)

# The client is shared by every example cell below via the module-level name
# `basalt`. The key comes from the BASALT_API_KEY environment variable and
# falls back to the placeholder "sk-demo-key" when it is unset.
basalt = Basalt(
    api_key=os.getenv("BASALT_API_KEY", "sk-demo-key"),  # Replace with your API key
    telemetry_config=telemetry,
)

## Example 1: List All Datasets

This example demonstrates how to list all datasets asynchronously.

In [None]:
async def list_datasets():
    """
    List all datasets asynchronously.

    Calls ``basalt.datasets.list()`` on the module-level client and logs each
    dataset's slug and column names at INFO level.

    Returns:
        The list of dataset objects on success, or an empty list if the call
        (or reading a dataset's columns) raises. The failure is logged as a
        warning instead of being discarded silently, so an invalid API key or
        network problem is visible in the notebook output.
    """
    import logging

    logger = logging.getLogger(__name__)

    try:
        datasets = await basalt.datasets.list()
        for dataset in datasets:
            # dataset.columns is a list of DatasetColumn objects
            column_names = [col.name for col in dataset.columns]
            logger.info("Dataset %r has columns %s", dataset.slug, column_names)
        return datasets
    except Exception as exc:
        # Best-effort demo: keep the notebook running, but report what failed.
        logger.warning("Failed to list datasets: %s", exc)
        return []


# Run the async function. The bare top-level `await` relies on the notebook
# kernel executing cells inside a running event loop; in a plain script this
# would be `asyncio.run(list_datasets())` instead.
datasets = await list_datasets()

## Example 2: Get a Specific Dataset

This example demonstrates how to retrieve a specific dataset by its slug.

In [None]:
async def get_dataset(datasets):
    """
    Retrieve a specific dataset by its slug.

    Uses the first entry of ``datasets`` as the sample and fetches it again
    through ``basalt.datasets.get(slug)`` on the module-level client.

    Args:
        datasets: Sequence of dataset objects (each exposing ``.slug``), e.g.
            the result of ``list_datasets()``. May be empty or ``None``.

    Returns:
        A ``(sample_dataset, dataset)`` tuple:
        - ``(None, None)`` when ``datasets`` is empty or ``None``;
        - ``(sample_dataset, None)`` when the fetch (or reading the fetched
          dataset's columns) raises — the failure is logged as a warning;
        - ``(sample_dataset, dataset)`` on success, where ``dataset`` is the
          object returned by ``basalt.datasets.get``.
    """
    import logging

    logger = logging.getLogger(__name__)

    # Guard clause: nothing to fetch without at least one listed dataset.
    if not datasets:
        return None, None

    sample_dataset = datasets[0]
    try:
        dataset = await basalt.datasets.get(sample_dataset.slug)
        # dataset.columns is a list of DatasetColumn objects
        column_names = [col.name for col in dataset.columns]
        logger.info("Fetched dataset %r with columns %s", sample_dataset.slug, column_names)
        return sample_dataset, dataset
    except Exception as exc:
        # Best-effort demo: keep the notebook running, but report what failed.
        # getattr() because a missing slug may itself be the cause of the error.
        logger.warning(
            "Failed to retrieve dataset %r: %s",
            getattr(sample_dataset, "slug", None),
            exc,
        )
        return sample_dataset, None


# Run the async function on the datasets listed in Example 1. Both results are
# None when that list is empty; `dataset` alone is None when the fetch failed.
sample_dataset, dataset = await get_dataset(datasets)

## Example 3: Add a Row to a Dataset

This example demonstrates how to add a new row to an existing dataset.

In [None]:
async def add_row(sample_dataset):
    """
    Add a new row to an existing dataset.

    Demonstrates how to add data with values, name, ideal_output, and metadata.
    One placeholder value ("Sample <column> value") is generated per column of
    ``sample_dataset``.

    Args:
        sample_dataset: Dataset object exposing ``.slug`` and ``.columns``
            (each column exposing ``.name``), or a falsy value to skip.

    Returns:
        The ``(row, warning)`` tuple returned by ``basalt.datasets.add_row``
        on success, or ``(None, None)`` when ``sample_dataset`` is falsy or
        the call raises. A non-empty warning and any failure are logged as
        warnings so they are visible in the notebook output.
    """
    import logging

    logger = logging.getLogger(__name__)

    # Guard clause: nothing to add to without a dataset.
    if not sample_dataset:
        return None, None

    # Create some sample values for the dataset row
    # dataset.columns is a list of DatasetColumn objects with .name attribute
    values = {col.name: f"Sample {col.name} value" for col in sample_dataset.columns}

    try:
        # add_row returns a tuple: (DatasetRow, optional_warning)
        row, warning = await basalt.datasets.add_row(
            slug=sample_dataset.slug,
            values=values,
            name="Async Sample Row",
            ideal_output="Expected output for this row",
            metadata={
                "source": "async_example",
                "type": "demo",
                "notebook": "dataset_sdk_demo",
            },
        )

        logger.info("Added row %r to dataset %r", row.name, sample_dataset.slug)
        if warning:
            # Non-fatal problem reported by the API alongside the created row.
            logger.warning("add_row returned a warning: %s", warning)

        return row, warning
    except Exception as exc:
        # Best-effort demo: keep the notebook running, but report what failed.
        logger.warning("Failed to add row: %s", exc)
        return None, None


# Run the async function on the sample dataset picked in Example 2. Both
# results are None when there was no sample dataset or the call failed.
row_result, warning = await add_row(sample_dataset)

# Note about warnings: The add_row function returns a warning string if there are
# schema validation issues or other non-fatal problems. Always check for warnings
# in production code to ensure data quality.