# Basalt Dataset SDK Demo

This notebook demonstrates how to use the Basalt Dataset SDK to interact with your Basalt datasets.

In [None]:
import sys
import os

# Make the parent directory importable so `basalt` resolves when the notebook
# is run from a subdirectory (needed to make the notebook work in VSCode).
# Guarded so that re-running this cell does not keep appending duplicates.
_parent_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

# Set before the SDK import below — presumably the SDK reads it to select the
# build/environment; TODO confirm against the SDK.
os.environ["BASALT_BUILD"] = "development"

from basalt import Basalt

# Initialize the SDK.
# Prefer an API key from the BASALT_API_KEY environment variable (a name chosen
# for this notebook) so real credentials never get committed with the notebook;
# the placeholder is only used when the variable is unset.
basalt = Basalt(
    api_key=os.environ.get("BASALT_API_KEY", "sk-..."),  # Or replace the placeholder with your API key
    log_level="debug"  # Optional: Set log level
)

## 1. Listing Available Datasets

Retrieve all datasets available in your workspace.

In [None]:
# Reset up front so a failed call cannot leave a stale slug from an earlier run
# of this cell in the notebook namespace.
first_dataset_slug = None

# List all datasets in the workspace; the SDK call returns an (error, result) pair.
err, datasets = basalt.datasets.list()

if err:
    print(f"Error listing datasets: {err}")
else:
    print(f"Found {len(datasets)} datasets:")
    # The loop variable is deliberately NOT called `dataset`: later cells check
    # for a notebook-level `dataset` to decide whether a dataset was fetched,
    # and a leaked loop variable would satisfy that check by accident.
    for position, listed in enumerate(datasets, start=1):
        print(f"{position}. {listed.name} (slug: {listed.slug})")
        print(f"   - Description: {listed.description or 'No description'}")
        print(f"   - Columns: {', '.join(listed.columns)}")

    # Store the first dataset slug for later use (stays None when the list is empty)
    if datasets:
        first_dataset_slug = datasets[0].slug

## 2. Getting a Specific Dataset

Retrieve details for a specific dataset using its slug.

In [None]:
# Prefer the slug captured by the listing cell; when it is missing or empty,
# fall back to a placeholder that should be replaced with a real slug.
dataset_slug = globals().get('first_dataset_slug') or "your-dataset-slug"

# The SDK call returns an (error, dataset) pair.
err, dataset = basalt.datasets.get(dataset_slug)

if not err:
    print(f"Dataset details for '{dataset.name}'")
    print(f"Slug: {dataset.slug}")
    print(f"Description: {dataset.description or 'No description'}")
    print(f"Columns: {', '.join(dataset.columns)}")
    print(f"Number of rows: {len(dataset.rows)}")

    if dataset.rows:
        print("\nSample rows:")
        # Preview at most the first three rows, numbered from 1
        for row_number, row in enumerate(dataset.rows[:3], start=1):
            print(f"Row {row_number}:")
            print(f"  Values: {row.values}")
            # Optional fields are printed only when they hold a truthy value
            if row.name:
                print(f"  Name: {row.name}")
            if row.idealOutput:
                print(f"  Ideal output: {row.idealOutput}")
            if row.metadata:
                print(f"  Metadata: {row.metadata}")
else:
    print(f"Error getting dataset: {err}")

## 3. Adding a Row to a Dataset

Create a new row (item) in an existing dataset. This cell reuses the `dataset` object fetched in section 2, so run that cell first.

In [None]:
import datetime

# Use the dataset from the previous example
if 'dataset' in locals() and dataset:
    # Capture the time once so every column value and the metadata timestamp
    # describe the same instant.
    created_at = datetime.datetime.now()
    stamp = created_at.strftime('%Y-%m-%d %H:%M:%S')

    # Build a value for every column in the dataset
    values = {
        column: f"Example value for {column} - {stamp}"
        for column in dataset.columns
    }

    # Create the row; the SDK call returns an (error, row, warning) triple
    err, row, warning = basalt.datasets.addRow(
        slug=dataset.slug,
        values=values,
        name="Notebook Example Row",
        ideal_output="This is an ideal output for this row",
        metadata={"source": "Jupyter notebook example", "timestamp": created_at.isoformat()}
    )

    if err:
        print(f"Error creating dataset row: {err}")
    else:
        print("Successfully created new dataset row:")
        print(f"Values: {row.values}")
        print(f"Name: {row.name}")
        print(f"Ideal output: {row.idealOutput}")
        print(f"Metadata: {row.metadata}")

        # A warning can accompany a successful creation
        if warning:
            print(f"Warning: {warning}")
else:
    print("Please run the previous cell to get a dataset first")

## 4. Error Handling with Dataset SDK

Demonstrate proper error handling when working with datasets.

In [None]:
def safely_add_dataset_row(slug, values, name=None, ideal_output=None, metadata=None):
    """Add a row to a dataset, reporting problems instead of raising.

    Forwards all arguments as keywords to ``basalt.datasets.addRow`` and
    unpacks its ``(error, row, warning)`` result.

    Returns:
        The created row on success (a warning, if any, is printed first).
        ``None`` when the SDK reports an error or when any exception is
        raised; in both cases a message is printed.
    """
    request = dict(
        slug=slug,
        values=values,
        name=name,
        ideal_output=ideal_output,
        metadata=metadata,
    )
    try:
        failure, created, notice = basalt.datasets.addRow(**request)

        if not failure:
            if notice:
                print(f"Warning: {notice}")
            return created

        print(f"Error creating dataset row: {failure}")
    except Exception as exc:
        # Deliberately broad: the demo reports any failure and keeps running
        print(f"Unexpected error: {str(exc)}")
    return None

# Case 1: a slug that should exist (set by the "get dataset" cell, if it ran)
if globals().get('dataset_slug'):
    values = dict(input="Test input", output="Test output")
    row = safely_add_dataset_row(dataset_slug, values, name="Error Handling Test")

    if row:
        print(f"Successfully created row: {row.name}")

# Case 2: a slug that should not exist
print("\nTesting with invalid dataset slug:")
invalid_row = safely_add_dataset_row("non-existent-dataset", dict(input="Test input"))
print(f"Result with invalid slug: {invalid_row}")

# Case 3: values that omit a column
if globals().get('dataset') and len(dataset.columns) > 0:
    print("\nTesting with missing required values:")
    # Skip the first column so it has no value; a single-column dataset
    # therefore produces an empty dict.
    incomplete_values = dict.fromkeys(list(dataset.columns)[1:], "value")
    incomplete_row = safely_add_dataset_row(dataset.slug, incomplete_values)
    print(f"Result with incomplete values: {incomplete_row}")