# Data Operations

This notebook covers:
- Creating databases and schemas
- Creating tables
- Inserting and querying data
- Using `seed_table()` for easy test data

In [1]:
import snowflake.connector

from snowduck import seed_table, start_patch_snowflake

# Activate snowduck's patch of the Snowflake connector before any cell opens a
# connection; the cells below call snowflake.connector.connect() with no
# credentials.
# NOTE(review): reset=True presumably recreates examples.duckdb on every run,
# which would keep Restart & Run All reproducible - confirm against snowduck.
patch_options = {"db_file": "examples.duckdb", "reset": True}
start_patch_snowflake(**patch_options)

## Database & Schema Management

In [2]:
# Create two databases (IF NOT EXISTS keeps the cell re-runnable), then list
# every database the connection reports.
with snowflake.connector.connect() as conn, conn.cursor() as cursor:
    for db_name in ("my_database", "analytics_db"):
        cursor.execute(f"CREATE DATABASE IF NOT EXISTS {db_name}")

    cursor.execute("SHOW DATABASES")
    databases = cursor.fetchall()

    # Index 1 of each SHOW DATABASES row is the value printed as the name.
    database_names = [record[1] for record in databases]
    print("Databases:", database_names)

Databases: ['ANALYTICS_DB', 'MY_DATABASE', 'examples', 'system', 'temp']


In [3]:
# Switch to my_database, create two schemas (IF NOT EXISTS keeps the cell
# re-runnable), then list the schemas the connection reports.
with snowflake.connector.connect() as conn, conn.cursor() as cursor:
    cursor.execute("USE DATABASE my_database")
    for schema_name in ("public", "staging"):
        cursor.execute(f"CREATE SCHEMA IF NOT EXISTS {schema_name}")

    cursor.execute("SHOW SCHEMAS")
    schemas = cursor.fetchall()

    # Index 1 of each SHOW SCHEMAS row is the value printed as the name.
    schema_names = [record[1] for record in schemas]
    print("Schemas:", schema_names)

Schemas: ['PUBLIC', 'STAGING']


## Data Seeding with `seed_table()`

The easiest way to populate test data:

In [4]:
import pandas as pd

# Column-oriented test data: each key is a column name and each list holds that
# column's values (six rows in total).
employees_data = {
    "id": [1, 2, 3, 4, 5, 6],
    "name": ["Alice", "Bob", "Carol", "David", "Eve", "Frank"],
    "department": [
        "Engineering",
        "Sales",
        "Engineering",
        "Marketing",
        "Engineering",
        "Sales",
    ],
    "salary": [95000, 75000, 105000, 68000, 98000, 82000],
    "hire_date": pd.to_datetime(
        [
            "2020-01-15",
            "2019-03-22",
            "2018-07-10",
            "2021-09-01",
            "2020-11-20",
            "2019-05-14",
        ]
    ),
}

# Open the cursor as a context manager, like the other cells, so it is closed
# when the cell finishes instead of being left open.
with snowflake.connector.connect() as conn, conn.cursor() as cursor:
    cursor.execute("USE DATABASE my_database")
    cursor.execute("USE SCHEMA public")

    # seed_table() receives the connection, the target table name and the
    # column dict; its return value is reported below as the seeded row count.
    rows = seed_table(conn, "employees", employees_data)
    print(f"Seeded {rows} employees")

    # ORDER BY makes the three-row preview deterministic; with a bare LIMIT the
    # engine is free to return any three rows in any order.
    cursor.execute("SELECT * FROM employees ORDER BY id LIMIT 3")
    for row in cursor.fetchall():
        # Columns follow employees_data order: 1=name, 2=department, 3=salary.
        print(f"  {row[1]} - {row[2]} - ${row[3]:,}")

Seeded 6 employees
  Alice - Engineering - $95,000
  Bob - Sales - $75,000
  Carol - Engineering - $105,000


## CREATE TABLE AS SELECT with VALUES

In [5]:
# Build the departments table directly from an inline VALUES list
# (CREATE OR REPLACE keeps the cell re-runnable), then print every row.
with snowflake.connector.connect() as conn, conn.cursor() as cursor:
    cursor.execute("USE DATABASE my_database")
    cursor.execute("USE SCHEMA PUBLIC")

    create_departments_sql = """
        CREATE OR REPLACE TABLE departments AS
        SELECT * FROM (VALUES
            ('Engineering', 3, 99333),
            ('Sales', 2, 78500),
            ('Marketing', 1, 68000)
        ) AS t(name, employee_count, avg_salary)
    """
    cursor.execute(create_departments_sql)

    cursor.execute("SELECT * FROM departments")
    # The table has exactly the three columns named in the alias list above.
    for name, employee_count, avg_salary in cursor.fetchall():
        print(f"  {name}: {employee_count} employees, avg ${avg_salary:,}")

  Engineering: 3 employees, avg $99,333
  Sales: 2 employees, avg $78,500
  Marketing: 1 employees, avg $68,000


## Information Schema

In [6]:
# Query tables from the same connection context where they were created
# Create one more table, then list every table in the PUBLIC schema through
# information_schema. The listing also includes the tables created by the
# earlier cells (employees, departments), which were made on other connections.
with snowflake.connector.connect() as conn, conn.cursor() as cursor:
    cursor.execute("USE DATABASE my_database")
    cursor.execute("USE SCHEMA PUBLIC")

    # CREATE OR REPLACE keeps the cell re-runnable without duplicating rows.
    cursor.execute("""
        CREATE OR REPLACE TABLE products (
            id INTEGER,
            name VARCHAR,
            price DECIMAL(10,2)
        )
    """)
    cursor.execute(
        "INSERT INTO products VALUES (1, 'Widget', 29.99), (2, 'Gadget', 49.99)"
    )

    # ORDER BY gives the listing a stable order; without it the row order of
    # the query is unspecified and the printed output can change between runs.
    cursor.execute("""
        SELECT table_name, table_type
        FROM information_schema.tables
        WHERE table_schema = 'PUBLIC'
        ORDER BY table_name
    """)
    print("Tables in PUBLIC schema:")
    for table_name, table_type in cursor.fetchall():
        print(f"  {table_name} ({table_type})")

Tables in PUBLIC schema:
  departments (BASE TABLE)
  employees (BASE TABLE)
  products (BASE TABLE)
