In [None]:
import duckdb
import pandas as pd

# Open an in-memory DuckDB database (':memory:' means no database file is used).
# `conn` is the module-level connection that the functions defined in the
# following cells use to create, populate and inspect the sample tables.
conn = duckdb.connect(':memory:')



In [None]:
#Create sample tables with data
def create_sample_data():
    """Create and populate the sample ``customers``, ``products`` and ``orders`` tables.

    Works on the module-level DuckDB connection ``conn``. Every table is
    created with ``CREATE OR REPLACE TABLE`` so the function is safe to
    re-run in the same kernel (e.g. executing the notebook cell twice): a
    re-run drops the previous contents and reloads the same fixture rows
    instead of failing because the table already exists.

    Returns:
        None. Prints a confirmation message once all rows are inserted.
    """

    # Create customers table
    conn.execute("""
        CREATE OR REPLACE TABLE customers (
            customer_id INTEGER PRIMARY KEY,
            name VARCHAR,
            email VARCHAR,
            age INTEGER,
            city VARCHAR,
            signup_date DATE
        )
    """)

    # Create products table
    conn.execute("""
        CREATE OR REPLACE TABLE products (
            product_id INTEGER PRIMARY KEY,
            product_name VARCHAR,
            category VARCHAR,
            price DECIMAL(10,2),
            in_stock BOOLEAN
        )
    """)

    # Create orders table
    conn.execute("""
        CREATE OR REPLACE TABLE orders (
            order_id INTEGER PRIMARY KEY,
            customer_id INTEGER,
            product_id INTEGER,
            quantity INTEGER,
            order_date DATE,
            total_amount DECIMAL(10,2)
        )
    """)

    # Sample customers: (customer_id, name, email, age, city, signup_date)
    customers_data = [
        (1, 'John Doe', 'john@email.com', 30, 'New York', '2024-01-15'),
        (2, 'Jane Smith', 'jane@email.com', 25, 'Los Angeles', '2024-01-16'),
        (3, 'Bob Johnson', 'bob@email.com', 35, 'Chicago', '2024-01-17'),
        (4, 'Alice Brown', 'alice@email.com', 28, 'Houston', '2024-01-18'),
        (5, 'Charlie Wilson', 'charlie@email.com', 32, 'Phoenix', '2024-01-19'),
    ]
    # executemany runs the prepared INSERT once per parameter tuple.
    conn.executemany("INSERT INTO customers VALUES (?, ?, ?, ?, ?, ?)", customers_data)

    # Sample products: (product_id, product_name, category, price, in_stock)
    products_data = [
        (1, 'Laptop', 'Electronics', 999.99, True),
        (2, 'Phone', 'Electronics', 699.99, True),
        (3, 'Tablet', 'Electronics', 399.99, False),
        (4, 'Headphones', 'Electronics', 149.99, True),
        (5, 'Keyboard', 'Accessories', 79.99, True),
    ]
    conn.executemany("INSERT INTO products VALUES (?, ?, ?, ?, ?)", products_data)

    # Sample orders:
    # (order_id, customer_id, product_id, quantity, order_date, total_amount)
    orders_data = [
        (1, 1, 1, 1, '2024-02-01', 999.99),
        (2, 2, 2, 1, '2024-02-02', 699.99),
        (3, 3, 4, 2, '2024-02-03', 299.98),
        (4, 1, 5, 1, '2024-02-04', 79.99),
        (5, 4, 1, 1, '2024-02-05', 999.99),
        (6, 2, 3, 1, '2024-02-06', 399.99),
        (7, 5, 2, 1, '2024-02-07', 699.99),
        (8, 3, 5, 3, '2024-02-08', 239.97),
    ]
    conn.executemany("INSERT INTO orders VALUES (?, ?, ?, ?, ?, ?)", orders_data)

    print(" Sample data created successfully!")



In [None]:
def get_all_tables_schema(output_path='data/database_schema.csv'):
    """Collect column metadata for every table and save it as a CSV file.

    Lists the tables visible on the module-level connection ``conn`` with
    ``SHOW TABLES``, runs ``DESCRIBE`` on each one, and flattens the result
    into one row per (table, column).

    Args:
        output_path: Destination CSV path. Defaults to
            ``'data/database_schema.csv'``. Missing parent directories are
            created before writing.

    Returns:
        pandas.DataFrame with the columns ``table_name``, ``column_name``,
        ``data_type``, ``nullable``, ``key`` and ``default``. The columns are
        present even when the database contains no tables.
    """
    # Local import keeps this cell self-contained; pathlib is stdlib.
    from pathlib import Path

    schema_columns = ['table_name', 'column_name', 'data_type', 'nullable', 'key', 'default']

    # Get all table names (first field of each SHOW TABLES row)
    table_names = [row[0] for row in conn.execute("SHOW TABLES").fetchall()]

    schema_data = []
    for table_name in table_names:
        # Quote the identifier (doubling embedded quotes) so names that are
        # reserved words or contain special characters can still be described.
        quoted_name = '"' + table_name.replace('"', '""') + '"'
        columns = conn.execute(f"DESCRIBE {quoted_name}").fetchall()

        for column in columns:
            # DESCRIBE rows are read positionally; trailing fields are
            # optional here, so short rows fall back to None.
            schema_data.append({
                'table_name': table_name,
                'column_name': column[0],
                'data_type': column[1],
                'nullable': column[2] if len(column) > 2 else None,
                'key': column[3] if len(column) > 3 else None,
                'default': column[4] if len(column) > 4 else None
            })

    # Fixing the column list keeps the frame/CSV header stable even with no rows.
    schema_df = pd.DataFrame(schema_data, columns=schema_columns)

    # Make sure the target directory exists; to_csv does not create it.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    schema_df.to_csv(output_path, index=False)

    print(f" Schema exported to '{output_path}'")
    print(f" Found {len(table_names)} tables with {len(schema_data)} total columns")

    return schema_df

In [None]:
# Build the sample tables, then export their schema to CSV
create_sample_data()
schema_df = get_all_tables_schema()

# Print the schema frame under its heading
print("\n Database Schema:", schema_df, sep="\n")

# Preview the first three rows of each sample table
print("\n Sample Data:")
for table in ('customers', 'products', 'orders'):
    print(f"\n{table.upper()} table:")
    preview = conn.execute(f"SELECT * FROM {table} LIMIT 3")
    df = preview.df()
    print(df)

print("\n Schema CSV file 'data/database_schema.csv' has been created!")