In [21]:
import pandas as pd
import polars as pl
import duckdb
import pyarrow as pa

# Build a small sample pandas DataFrame. It uses pandas' default dtypes
# (NumPy int64 / object strings); it is NOT Arrow-backed.
pdf = pd.DataFrame({
    'name': ['Alice', 'Bob', 'Charlie', 'David'],
    'age': [25, 30, 35, 40],
    'salary': [50000, 60000, 75000, 90000]
})

# Convert the pandas DataFrame to a PyArrow Table.
# NOTE: this step is not zero-copy in general -- object-dtype string columns
# such as 'name' have to be converted into Arrow's own string layout.
arrow_table = pa.Table.from_pandas(pdf)

# Wrap the Arrow table as a Polars DataFrame (Polars documents this as
# zero-copy for the most part).
pldf = pl.from_arrow(arrow_table)

# Polars step: derive an age-based label. pl.lit() is required so the strings
# are treated as literal values rather than column names.
pldf_transformed = pldf.with_columns([
    pl.when(pl.col('age') < 35)
    .then(pl.lit('Junior'))
    .otherwise(pl.lit('Senior'))
    .alias('age_category')
])
print(pldf_transformed)

# Hand the result back to Arrow so DuckDB can scan it.
arrow_table_transformed = pldf_transformed.to_arrow()

# DuckDB step: bucket salaries into bands.
# BETWEEN is inclusive, so 60000 and 80000 both land in 'Mid-level'.
salary_band_query = """
    SELECT name, age_category,
        CASE
            WHEN salary < 60000 THEN 'Junior'
            WHEN salary BETWEEN 60000 AND 80000 THEN 'Mid-level'
            ELSE 'Senior'
        END as salary_band
    FROM employee_table
"""

# Use the connection as a context manager so it is closed even when
# register()/execute() raises; the original only closed it on success.
with duckdb.connect(':memory:') as con:
    # register() exposes the Arrow table to SQL as a view named
    # 'employee_table'; the data is scanned in place, not imported.
    con.register('employee_table', arrow_table_transformed)

    # .df() materializes the query result as a pandas DataFrame, so
    # result_df stays usable after the connection is closed.
    result_df = con.execute(salary_band_query).df()

print(result_df)

shape: (4, 4)
┌─────────┬─────┬────────┬──────────────┐
│ name    ┆ age ┆ salary ┆ age_category │
│ ---     ┆ --- ┆ ---    ┆ ---          │
│ str     ┆ i64 ┆ i64    ┆ str          │
╞═════════╪═════╪════════╪══════════════╡
│ Alice   ┆ 25  ┆ 50000  ┆ Young        │
│ Bob     ┆ 30  ┆ 60000  ┆ Young        │
│ Charlie ┆ 35  ┆ 75000  ┆ Senior       │
│ David   ┆ 40  ┆ 90000  ┆ Senior       │
└─────────┴─────┴────────┴──────────────┘
      name age_category salary_band
0    Alice        Young      Junior
1      Bob        Young   Mid-level
2  Charlie       Senior   Mid-level
3    David       Senior      Senior
