---
title: Raw ST to DB
description: write the clean ST to the db, subset to the raw samples.
project: database_etl
status: closed
conclusion: "new db has been created, table ST successfully written. Found that parquet files are much easier for duckdb to read than csv; advise using them as the main intermediary format"
---

In [None]:
from database_etl.definitions import DB_PATH, DATA_DIR
import duckdb as db
from pathlib import Path

# When True, an existing `st` table is dropped and rebuilt further down.
overwrite: bool = True
# Parquet file holding the cleaned sample tracker; source for the `st` load.
clean_st_path = DATA_DIR / "clean_sample_tracker.parquet"
# Connection to the project database, shared by the statements below.
con = db.connect(DB_PATH)


In [None]:
def load_st(con: db.DuckDBPyConnection, clean_st_path: Path) -> None:
    """Create table ``st`` and fill it with the raw-detection rows of a parquet file.

    Args:
        con: Open DuckDB connection in which the table is created.
        clean_st_path: Parquet file to read. Only rows whose ``detection``
            column equals ``'raw'`` are inserted.

    Note:
        The ``create table`` has no ``if not exists`` clause, so the caller is
        expected to have removed any existing ``st`` table beforehand.
    """
    con.execute(
        """--sql
    create table st (
        pk integer primary key,
        detection varchar not null,
        wine varchar,
        vintage integer,
        sampler varchar,
        samplecode varchar not null unique,
        open_date varchar,
        sampled_date varchar,
        added_to_cellartracker varchar,
        notes varchar,
        size float
    );
    """
    )
    # The path is bound as a parameter rather than interpolated into the SQL
    # text, so paths containing quotes cannot break the statement.
    # `select *` maps columns by position: the parquet column order has to
    # match the table definition above.
    con.execute(
        """--sql
    insert into st
        select
            *
        from
            read_parquet(?)
        where
            detection = 'raw'
            ;
    """,
        [str(clean_st_path)],
    )


# Rebuild (optionally) and read back the `st` table. Everything runs inside
# try/finally so the connection is released on success and on any failure,
# not only when a CatalogException is raised.
try:
    if overwrite:
        # `if exists` lets the first run against a fresh database succeed;
        # a plain `drop table` fails when `st` is missing, which would skip
        # the load entirely.
        con.sql(
            """--sql
        drop table if exists st cascade;
        """
        )
        load_st(con=con, clean_st_path=clean_st_path)

    # Read a few rows back into a polars frame; this also fails loudly if
    # `st` does not exist (e.g. overwrite is False on a fresh database).
    con.sql(
        """--sql

select
    *
from
    st
limit 3
"""
    ).pl()
finally:
    con.close()
    del con
