In [None]:
import duckdb
import pandas as pd

In [5]:
# File locations, relative to this notebook: the DuckDB warehouse file and
# the curated prices parquet.
DB_PATH = "../../data/warehouse/data.duckdb"
PARQUET_PATH = "../../data/curated/price/prices_clean.parquet"

# Open the warehouse database; `con` is the shared connection used by the
# cells that read from and write to it.
con = duckdb.connect(database=DB_PATH)

In [10]:
# Read the curated prices parquet into memory and preview the first rows.
df = pd.read_parquet(path=PARQUET_PATH)
df.head(n=5)


Unnamed: 0,symbol,trade_date,open,high,low,close,adj_close,volume,dividends,split_ratio
0,A,2017-01-03,45.93,46.75,45.74,46.49,43.47,1739600,0.0,0.0
1,A,2017-01-04,46.93,47.38,46.82,47.1,44.04,1821300,0.0,0.0
2,A,2017-01-05,47.05,47.07,46.36,46.54,43.52,1503700,0.0,0.0
3,A,2017-01-06,46.63,48.07,46.56,47.99,44.87,2883400,0.0,0.0
4,A,2017-01-09,48.01,48.56,47.91,48.14,45.01,2575300,0.0,0.0


In [12]:
# Summarise what was loaded: total row count and number of distinct symbols.
n_rows = len(df)
n_symbols = df["symbol"].nunique()
print(f"Loaded prices parquet: {n_rows:,} rows, {n_symbols} symbols")

Loaded prices parquet: 1,093,859 rows, 503 symbols


In [14]:
# Columns carried forward; anything else in the parquet is discarded here.
cols_keep = [
    "symbol",
    "trade_date",
    "open",
    "high",
    "low",
    "close",
    "adj_close",
    "volume",
]

# Take a copy restricted to those columns and convert trade_date to plain
# `datetime.date` values (time-of-day component removed).
df = df.loc[:, cols_keep].assign(
    trade_date=lambda frame: pd.to_datetime(frame["trade_date"]).dt.date
)

In [15]:
# Fetch the symbol -> security_id lookup from the warehouse `securities` table.
symbol_map = con.execute(
    """
    SELECT symbol, security_id FROM securities
    """
).fetchdf()

# Left join keeps every price row. validate="many_to_one" makes pandas raise a
# MergeError if `securities` lists the same symbol more than once, instead of
# silently duplicating price rows.
df = df.merge(symbol_map, on="symbol", how="left", validate="many_to_one")

# Rows whose symbol has no match in the lookup come back with a null security_id.
unmapped_rows = df["security_id"].isna()
missing = df.loc[unmapped_rows, "symbol"].unique()
if len(missing) > 0:
    print(f"Missing symbols not found in securities table: {len(missing)}")
    print(missing[:10])
    # Do not carry rows without a security_id forward to the load step.
    df = df.loc[~unmapped_rows].copy()
    print(f"Dropped {int(unmapped_rows.sum()):,} price rows with no security_id")
    # The nulls may have upcast the key column (e.g. int -> float); restore the
    # dtype used by the lookup table.
    df["security_id"] = df["security_id"].astype(symbol_map["security_id"].dtype)



In [16]:
# Keep only the columns to be loaded, in this order (symbol is replaced by
# security_id), then order rows by security and date.
df = df.loc[
    :,
    ["security_id", "trade_date", "open", "high", "low", "close", "adj_close", "volume"],
].sort_values(by=["security_id", "trade_date"])

In [17]:
# Load the prepared frame into `prices` inside one explicit transaction.
# DuckDB resolves `df` in the query to the DataFrame of that name in this
# notebook. NOTE(review): with `SELECT *` and no column list, values are
# presumably matched to table columns by position -- confirm that the column
# order of `df` matches the `prices` table definition.
con.execute("BEGIN TRANSACTION")
try:
    con.execute(
        """
        INSERT OR REPLACE INTO prices
        SELECT * FROM df
        """
    )
    con.execute("COMMIT")
except Exception:
    # Without a rollback the connection stays inside the failed transaction,
    # and re-running this cell would fail on BEGIN TRANSACTION.
    con.execute("ROLLBACK")
    raise

<_duckdb.DuckDBPyConnection at 0x7cc385e9dff0>

In [18]:
# Report how many rows were sent to the insert (the length of `df`), then
# release the database connection.
rows_written = len(df)
print(f"Successfully inserted {rows_written:,} rows into 'prices'")
con.close()

Successfully inserted 1,093,859 rows into 'prices'
