# Step 4: Build dim_date (Daily Grain)

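This notebook generates `dim_date`, a calendar dimension with one row per day, spanning the earliest to the latest `order_purchase_timestamp` found in the Olist orders dataset.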


In [None]:
import pandas as pd
from pathlib import Path

# Input folder holding the raw CSVs and the output file for the date dimension.
DATA_PATH = Path("../data/raw")
OUT_PATH = Path("../data/mart/dim_date.csv")

# Load the orders and parse the purchase timestamp column into datetimes.
orders = pd.read_csv(DATA_PATH.joinpath("olist_orders_dataset.csv"))
purchase_ts = pd.to_datetime(orders["order_purchase_timestamp"])

# Truncate the earliest and latest purchase timestamps to midnight so the
# range below is bounded by whole days.
start_date, end_date = (
    boundary.normalize() for boundary in (purchase_ts.min(), purchase_ts.max())
)

# One entry per calendar day, both endpoints included.
dates = pd.date_range(start_date, end_date, freq="D")
dim_date = pd.DataFrame(data={"date_id": dates})


In [None]:
# Derive the calendar attributes from date_id. The datetime accessor and the
# ISO calendar frame are looked up once and shared by the columns below.
_dt = dim_date["date_id"].dt
_iso = _dt.isocalendar()

# ISO day numbering: Monday is 1 and Sunday is 7.
_iso_day = _iso.day.astype(int)

# Insertion order of this mapping is the column order of the output table.
_calendar_columns = {
    "year": _dt.year,
    "quarter": _dt.quarter,
    "month_num": _dt.month,
    "month_name": _dt.strftime("%B"),
    "year_month": _dt.strftime("%Y-%m"),
    # ISO week number; near New Year it may belong to a different ISO year
    # than the calendar "year" column above.
    "week_of_year": _iso.week.astype(int),
    "day_of_week": _iso_day,
    "day_name": _dt.strftime("%A"),
    # Saturday (6) and Sunday (7) are flagged as weekend days.
    "is_weekend": _iso_day.isin([6, 7]).astype(int),
    # Boolean flags are stored as 0/1 integers.
    "is_month_start": _dt.is_month_start.astype(int),
    "is_month_end": _dt.is_month_end.astype(int),
}
for _name, _values in _calendar_columns.items():
    dim_date[_name] = _values


In [None]:
# Create the output directory (and any missing parents) if it is not there
# yet, then write the dimension without the DataFrame index column.
_target_dir = OUT_PATH.parent
_target_dir.mkdir(exist_ok=True, parents=True)
dim_date.to_csv(path_or_buf=OUT_PATH, index=False)

# Last expression of the cell: shows the output path in the notebook.
OUT_PATH
