# Build dim_date

Purpose:
- Create a reusable calendar date dimension
- Support monthly churn trends and time-based KPIs
- Avoid reliance on Power BI auto-date tables

In [2]:
import pandas as pd

# Daily calendar spine: one timestamp per day from 2019-01-01 through
# 2023-12-31, both endpoints included.
date_range = pd.date_range("2019-01-01", "2023-12-31", freq="D")

# Turn the spine into a single-column frame with a fresh 0..n-1 index;
# every other calendar attribute is derived from this "date" column.
dim_date = date_range.to_frame(index=False, name="date")

In [3]:
# Derive the calendar attributes from the "date" column in one pass.
# Each lambda receives the frame being built, so the cell can be re-run
# safely: existing columns are simply recomputed.
dim_date = dim_date.assign(
    # Calendar year as an integer, e.g. 2019.
    year=lambda frame: frame["date"].dt.year,
    # Month number within the year.
    month=lambda frame: frame["date"].dt.month,
    # Full month name as text.
    month_name=lambda frame: frame["date"].dt.month_name(),
    # Quarterly period rendered as a string label, e.g. "2019Q1".
    quarter=lambda frame: frame["date"].dt.to_period("Q").astype(str),
)

In [4]:
# Month-level grouping key: the monthly period of each date rendered as a
# string label such as "2019-01".
dim_date = dim_date.assign(
    year_month=dim_date["date"].dt.to_period("M").astype(str)
)

In [5]:
# Fix the final column order, coarsest grain first after the key column.
# Selecting by an explicit label list raises KeyError if any expected
# column is missing.
dim_date = dim_date.loc[
    :,
    [
        "date",
        "year",
        "quarter",
        "month",
        "month_name",
        "year_month",
    ],
]

In [6]:
# Sanity-check the finished dimension.
#
# A notebook cell only renders its LAST expression, so bare head()/tail()
# calls placed before info() are evaluated and then silently discarded.
# info() prints its schema summary (row count, dtypes, non-null counts) as a
# side effect, so it can run first without being the final expression.
dim_date.info()

# Preview both ends of the calendar in a single table (first five and last
# five rows). Keeping this as the final expression makes it the cell output.
pd.concat([dim_date.head(), dim_date.tail()])


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1826 entries, 0 to 1825
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   date        1826 non-null   datetime64[ns]
 1   year        1826 non-null   int32         
 2   quarter     1826 non-null   object        
 3   month       1826 non-null   int32         
 4   month_name  1826 non-null   object        
 5   year_month  1826 non-null   object        
dtypes: datetime64[ns](1), int32(2), object(3)
memory usage: 71.5+ KB


In [7]:
# Persist the finished dimension as CSV under the processed-data folder.
# index=False leaves the positional row index out of the file, so only the
# six dimension columns are written.
dim_date.to_csv(path_or_buf="../data/processed/dim_date.csv", index=False)

Notes:
- A daily date dimension (one row per calendar day, 1,826 rows) was created to support flexible monthly aggregation and time-based analysis.
- The date range (2019-01-01 through 2023-12-31) intentionally exceeds the analysis window to avoid edge effects.