<h3>Amendments Log</h3>
<table style="width:100%">
  <thead>
    <tr>
      <th style="text-align:left">Version</th>
      <th style="text-align:left">Amended By</th>
      <th style="text-align:left">Date</th>
      <th style="text-align:left">Description</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>1.0</td>
      <td>Gary Manley</td>
      <td>2025-11-30</td>
      <td>Initial Version</td>
    </tr>
  </tbody>
</table>

In [1]:
# 1. SETUP & IMPORTS
import duckdb
import pandas as pd
import numpy as np
import os
import sys
from datetime import datetime, timedelta
from dotenv import load_dotenv

# Load Env
# The .env location can be overridden with the MOVIERELEASES_ENV_PATH environment
# variable so the notebook is not tied to a single developer's machine; the
# original local path remains the default when the variable is not set.
vLocalEnvPath = os.getenv(
    "MOVIERELEASES_ENV_PATH",
    r"C:/Users/garym/Documents/GitHub/MovieReleases/.env",
)
if os.path.exists(vLocalEnvPath):
    load_dotenv(dotenv_path=vLocalEnvPath)
else:
    # No file at the configured path: let load_dotenv() do its own .env discovery.
    load_dotenv()

# Fail fast, before any connection attempt, when the token is absent or empty.
vMdToken = os.getenv("MOTHERDUCK_TOKEN")
if not vMdToken:
    raise RuntimeError("MOTHERDUCK_TOKEN missing")

# Connect
# vCon is shared by the later cells and is closed by the final load cell.
print("Connecting to MotherDuck...")
vCon = duckdb.connect(f"md:?motherduck_token={vMdToken}")

Connecting to MotherDuck...


In [2]:
# PARAMETERS / CONSTANTS
cNotebookName = "process_date_dim.ipynb"

# Config: the dimension runs from a fixed start date up to today + vYearsAhead
# calendar years (the end date is anchored on the run date, not on vStartDate).
vStartDate = "2025-01-01"
vYearsAhead = 2

# pd.DateOffset(years=...) shifts by calendar years, so the end date keeps
# today's month/day even when a leap day falls inside the window (a fixed
# 365-days-per-year timedelta would land one day short per leap day).
# A 29 Feb run date rolls back to 28 Feb when the target year is not a leap year.
vEndDate = (
    pd.Timestamp(datetime.now().date()) + pd.DateOffset(years=vYearsAhead)
).strftime('%Y-%m-%d')

## 2. Generate Date Range & Attributes
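We use pandas to generate one row per calendar day between the configured start and end dates, then derive the standard calendar attributes (year, quarter, month, day, ISO week, names, weekend flag) along with offsets and "current" flags measured relative to the date the notebook is run.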

In [None]:
print(f"Generating Dates from {vStartDate} to {vEndDate}...")

# Generate Base Range: one row per calendar day, both endpoints included.
dfDates = pd.DataFrame({"date_actual": pd.date_range(start=vStartDate, end=vEndDate)})

# An end date earlier than the start date yields an empty range. Stop here rather
# than let the load step replace the target table with zero rows.
if dfDates.empty:
    raise ValueError(f"No dates generated between {vStartDate} and {vEndDate}")

# --- 1. CORE ATTRIBUTES ---
# SK format: YYYYMMDD (Integer)
dfDates["sk_date"] = dfDates["date_actual"].dt.strftime('%Y%m%d').astype(int)

dfDates["year_actual"] = dfDates["date_actual"].dt.year
dfDates["month_actual"] = dfDates["date_actual"].dt.month
dfDates["day_actual"] = dfDates["date_actual"].dt.day
dfDates["quarter_actual"] = dfDates["date_actual"].dt.quarter
dfDates["day_of_week"] = dfDates["date_actual"].dt.dayofweek + 1 # 1=Mon, 7=Sun

# Names
# day_name()/month_name() return English names by default. The short month name
# is cut from month_name rather than produced with strftime('%b'), which follows
# the process locale, so all three name columns always share one language.
dfDates["day_name"] = dfDates["date_actual"].dt.day_name()
dfDates["month_name"] = dfDates["date_actual"].dt.month_name()
dfDates["month_name_short"] = dfDates["month_name"].str[:3]

# ISO Calendar (Week Number)
# NOTE: an ISO week can belong to the neighbouring ISO year (e.g. 2025-12-29 is
# ISO week 1 of 2026), so week_of_year combined with year_actual does not
# identify a week uniquely around New Year.
dfDates["week_of_year"] = dfDates["date_actual"].dt.isocalendar().week.astype(int)

# Weekend Flag (day_of_week 6 = Saturday, 7 = Sunday)
dfDates["is_weekend"] = dfDates["day_of_week"].isin([6, 7])

# --- 2. RELATIVE OFFSETS (Time Intelligence) ---
# Allow reporting tools to easily filter "Yesterday" (day_offset = -1) or
# "Next Month" (month_offset = +1).
# Every offset is measured from the date this cell runs, so the values are only
# correct until the next calendar day; re-run the notebook to refresh them.

vCurrentDate = pd.to_datetime(datetime.now().date())
vCurrentYear = vCurrentDate.year
# Months counted from year 0, which turns a month difference into a subtraction.
vCurrentMonth = vCurrentDate.year * 12 + vCurrentDate.month

# Day Offset
dfDates["day_offset"] = (dfDates["date_actual"] - vCurrentDate).dt.days

# Year Offset
dfDates["year_offset"] = dfDates["year_actual"] - vCurrentYear

# Month Offset (difference of the months-since-year-0 indexes)
dfDates["month_offset"] = (
    dfDates["year_actual"] * 12 + dfDates["month_actual"] - vCurrentMonth
)

# --- 3. CURRENT FLAGS ---
dfDates["is_current_day"] = dfDates["day_offset"] == 0
dfDates["is_current_month"] = dfDates["month_offset"] == 0
dfDates["is_current_year"] = dfDates["year_offset"] == 0

# Invariant: exactly one row per day, so the surrogate key must be unique.
assert dfDates["sk_date"].is_unique, "sk_date must be unique"

print(f"Generated {len(dfDates)} rows.")

## 3. Load to Silver
Each run fully replaces the `MovieReleases.silver.date_dim` table with the freshly generated rows.

In [5]:
# Destination table, the name under which the DataFrame is exposed to SQL, and
# the statement that rebuilds the table from it.
vTargetTable = "MovieReleases.silver.date_dim"
vStageView = "v_stage_date_dim"
vReplaceSql = f"CREATE OR REPLACE TABLE {vTargetTable} AS SELECT * FROM {vStageView}"

# The schema has to exist before a table can be created inside it.
vCon.sql("CREATE SCHEMA IF NOT EXISTS MovieReleases.silver")

# Make the in-memory DataFrame queryable under the staging name.
vCon.register(vStageView, dfDates)

# Rebuild the table wholesale from the staged rows.
print(f"Overwriting {vTargetTable}...")
vCon.sql(vReplaceSql)

# Report completion and release the connection; earlier cells must be re-run
# to reconnect before this cell can be executed again.
print("Success.")
vCon.close()

Overwriting MovieReleases.silver.date_dim...
Success.
