<h3>Amendments Log</h3>
<table style="width:100%">
  <thead>
    <tr>
      <th style="text-align:left">Version</th>
      <th style="text-align:left">Amended By</th>
      <th style="text-align:left">Date</th>
      <th style="text-align:left">Description</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>1.0</td>
      <td>Gary Manley</td>
      <td>2025-12-01</td>
      <td>Initial Version</td>
    </tr>
  </tbody>
</table>

In [None]:
# 1. SETUP & IMPORTS
import duckdb
import pandas as pd
import os
import sys
from dotenv import load_dotenv

# Make modules that live in the current working directory importable
sys.path.append(os.getcwd())

# Load environment variables.
# The path below is specific to one developer machine; on any other machine
# it will not exist and load_dotenv() is called without an explicit path.
vLocalEnvPath = r"C:/Users/garym/Documents/GitHub/MovieReleases/.env"
if not os.path.exists(vLocalEnvPath):
    load_dotenv()
else:
    load_dotenv(dotenv_path=vLocalEnvPath)

# Fail fast when the MotherDuck token is absent or empty
vMdToken = os.getenv("MOTHERDUCK_TOKEN")
if not vMdToken:
    raise RuntimeError("MOTHERDUCK_TOKEN missing")

# Open the MotherDuck connection (vCon) used by the later cells
print("Connecting to MotherDuck...")
vCon = duckdb.connect(f"md:?motherduck_token={vMdToken}")

In [None]:
# PARAMETERS / CONSTANTS
# Target table for the fact load, written as database.schema.table
vTargetTable = "MovieReleases.silver.film_release_fact"
# Notebook name constant (not referenced by the cells visible here)
cNotebookName = "process_fact_film.ipynb"

## 2. Build Fact Table

**Logic:**
1. Read active Bronze records (`is_current_uda` is true) and deduplicate to the most recent row per `imdb_id_ref` by `valid_from_uda` (same logic as Dim).
2. Join to `film_release_dim` to get `sk_film_release`.
3. Calculate `sk_date` directly from `release_date` (YYYYMMDD integer format).
4. Add measure `release_count = 1`.

In [None]:
print("Fetching active records from Bronze...")

# Everything that uses the connection runs inside try/finally so that vCon is
# closed even when one of the steps below raises.
try:
    # 1. Fetch Bronze & Dim
    # Best-effort read: a failure is reported and then handled as "nothing to load".
    # Both frames are reset so neither can keep a stale value from an earlier run.
    try:
        dfBronze = vCon.table("MovieReleases.bronze.uk_releases").df()
        dfDimFilm = vCon.table("MovieReleases.silver.film_release_dim").df()
    except Exception as e:
        print(f"Error reading source tables: {e}")
        dfBronze = pd.DataFrame()
        dfDimFilm = pd.DataFrame()

    if not dfBronze.empty and not dfDimFilm.empty:
        # 2. Filter Active & Deduplicate (Bronze)
        # We must apply the exact same logic as the Dimension to ensure alignment:
        # keep current rows, then the newest row (by valid_from_uda) per imdb_id_ref.
        dfActive = dfBronze[dfBronze['is_current_uda'] == True].copy()
        dfSorted = dfActive.sort_values(by='valid_from_uda', ascending=False)
        dfDedup = dfSorted.drop_duplicates(subset=['imdb_id_ref'], keep='first').copy()

        # 3. Join to get Movie SK
        # We join on the Business Key (imdb_id_ref). The inner join drops Bronze
        # rows that have no matching Dim row.
        dfFact = pd.merge(
            dfDedup,
            dfDimFilm[['imdb_id_ref', 'sk_film_release']],
            on='imdb_id_ref',
            how='inner'
        )

        # 4. Calculate Date SK
        # A missing release_date becomes NaT, which cannot be cast to int, so
        # those rows are reported and excluded before the key is built.
        vReleaseDate = pd.to_datetime(dfFact['release_date'])
        vHasDate = vReleaseDate.notna()
        if not vHasDate.all():
            vMissingCount = int((~vHasDate).sum())
            print(f"Warning: dropping {vMissingCount} row(s) with no release_date.")
            dfFact = dfFact.loc[vHasDate].copy()
            vReleaseDate = vReleaseDate.loc[vHasDate]

        # Format YYYYMMDD as integer
        dfFact['sk_date'] = vReleaseDate.dt.strftime('%Y%m%d').astype(int)

        # 5. Add Measures
        dfFact['release_count'] = 1

        # 6. Final Select
        dfFinal = dfFact[['sk_film_release', 'sk_date', 'release_count']]

        print(f"Generated {len(dfFinal)} fact rows.")

        # 7. Load to Silver (Replace)
        # The DataFrame is registered as a view so SQL can select from it; the
        # target table is rebuilt from scratch on every run.
        print(f"Loading to {vTargetTable}...")
        vCon.sql("CREATE SCHEMA IF NOT EXISTS MovieReleases.silver")
        vCon.register('v_stage_fact', dfFinal)
        vCon.sql(f"CREATE OR REPLACE TABLE {vTargetTable} AS SELECT * FROM v_stage_fact")

        print("Success.")
        vCon.sql(f"SELECT * FROM {vTargetTable} LIMIT 5").show()

    else:
        print("Bronze or Dim table empty. Skipping Fact load.")
finally:
    vCon.close()