<h3>Amendments Log</h3>
<table style="width:100%">
  <thead>
    <tr>
      <th style="text-align:left">Version</th>
      <th style="text-align:left">Amended By</th>
      <th style="text-align:left">Date</th>
      <th style="text-align:left">Description</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>1.6</td>
      <td>Gary Manley</td>
      <td>2025-12-06</td>
      <td>Updated PyVis options to enforce vertical text centering and text wrapping.</td>
    </tr>
    <tr>
      <td>1.5</td>
      <td>Gary Manley</td>
      <td>2025-12-06</td>
      <td>Refined Lineage: Explicit Silver notebook mapping, removed DQ report, added Excel Report data flow.</td>
    </tr>
    <tr>
      <td>1.4</td>
      <td>Gary Manley</td>
      <td>2025-12-06</td>
      <td>Updated PyVis layout to Hierarchical (Left-to-Right) and linked Jobs to Tables.</td>
    </tr>
    <tr>
      <td>1.3</td>
      <td>Gary Manley</td>
      <td>2025-12-06</td>
      <td>Added PyVis generation for interactive HTML lineage.</td>
    </tr>
    <tr>
      <td>1.2</td>
      <td>Gary Manley</td>
      <td>2025-12-06</td>
      <td>Added missing lineage edges for Bridge/Link tables.</td>
    </tr>
    <tr>
      <td>1.1</td>
      <td>Gary Manley</td>
      <td>2025-12-06</td>
      <td>Fixed Mermaid syntax for Gold Views (Unique Node IDs to prevent collapsing).</td>
    </tr>
    <tr>
      <td>1.0</td>
      <td>Gary Manley</td>
      <td>2025-12-06</td>
      <td>Initial Version: Generates Mermaid charts from metadata.</td>
    </tr>
  </tbody>
</table>

In [None]:
# 1. SETUP & IMPORTS
import duckdb
import pandas as pd
import os
import sys
from datetime import datetime
from dotenv import load_dotenv
from pyvis.network import Network  # pip install pyvis

# Load environment variables. A machine-specific .env file is used when it is
# present on disk; otherwise load_dotenv() is called with no arguments.
# NOTE(review): this absolute path only exists on the author's machine - every
# other environment takes the no-argument branch.
vLocalEnvPath = r"C:/Users/garym/Documents/GitHub/MovieReleases/.env"
vDotenvArgs = {"dotenv_path": vLocalEnvPath} if os.path.exists(vLocalEnvPath) else {}
load_dotenv(**vDotenvArgs)

# The MotherDuck token is mandatory: stop immediately when it is absent or empty.
vMdToken = os.environ.get("MOTHERDUCK_TOKEN")
if not vMdToken:
    raise RuntimeError("MOTHERDUCK_TOKEN missing")

# Open the MotherDuck connection shared by the cells below.
print("Connecting to MotherDuck...")
vConnString = "md:?motherduck_token=" + vMdToken
vCon = duckdb.connect(vConnString)

In [None]:
# PARAMETERS / CONSTANTS
# Name of this notebook and the locations it reads from / writes to.
cNotebookName = "generate_docs.ipynb"
cOutputPathMd = "docs/Live_Architecture.md"      # static Mermaid diagram
cOutputPathHtml = "docs/Live_Architecture.html"  # interactive PyVis diagram
cGoldConfigPath = "config/gold_views.csv"        # Silver -> Gold view config

# Hand-maintained Silver data flow.
# Each entry is (source table, target table, edge description); the list order
# is the order in which the edges are emitted.
cSilverEdges = [
    # Core dimension and fact loads
    ("bronze.uk_releases", "silver.film_release_dim", "Dedup & SK"),
    ("bronze.uk_releases", "silver.film_release_fact", "Join"),
    ("silver.film_release_dim", "silver.film_release_fact", "Join"),
    ("silver.date_dim", "silver.film_release_fact", "Join"),
    # Dimensions derived from the film dimension
    ("silver.film_release_dim", "silver.actor_dim", "Explode"),
    ("silver.film_release_dim", "silver.genre_dim", "Explode"),
    # Bridge / link tables
    ("silver.film_release_dim", "silver.film_actor_bridge", "Explode & Map"),
    ("silver.actor_dim", "silver.film_actor_bridge", "Join SK"),
    ("silver.film_release_dim", "silver.film_genre_bridge", "Explode & Map"),
    ("silver.genre_dim", "silver.film_genre_bridge", "Join SK"),
]

# Explicit notebook -> table(s) mapping.
# Used to draw "Writes" links from a job node to the tables it produces, which
# fixes the visual disconnect between jobs and tables in PyVis.
cJobToTableMap = {
    "process_date_dim.ipynb": ["silver.date_dim"],
    "process_dim_film.ipynb": ["silver.film_release_dim"],
    "process_bridge_actor.ipynb": [
        "silver.actor_dim",
        "silver.genre_dim",
        "silver.film_actor_bridge",
        "silver.film_genre_bridge",
    ],
    "process_fact_film.ipynb": ["silver.film_release_fact"],
}

## 2. Generate Graphs (Mermaid & PyVis)
We construct both a static Mermaid string and an interactive PyVis HTML network.

In [None]:
# Initialize Mermaid String
# The document opens with a code fence and a left-to-right flowchart header,
# followed by two Mermaid comment lines recording which notebook produced the
# file and when. The notebook name comes from cNotebookName so the header
# cannot drift from the configured constant.
vGeneratedAt = datetime.now().strftime('%Y-%m-%d %H:%M')
vMermaidLines = [
    "```mermaid",
    "graph LR",
    f"    %% Generated Automatically by {cNotebookName}",
    f"    %% Last Updated: {vGeneratedAt}",
    "",
]

# Initialize PyVis Network (directed, white background, black label text).
vNet = Network(
    height="800px",
    width="100%",
    bgcolor="#ffffff",
    font_color="black",
    directed=True,
)

# Network options:
#   nodes  - vertically centre the label and cap the node width at 200 so that
#            long labels wrap
#   layout - HIERARCHICAL LAYOUT: forces Left-to-Right flow, grouping layers
#            naturally
#   edges  - horizontal cubic-Bezier curves, colour inheritance enabled,
#            half-size arrowheads pointing at the target
vNet.set_options("""
var options = {
  "nodes": {
    "heightConstraint": {
      "valign": "middle"
    },
    "widthConstraint": {
      "maximum": 200
    }
  },
  "layout": {
    "hierarchical": {
      "enabled": true,
      "direction": "LR",
      "sortMethod": "directed",
      "levelSeparation": 250
    }
  },
  "edges": {
    "smooth": {
      "type": "cubicBezier",
      "forceDirection": "horizontal"
    },
    "color": {"inherit": true},
    "arrows": {"to": {"enabled": true, "scaleFactor": 0.5}}
  }
}
""")

# --- HELPERS: ADD NODE / ADD EDGE ---
def f_add_node(vId, vGroup, vLabel=None):
    """Add a node to the shared PyVis network ``vNet``, styled by its group.

    Args:
        vId: Node identifier. Also used as the hover tooltip, and as the
            visible label when ``vLabel`` is empty or None.
        vGroup: Key used to look up the node's colour and shape. A group with
            no colour entry gets '#ccc'; a group with no shape entry gets
            'database'.
        vLabel: Optional display text for the node.
    """
    # NOTE(review): callers invoke this several times for the same id; that
    # relies on PyVis ignoring an id it already holds - confirm.
    vFallbackColor = '#ccc'
    vFallbackShape = 'database'  # DB cylinder

    vGroupColors = dict(
        notebook='#9b59b6',  # Purple
        landing='#3498db',   # Blue
        bronze='#CD7F32',    # Bronze
        silver='#C0C0C0',    # Silver
        gold='#FFD700',      # Gold
        artifact='#e74c3c',  # Red (file)
    )
    vGroupShapes = dict(
        notebook='box',
        source='ellipse',
        gold='star',
        artifact='box',
    )

    vNodeColor = vGroupColors.get(vGroup, vFallbackColor)
    vNodeShape = vGroupShapes.get(vGroup, vFallbackShape)

    # The group itself is deliberately not forwarded to PyVis, so the explicit
    # colour is not overridden by default group styles.
    vNet.add_node(
        vId,
        label=vLabel or vId,
        color=vNodeColor,
        shape=vNodeShape,
        title=vId,
    )

def f_add_edge(vSrc, vTgt, vLabel=None, vDashed=False):
    """Add an edge from ``vSrc`` to ``vTgt`` on the shared PyVis network ``vNet``.

    Args:
        vSrc: Id of the source node.
        vTgt: Id of the target node.
        vLabel: Optional text passed as the edge's ``title``; omitted when
            empty or None.
        vDashed: When truthy, the edge is passed ``dashes=True``.
    """
    vEdgeOptions = {}
    if vLabel:
        vEdgeOptions["title"] = vLabel
    if vDashed:
        vEdgeOptions["dashes"] = True
    vNet.add_edge(vSrc, vTgt, **vEdgeOptions)

# --- 1. JOBS (PIPELINE CONTROL) ---
# Lookups filled while walking the pipeline_control rows; later sections use
# them to connect job nodes to the tables they touch.
vJobOutputs = {}          # layer -> [job node ids]
vJobMeta = {}             # job node id -> notebook path
vExcelReportJobId = None  # node id of the job whose path contains 'excel_report'

vMermaidLines.append("    subgraph Orchestration")
try:
    dfPipeline = vCon.sql(
        "SELECT * FROM MovieReleases.main.pipeline_control ORDER BY step_id"
    ).df()

    for _, vRow in dfPipeline.iterrows():
        vNotebookName = vRow['notebook_path']

        # The DQ report job is intentionally left out of the diagram.
        if 'dq_report' in vNotebookName:
            continue

        vStepId = vRow['step_id']
        vNodeId = f"JOB_{vStepId}"
        vLabel = f"Step {vStepId}: {vNotebookName}"

        # Remember the notebook behind this node and spot the Excel report job.
        vJobMeta[vNodeId] = vNotebookName
        if 'excel_report' in vNotebookName:
            vExcelReportJobId = vNodeId

        # Mermaid shows the step-numbered label; PyVis shows the notebook path.
        vMermaidLines.append(f'        {vNodeId}("{vLabel}")')
        f_add_node(vNodeId, "notebook", vNotebookName)

        # File the job under its layer for later job -> table linking.
        vJobOutputs.setdefault(vRow['layer'], []).append(vNodeId)

except Exception as vErr:
    # Best effort: report the problem and continue with whatever was collected.
    print(f"Warning: {vErr}")
vMermaidLines.append("    end\n")

# --- 2. DATA LINEAGE ---
vMermaidLines.append("    subgraph Data_Lineage")

# A. Landing -> Bronze
# Every bronze_config row yields a source node, a target node and a data edge
# labelled with the row's merge pattern. Jobs registered under the 'landing'
# layer are linked to the source table and jobs under the 'bronze' layer to the
# target table, using dashed "Writes" edges.
vSeenJobLinks = set()  # (job node id, table id) pairs already linked
try:
    dfBronzeCfg = vCon.sql("SELECT * FROM MovieReleases.main.bronze_config").df()
    for _, vRow in dfBronzeCfg.iterrows():
        vSrc = f"{vRow['source_schema']}.{vRow['source_table']}"
        vTgt = f"{vRow['target_schema']}.{vRow['target_table']}"

        # Nodes (the schema name doubles as the styling group)
        f_add_node(vSrc, vRow['source_schema'])
        f_add_node(vTgt, vRow['target_schema'])

        # Edge
        f_add_edge(vSrc, vTgt, vRow['merge_pattern'])
        vMermaidLines.append(f"        {vSrc} -->|{vRow['merge_pattern']}| {vTgt}")

        # Link Jobs to Data (Notebook -> Table).
        # A table can appear in more than one config row; each job -> table
        # link is added only once so repeated rows do not stack duplicate
        # dashed edges on the same pair of nodes.
        for vLayerName, vTable in (("landing", vSrc), ("bronze", vTgt)):
            for vJobId in vJobOutputs.get(vLayerName, []):
                vLinkKey = (vJobId, vTable)
                if vLinkKey in vSeenJobLinks:
                    continue
                vSeenJobLinks.add(vLinkKey)
                f_add_edge(vJobId, vTable, "Writes", vDashed=True)

except Exception as e:
    # Best effort: a missing or unreadable config only drops this section.
    print(f"Warning: {e}")

# B. Bronze -> Silver
# Replay the hand-maintained Silver edge list into both diagrams.
for vEdge in cSilverEdges:
    vSrc, vTgt, vDesc = vEdge

    # A source whose name contains "bronze" is styled as bronze, else silver.
    vSrcLayer = "silver"
    if "bronze" in vSrc:
        vSrcLayer = "bronze"

    f_add_node(vSrc, vSrcLayer)
    f_add_node(vTgt, "silver")
    f_add_edge(vSrc, vTgt, vDesc)
    vMermaidLines.append(f"        {vSrc} -->|{vDesc}| {vTgt}")

# Link Silver Notebooks (Using Explicit Map)
# NOTE(review): the lookup is an exact match on the stored notebook path; if
# pipeline_control stores directory-qualified paths no link is drawn - confirm.
for vNodeId, vNbName in vJobMeta.items():
    for vTgtTable in cJobToTableMap.get(vNbName, ()):
        f_add_edge(vNodeId, vTgtTable, "Writes", vDashed=True)

# C. Silver -> Gold
# Each row of the gold view config links a source table to a gold view. The
# section is skipped entirely when the config file does not exist.
if os.path.exists(cGoldConfigPath):
    dfGoldCfg = pd.read_csv(cGoldConfigPath)
    vSeenGoldJobLinks = set()  # (job node id, view node id) pairs already linked
    for _, vRow in dfGoldCfg.iterrows():
        vSrc = f"{vRow['source_schema']}.{vRow['source_table']}"
        vRawName = vRow['target_view_name']

        # Node id: spaces become underscores and double quotes are dropped so
        # every view gets a unique, syntax-safe identifier.
        vTgtId = "gold_" + vRawName.replace(" ", "_").replace("\"", "")
        vTgtLabel = f"gold.{vRawName}"

        # The Mermaid label is itself wrapped in double quotes, so a quote
        # inside the view name would end the label early and break the chart.
        # Mermaid renders the #quot; entity code as a literal double quote.
        vMermaidLabel = vTgtLabel.replace('"', '#quot;')

        f_add_node(vSrc, "silver")
        f_add_node(vTgtId, "gold", vTgtLabel)
        f_add_edge(vSrc, vTgtId, "View")
        vMermaidLines.append(f'        {vSrc} -->|View| {vTgtId}["{vMermaidLabel}"]')

        # Link Gold Job
        # A view with several source rows is visited several times; the
        # job -> view link is added only on the first visit.
        for vJobId in vJobOutputs.get('gold', []):
            vLinkKey = (vJobId, vTgtId)
            if vLinkKey in vSeenGoldJobLinks:
                continue
            vSeenGoldJobLinks.add(vLinkKey)
            f_add_edge(vJobId, vTgtId, "Creates", vDashed=True)

# D. Reporting Flow (Excel)
# Drawn only in PyVis, and only when an Excel report job was found earlier.
if vExcelReportJobId:
    # 1. Inputs: the tables the report is assumed to read (hard-coded list).
    vReportInputs = ['silver.film_release_fact', 'silver.film_release_dim', 'silver.date_dim']
    for vInputTable in vReportInputs:
        f_add_edge(vInputTable, vExcelReportJobId, "Reads", vDashed=True)

    # 2. Output: the workbook produced by the report job.
    vArtifactId = "Upcoming_Releases.xlsx"
    f_add_node(vArtifactId, "artifact")
    f_add_edge(vExcelReportJobId, vArtifactId, "Generates")

# Close the Data_Lineage subgraph and the Mermaid code fence.
vMermaidLines += ["    end", "```"]

# Join Mermaid
vOutputContent = "\n".join(vMermaidLines)

vNodeCount = len(vNet.nodes)
print(f"Generated {vNodeCount} nodes for PyVis.")

## 3. Save to File
Write the markdown and HTML files to the docs directory.

In [None]:
# Make sure the folder of every output file exists, not just the Markdown one.
# os.path.dirname returns "" for a bare filename and os.makedirs("") raises, so
# empty results are skipped.
for vOutPath in (cOutputPathMd, cOutputPathHtml):
    vOutDir = os.path.dirname(vOutPath)
    if vOutDir:
        os.makedirs(vOutDir, exist_ok=True)

try:
    # Save Markdown: fixed heading and intro, then the generated Mermaid block.
    with open(cOutputPathMd, "w", encoding="utf-8") as vMdFile:
        vMdFile.write("# Live System Architecture\n\n")
        vMdFile.write("This diagram is **auto-generated** by the ETL pipeline based on the active configuration.\n\n")
        vMdFile.write(vOutputContent)

    # Save PyVis HTML
    # PyVis writes full HTML content
    vNet.save_graph(cOutputPathHtml)

    print(f"Documentation saved to:\n- {cOutputPathMd}\n- {cOutputPathHtml}")
finally:
    # Release the MotherDuck connection even when one of the writes fails.
    vCon.close()