# Interactive Multilayer NN (Graphviz + ipywidgets)

This notebook uses Graphviz to visualize the structure of a multilayer neural network.

Farhad Kamangar, 2026
(with assistance from ChatGPT-5.2)



In [11]:
# --- Widget compatibility helpers ---
# If you're running in Google Colab, widgets need the custom widget manager enabled.
# Best-effort setup: every exception raised inside the try body (a failed import
# as well as a failure while enabling the manager) is swallowed, so this section
# never stops the rest of the file from running.
# NOTE(review): presumably the import simply fails outside Colab — confirm that
# silently ignoring *other* failures is intended.
try:
    import google.colab  # type: ignore
    from google.colab import output  # type: ignore
    output.enable_custom_widget_manager()
except Exception:
    # Deliberately ignored; see the note above.
    pass

from graphviz import Digraph
from IPython.display import display, Math, Markdown, clear_output
import ipywidgets as widgets

def mlp_layer_graph_from_list(
    layer_sizes,                      # e.g. [2, 6, 4, 3]
    layer_names=None,                 # optional names
    rankdir="LR",
    ranksep=1.2, nodesep=0.35,
    splines="false",
    show_backward=True,
    show_grad_overlays=True,
    edge_labels=False,
):
    """
    Build the architecture view (one node per neuron) for a network of any depth.

    Every neuron of a layer is connected to every neuron of the next layer,
    and every output neuron is connected to a single loss node ``L``.

    Args:
        layer_sizes: list/tuple of neuron counts, input layer first
            (at least two entries).
        layer_names: optional names, one per layer. Defaults to
            ``X, H1, ..., Y``. Names are used in cluster and node ids.
        rankdir, ranksep, nodesep, splines: forwarded to Graphviz as graph
            attributes (converted with ``str`` where needed).
        show_backward: if true, add red dashed edges from the loss to each
            output neuron plus one representative edge per layer pair going
            backwards.
        show_grad_overlays: if true, add one red dashed edge per weight layer
            from the loss node, labelled ``∂L/∂W<ell>``.
        edge_labels: if true, label each forward edge ``w[ell]_i,j`` where
            ``ell`` is the weight-layer number, ``i`` the source neuron and
            ``j`` the target neuron. Previously this flag was accepted but
            had no effect.

    Returns:
        The constructed ``Digraph``.

    Raises:
        AssertionError: if ``layer_sizes`` is not a list/tuple with at least
            two entries, or ``layer_names`` has a different length.
    """
    assert isinstance(layer_sizes, (list, tuple)) and len(layer_sizes) >= 2
    L = len(layer_sizes) - 1  # number of weight layers

    if layer_names is None:
        # range(1, L) is empty for a two-layer network, giving ["X", "Y"].
        layer_names = ["X"] + [f"H{i}" for i in range(1, L)] + ["Y"]
    assert len(layer_names) == len(layer_sizes)

    g = Digraph("MLP", format="png")
    g.attr(
        rankdir=rankdir, bgcolor="white", compound="true",
        ranksep=str(ranksep), nodesep=str(nodesep), splines=str(splines)
    )
    g.attr("edge", label="")
    g.attr("node", shape="circle", fontsize="12")

    # Per layer: list of (node_id, neuron_index), highest index first.
    layer_nodes = []

    # Build layers, one cluster per layer.
    for lname, n in zip(layer_names, layer_sizes):
        nodes = []
        with g.subgraph(name=f"cluster_{lname}") as c:
            c.attr(label=lname, style="rounded", rank="same")
            # Nodes are emitted n..1.
            # NOTE(review): presumably so that neuron 1 ends up on top in the
            # rendered layout — confirm against the actual dot output.
            for i in range(int(n), 0, -1):
                nid = f"{lname}_{i}"
                c.node(nid, label=str(i), sortv=str(i))
                nodes.append((nid, i))
        layer_nodes.append(nodes)

    # Forward edges (fully connected between consecutive layers).
    fwd = "#1f77b4"
    for ell in range(L):
        for s, i in layer_nodes[ell]:
            for t, j in layer_nodes[ell + 1]:
                if edge_labels:
                    g.edge(s, t, label=f"w[{ell + 1}]_{i},{j}", color=fwd)
                else:
                    g.edge(s, t, color=fwd)

    # Loss node (box), then restore the neuron node defaults.
    g.attr("node", shape="box", style="rounded")
    g.node("L", label="L (loss)")
    g.attr("node", shape="circle", fontsize="12")

    # Every output neuron feeds the loss.
    for y, _ in layer_nodes[-1]:
        g.edge(y, "L", color=fwd)

    bwd = "#d62728"

    # First-emitted node of each layer, or None for an empty layer, so that a
    # zero-size layer skips its representative edges instead of raising.
    anchors = [nodes[0][0] if nodes else None for nodes in layer_nodes]

    if show_backward:
        # Backward signal from loss to each output neuron.
        for y, _ in layer_nodes[-1]:
            g.edge("L", y, color=bwd, style="dashed", constraint="false")

        # Representative backward chain: one edge per consecutive layer pair.
        for ell in range(L, 0, -1):
            if anchors[ell] is not None and anchors[ell - 1] is not None:
                g.edge(anchors[ell], anchors[ell - 1],
                       color=bwd, style="dashed", constraint="false")

    if show_grad_overlays:
        # One labelled overlay per weight layer, attached to the source layer.
        for ell in range(1, L + 1):
            if anchors[ell - 1] is not None:
                g.edge(
                    "L", anchors[ell - 1],
                    color=bwd, style="dashed", constraint="false",
                    label=f"∂L/∂W{ell}"
                )

    return g

# Demo: neuron-level graph for a 2-6-4-3 network with a larger layer spacing.
# NOTE(review): the returned Digraph is neither assigned nor displayed here;
# presumably this relied on notebook auto-display of a cell's last expression —
# confirm it still renders where it now sits.
mlp_layer_graph_from_list([2, 6, 4, 3], ranksep=1.4)

from graphviz import Digraph

def backprop_tensor_graph_from_list(
    layer_sizes,                # e.g. [2,6,4,1]
    rankdir="LR",
    ranksep=1.2, nodesep=0.35,
    splines="false",
    show_backward=True,
    show_grad_overlays=True,
    activation_symbol="φ",
    label_edges=False           # if False, suppress edge labels
):
    """
    Build the tensor/operation graph of a dense network of arbitrary depth.

    Per weight layer ``ell`` the graph contains ``W<ell>``, ``b<ell>``, a
    ``mm<ell>`` and an ``add<ell>`` operation node; hidden layers additionally
    get ``Z<ell>``, ``phi<ell>`` and ``A<ell>``, while the last layer feeds
    ``Yhat`` directly (no activation). ``Yhat`` and ``Y`` meet in ``mse``,
    which produces ``L``.

      - Forward edges: blue solid
      - Backward/gradient flow: red dashed (``show_backward``)
      - Gradient overlays from ``L`` to every ``W<ell>``/``b<ell>``: red dashed
        (``show_grad_overlays``); labelled only when ``label_edges`` is true.

    NOTE(review): nodes are created without an explicit label, so Graphviz
    shows the node ids; the shape strings and ``activation_symbol`` are
    accepted but currently have no effect on the output — confirm intended.

    Returns the constructed ``Digraph``.
    """
    assert isinstance(layer_sizes, (list, tuple)) and len(layer_sizes) >= 2, "layer_sizes must be a list like [d_in,...,d_out]"
    depth = len(layer_sizes) - 1

    graph = Digraph("BackpropTensorGraph", format="png")
    graph.attr(rankdir=rankdir, bgcolor="white",
               ranksep=str(ranksep), nodesep=str(nodesep), splines=str(splines))
    if not label_edges:
        graph.attr("edge", label="")
    graph.attr("node", shape="box", style="rounded", fontsize="12")

    forward_style = {"color": "#1f77b4"}
    backward_style = {"color": "#d62728", "style": "dashed", "constraint": "false"}

    def add_tensor(node_id, shape=None):
        # `shape` documents the tensor at the call site; it is not rendered.
        graph.node(node_id)

    def add_op(node_id, symbol):
        # `symbol` documents the operation at the call site; it is not rendered.
        graph.node(node_id, shape="ellipse", style="solid")

    def forward_edge(tail, head):
        graph.edge(tail, head, **forward_style)

    def backward_edge(tail, head):
        graph.edge(tail, head, **backward_style)

    add_tensor("X", f"B×{layer_sizes[0]}")
    add_tensor("Y", f"B×{layer_sizes[-1]}")
    add_tensor("L", "scalar")

    layer_input = "X"
    for ell in range(1, depth + 1):
        din, dout = layer_sizes[ell - 1], layer_sizes[ell]
        weight, bias = f"W{ell}", f"b{ell}"
        matmul, add = f"mm{ell}", f"add{ell}"

        add_tensor(weight, f"{din}×{dout}")
        add_tensor(bias, f"{dout}")
        add_op(matmul, "matmul")
        add_op(add, "+")

        forward_edge(layer_input, matmul)
        forward_edge(weight, matmul)
        forward_edge(matmul, add)
        forward_edge(bias, add)

        if ell == depth:
            # Output layer: linear, no activation node.
            add_tensor("Yhat", f"B×{dout}")
            forward_edge(add, "Yhat")
            continue

        pre_act, act_op, post_act = f"Z{ell}", f"phi{ell}", f"A{ell}"
        add_tensor(pre_act, f"B×{dout}")
        forward_edge(add, pre_act)
        add_op(act_op, activation_symbol)
        add_tensor(post_act, f"B×{dout}")
        forward_edge(pre_act, act_op)
        forward_edge(act_op, post_act)
        layer_input = post_act

    add_op("mse", "MSE")
    forward_edge("Yhat", "mse")
    forward_edge("Y", "mse")
    forward_edge("mse", "L")

    if show_backward:
        backward_edge("L", "mse")
        backward_edge("mse", "Yhat")

        for ell in reversed(range(1, depth + 1)):
            # Walk each layer from its output back to the layer's input.
            if ell == depth:
                chain = ["Yhat", f"add{ell}"]
            else:
                chain = [f"A{ell}", f"phi{ell}", f"Z{ell}", f"add{ell}"]
            chain.append("X" if ell == 1 else f"A{ell-1}")
            for tail, head in zip(chain, chain[1:]):
                backward_edge(tail, head)

    if show_grad_overlays:
        for ell in range(1, depth + 1):
            for param in ("W", "b"):
                graph.edge(
                    "L", f"{param}{ell}",
                    label=(f"∂L/∂{param}{ell}" if label_edges else ""),
                    **backward_style
                )

    return graph

# Demo: tensor/operation graph for a 2-6-4-1 network. Every keyword passed here
# equals the function's default value, so they are spelled out for illustration only.
backprop_tensor_graph_from_list([2, 6, 4, 1], rankdir="LR", ranksep=1.2, nodesep=0.35, splines="false", label_edges=False)

# @title
from ipywidgets import interact, Text, FloatSlider, Dropdown, Checkbox
from IPython.display import display, Markdown

def _parse_layers(s: str):
    s = s.strip()
    if s.startswith("[") and s.endswith("]"):
        s = s[1:-1]
    parts = [p.strip() for p in s.split(",") if p.strip()]
    layers = [int(p) for p in parts]
    if len(layers) < 2:
        raise ValueError("Need at least two integers, e.g. 2,4,1")
    if any(x <= 0 for x in layers):
        raise ValueError("All layer sizes must be positive integers")
    return layers

# Shared architecture textbox (single source of truth for Graphviz + LaTeX).
# This one widget instance is handed to @interact as the `layers` control and is
# also read and observed by the LaTeX section (update_latex_display), so editing it
# drives both views.
layers_text = widgets.Text(value="3,4,2", description="layers", layout=widgets.Layout(width="240px"))

@interact(
    layers=layers_text,
    which=Dropdown(options=["Neuron-layer graph", "Tensor/operation graph"], value="Neuron-layer graph"),
    rankdir=Dropdown(options=["LR","TB"], value="LR"),
    ranksep=FloatSlider(min=0.4, max=2.8, step=0.1, value=1.4, description="Layers distance"),
    nodesep=FloatSlider(min=0.1, max=1.2, step=0.05, value=0.35, description="Nodes distance"),
    splines=Dropdown(options=["false","line","true"], value="false", description="splines"),
    show_backward=Checkbox(value=False, description="show backward"),
    show_grad_overlays=Checkbox(value=False, description="show ∂L/∂W overlays"),
    edge_labels=Checkbox(value=False, description="label forward edges"),
    label_edges=Checkbox(value=False, description="label grad edges (tensor)"),
)
def show_graphviz(layers, which, rankdir, ranksep, nodesep, splines, show_backward, show_grad_overlays, edge_labels, label_edges):
    """
    Widget callback: build and return the graph selected by `which`.

    `layers` is the raw text of the architecture box. If it cannot be parsed,
    a "Parse error" message is displayed and nothing is returned. For the
    neuron-layer view the numeric neuron labels are replaced by a dot before
    the graph is returned; any other `which` value yields the
    tensor/operation graph.
    """
    try:
        layer_sizes = _parse_layers(layers)
    except Exception as err:
        display(Markdown(f"**Parse error:** {err}"))
        return

    # Options understood by both graph builders.
    shared_options = dict(
        rankdir=rankdir, ranksep=ranksep, nodesep=nodesep, splines=splines,
        show_backward=show_backward, show_grad_overlays=show_grad_overlays,
    )

    if which != "Neuron-layer graph":
        return backprop_tensor_graph_from_list(
            layer_sizes, label_edges=label_edges, **shared_options
        )

    graph = mlp_layer_graph_from_list(
        layer_sizes, edge_labels=edge_labels, **shared_options
    )

    import re

    def replace_neuron_labels_with_dot(g, dot="•"):
        """
        Rewrite the body lines of `g` so neuron nodes (ids like X_0, H1_3, Y_2)
        carry `dot` as their label. A numeric label is substituted; a neuron
        line without one gets the label inserted at the start of its
        attribute list. `g` is modified in place and returned.
        """
        neuron_line = re.compile(r'^\s*"?([A-Za-z][A-Za-z0-9]*_\d+)"?\s+\[')
        numeric_label = re.compile(r'\blabel\s*=\s*"?\d+"?')

        def relabel(line):
            if not neuron_line.match(line):
                return line
            swapped, hits = numeric_label.subn(f'label="{dot}"', line)
            if hits:
                return swapped
            # No numeric label present: inject one into the attribute list.
            return line.replace('[', f'[label="{dot}", ', 1)

        g.body = [relabel(line) for line in g.body]
        return g

    replace_neuron_labels_with_dot(graph, dot="•")
    return graph
def _latex_matrix_symbolic(rows, cols, base):
    """
    Create LaTeX for a rows x cols matrix with entries base_{i,j} (1-indexed).
    """
    lines = []
    for i in range(1, rows+1):
        row_entries = [rf"{base}_{{{i},{j}}}" for j in range(1, cols+1)]
        lines.append(" & ".join(row_entries))
    body = r" \\ ".join(lines) if lines else ""
    return rf"\begin{{bmatrix}} {body} \end{{bmatrix}}"

def _latex_vector_symbolic(n, base):
    entries = [rf"{base}_{{{i}}}" for i in range(1, n+1)]
    body = r" \\ ".join(entries) if entries else ""
    return rf"\begin{{bmatrix}} {body} \end{{bmatrix}}"

def _render_layer_matrices(layer_sizes, embed_bias_top_row=True):
    """
    Return (description, matrix) LaTeX pairs for the parameters of each layer.

    layer_sizes: list like [n0, n1, n2, ...] where n0 is the input dimension.
    With embed_bias_top_row true, each layer contributes one pair: the
    augmented weight matrix of shape (n_{l-1}+1) x n_l (the extra row stands
    for the bias weights).
    Otherwise each layer contributes two pairs: W^{[l]} of shape n_{l-1} x n_l
    followed by b^{[l]} of shape n_l x 1.
    """
    pairs = []
    for idx in range(1, len(layer_sizes)):
        fan_in, fan_out = layer_sizes[idx - 1], layer_sizes[idx]
        weight_symbol = rf"w^{{[{idx}]}}"

        if not embed_bias_top_row:
            weights = _latex_matrix_symbolic(fan_in, fan_out, weight_symbol)
            bias = _latex_vector_symbolic(fan_out, rf"b^{{[{idx}]}}")
            pairs.append((
                rf"Layer {idx}:~W^{{[{idx}]}} \in \mathbb{{R}}^{{{fan_in}\times {fan_out}}}",
                weights,
            ))
            pairs.append((
                rf"Layer {idx}:~b^{{[{idx}]}} \in \mathbb{{R}}^{{{fan_out}\times 1}}",
                bias,
            ))
            continue

        # Augmented form: one extra row on top of the fan_in weight rows.
        augmented = _latex_matrix_symbolic(fan_in + 1, fan_out, weight_symbol)
        pairs.append((
            rf"Layer {idx}:~\tilde{{W}}^{{[{idx}]}} \in \mathbb{{R}}^{{({fan_in}+1)\times {fan_out}}}",
            augmented,
        ))
    return pairs




def _latex_matrix_symbolic_compact(cols, base, m_symbol="m"):
    """
    Compact symbolic m x cols matrix with dots (for batch-mode outputs).
    cols: int number of columns
    base: LaTeX base symbol, e.g. a^{[1]} or z^{[2]}
    """
    if cols <= 0:
        return r"\begin{bmatrix}\end{bmatrix}"
    if cols == 1:
        return rf"\begin{{bmatrix}} {base}_{{1,1}} \\ \vdots \\ {base}_{{{m_symbol},1}} \end{{bmatrix}}"
    # Show first 3 columns (or fewer) then ellipsis then last column
    show_k = min(3, cols)
    first_cols = [rf"{base}_{{1,{j}}}" for j in range(1, show_k+1)]
    last_col = rf"{base}_{{1,{cols}}}"
    row1 = " & ".join(first_cols + ([r"\cdots", last_col] if cols > show_k else []))
    mid_cols = [r"\vdots"] * show_k
    mid_last = r"\vdots"
    row2 = " & ".join(mid_cols + ([r"\ddots", mid_last] if cols > show_k else []))
    last_cols = [rf"{base}_{{{m_symbol},{j}}}" for j in range(1, show_k+1)]
    last_last = rf"{base}_{{{m_symbol},{cols}}}"
    row3 = " & ".join(last_cols + ([r"\cdots", last_last] if cols > show_k else []))
    return rf"\begin{{bmatrix}} {row1} \\ {row2} \\ {row3} \end{{bmatrix}}"

def _render_forward_symbolic(layer_sizes, embed_bias_top_row=True, activation_hidden=r"\sigma", activation_out=r"\mathrm{id}"):
    r"""
    Return (description, LaTeX) pairs describing the symbolic forward pass.

    The first pair introduces the inputs A^[0] = X (m x n0, one sample per
    row). Each layer then contributes its pre-activation Z^[l] and either the
    hidden activation A^[l] (using ``activation_hidden``) or, for the last
    layer, the output \hat{Y} (using ``activation_out``).
    With embed_bias_top_row true, every layer is preceded by an "Augment"
    pair that prepends a column of ones to the previous activations.
    """
    last = len(layer_sizes) - 1
    n_inputs = layer_sizes[0]

    steps = [(
        rf"Inputs:~A^{{[0]}} = X \in \mathbb{{R}}^{{m\times {n_inputs}}}",
        _latex_matrix_symbolic_compact(n_inputs, r"x"),
    )]

    for cur in range(1, last + 1):
        prev = cur - 1
        fan_in, width = layer_sizes[prev], layer_sizes[cur]
        # Every per-layer result has one row per sample and `width` columns.
        batch_shape = rf" \in \mathbb{{R}}^{{m\times {width}}}"
        z_matrix = _latex_matrix_symbolic_compact(width, rf"z^{{[{cur}]}}")

        if embed_bias_top_row:
            steps.append((
                rf"Augment:~\tilde{{A}}^{{[{prev}]}} = \left[\mathbf{{1}}\;\;A^{{[{prev}]}}\right] \in \mathbb{{R}}^{{m\times ({fan_in}+1)}}",
                rf"\tilde{{A}}^{{[{prev}]}}",
            ))
            pre_activation = rf"Pre-activation:~Z^{{[{cur}]}} = \tilde{{A}}^{{[{prev}]}}\,\tilde{{W}}^{{[{cur}]}}"
        else:
            pre_activation = rf"Pre-activation:~Z^{{[{cur}]}} = A^{{[{prev}]}} W^{{[{cur}]}} + \mathbf{{1}}\,\left(b^{{[{cur}]}}\right)^T"
        steps.append((pre_activation + batch_shape, z_matrix))

        if cur == last:
            steps.append((
                rf"Output:~\hat{{Y}} = A^{{[{cur}]}} = {activation_out}\!\left(Z^{{[{cur}]}}\right)" + batch_shape,
                _latex_matrix_symbolic_compact(width, rf"\hat{{y}}"),
            ))
        else:
            steps.append((
                rf"Activation:~A^{{[{cur}]}} = {activation_hidden}\!\left(Z^{{[{cur}]}}\right)" + batch_shape,
                _latex_matrix_symbolic_compact(width, rf"a^{{[{cur}]}}"),
            ))
    return steps

# ---- LaTeX matrices UI (driven by the SAME `layers` structure used above) ----

# Toggle between the two parameter layouts. Its boolean value is what
# update_latex_display passes on as `embed_bias_top_row`.
# NOTE(review): both option captions mention "XW+b"; confirm the wording of the
# "top row" caption is what is intended for the augmented form.
embed_bias = widgets.ToggleButtons(
    options=[("Bias as top row (XW+b)", True), ("Bias separate (XW + b)", False)],
    value=True,
    description="Format:",
)

# Output area that update_latex_display clears and re-fills on every change.
latex_out = widgets.Output()

def update_latex_display(*_):
    """
    Re-render the LaTeX view inside `latex_out` from the current widget values.

    Accepts and ignores any positional arguments so it can be used both as a
    widget observer and called directly. If the layer text cannot be parsed,
    only a "Parse error" message is shown.
    """
    def show_pairs(pairs):
        # Each pair is rendered as two consecutive Math blocks.
        for heading, body in pairs:
            display(Math(heading))
            display(Math(body))

    with latex_out:
        clear_output(wait=True)
        try:
            sizes = _parse_layers(layers_text.value)
        except Exception as err:
            display(Markdown(f"**Parse error:** {err}"))
            return

        size_list = ",".join(str(n) for n in sizes)
        display(Markdown("### Layer dimensions"))
        display(Math(rf"\text{{Layer sizes }} = \left[{size_list}\right]"))
        display(Markdown("---"))

        matrix_pairs = _render_layer_matrices(sizes, embed_bias_top_row=embed_bias.value)
        display(Markdown("### Symbolic matrices"))
        show_pairs(matrix_pairs)

        display(Markdown("---"))
        display(Markdown("### Symbolic forward pass (layer outputs)"))
        show_pairs(_render_forward_symbolic(sizes, embed_bias_top_row=embed_bias.value))

# Wire events: re-render whenever the architecture text or the bias format changes.
layers_text.observe(update_latex_display, names="value")
embed_bias.observe(update_latex_display, names="value")

# Only the format toggle is placed in this control box; `layers_text` is already
# used as the `layers` control of the @interact UI. The commented-out line is the
# alternative that would also show the text box here.
# latex_controls = widgets.VBox([widgets.HBox([layers_text]), embed_bias])
latex_controls = widgets.VBox([embed_bias])
display(latex_controls, latex_out)
# display( latex_out)
# Initial render so the output area is filled before any widget event fires.
update_latex_display()


interactive(children=(Text(value='3,4,2', description='layers', layout=Layout(width='240px')), Dropdown(descri…

VBox(children=(ToggleButtons(description='Format:', options=(('Bias as top row (XW+b)', True), ('Bias separate…

Output()