# GRADIENT FLOW VISUALIZATION

In [2]:
from google.colab import drive
drive.mount('/content/drive')
dir = 'drive/MyDrive/MDS/ADM/'
import sys
sys.path.insert(1,dir)

Mounted at /content/drive


Import the libraries and generate synthetic data.

In [3]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import matplotlib.colors as mcolors
import matplotlib.cm as cm
from matplotlib.animation import FuncAnimation

np.random.seed(42)

# 1) Prepare XOR dataset\ nx = None
x1 = np.random.rand(1000, 1) * 10 - 5
x2 = np.random.rand(1000, 1) * 10 - 5
y_xor = np.logical_xor(x1 > 0, x2 > 0).astype(np.float32)
X = np.hstack([x1, x2]).astype(np.float32)

dataset = tf.data.Dataset.from_tensor_slices((X, y_xor))
dataset = dataset.shuffle(buffer_size=100).batch(4)

Define model in Keras

In [4]:
def create_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(2, activation='relu', input_shape=(2,), name='hidden_layer'),
        tf.keras.layers.Dense(1, activation='sigmoid', name='output_layer')
    ])
    return model


 Fixed graph layout

In [5]:
data_ids = ['i0', 'i1']
hidden_ids = ['h0', 'h1']
output_ids = ['o0']
all_nodes = data_ids + hidden_ids + output_ids
layer_counts = [len(data_ids), len(hidden_ids), len(output_ids)]
max_nodes = max(layer_counts)

def vertical_pos(layer, nodes):
    offset = (max_nodes - len(nodes)) / 2
    return {node: (layer, -(i + offset)) for i, node in enumerate(nodes)}
pos = {}
pos.update(vertical_pos(0, data_ids))
pos.update(vertical_pos(1, hidden_ids))
pos.update(vertical_pos(2, output_ids))

In [6]:
def animate_gradient_flow(lr=0.01,
                           loss_threshold=1e-3,
                           max_epochs=100,
                           interval_ms=200,
                           save_path="xor.mp4"):
    model = create_model()
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    loss_fn = tf.keras.losses.BinaryCrossentropy()

    # Records
    grad_norm_h = []  # for hidden layer weights
    grad_norm_o = []  # for output layer weights
    last_loss = 1e9

    fig, (ax_graph, ax_grad) = plt.subplots(1, 2, figsize=(14, 6))

    def update(epoch):
        nonlocal last_loss
        for x_batch, y_batch in dataset:
            with tf.GradientTape() as tape:
                preds = model(x_batch, training=True)
                loss = loss_fn(y_batch, preds)
            grads = tape.gradient(loss, model.trainable_weights)
            optimizer.apply_gradients(zip(grads, model.trainable_weights))
        last_loss = loss.numpy()
        g_h = tf.norm(grads[0]).numpy()
        g_o = tf.norm(grads[2]).numpy()
        grad_norm_h.append(g_h)
        grad_norm_o.append(g_o)

        # Build network graph
        G = nx.DiGraph()
        for n in all_nodes:
            G.add_node(n)
        grad_h = np.abs(grads[0].numpy())
        grad_o = np.abs(grads[2].numpy())
        for i in range(2):
            for j in range(2):
                G.add_edge(data_ids[i], hidden_ids[j], weight=grad_h[i, j])
        for i in range(2):
            G.add_edge(hidden_ids[i], output_ids[0], weight=grad_o[i, 0])

        weights = [G[u][v]['weight'] for u, v in G.edges()]
        vmax = max(weights) if weights else 1e-6
        norm = mcolors.Normalize(vmin=0, vmax=vmax)
        cmap = cm.get_cmap('plasma')
        colors = [cmap(norm(w)) for w in weights]

        ax_graph.clear()
        nx.draw(G, pos, ax=ax_graph,
                with_labels=True, node_color='skyblue', node_size=800,
                arrows=True, edge_color=colors, width=2)
        labels = {(u, v): f"{G[u][v]['weight']:.4f}" for u, v in G.edges()}
        for (u, v), lbl in labels.items():
            x1, y1 = pos[u]; x2, y2 = pos[v]
            mx, my = (x1 + x2)/2, (y1 + y2)/2
            dx, dy = x2-x1, y2-y1
            length = np.hypot(dx, dy)
            px, py = -dy/length, dx/length
            offset = 0.2 if (u, v) in [(data_ids[0], hidden_ids[1]), (data_ids[1], hidden_ids[0])] else 0
            ax_graph.text(mx+px*offset, my+py*offset, lbl,
                          fontsize=8, ha='center', va='center',
                          bbox=dict(facecolor='white', pad=0.2))
        ax_graph.set_title(f"Epoch {epoch+1} (loss={last_loss:.4f})")
        ax_graph.axis('off')

        ax_grad.clear()
        ax_grad.plot(range(1, len(grad_norm_h)+1), grad_norm_h, label='hidden grad norm')
        ax_grad.plot(range(1, len(grad_norm_o)+1), grad_norm_o, label='output grad norm')
        ax_grad.set_yscale('log')
        ax_grad.set_xlabel('Epoch')
        ax_grad.set_ylabel('Gradient Norm (log-scale)')
        ax_grad.set_title('Gradient Flow Over Epochs')
        ax_grad.legend(); ax_grad.grid(True)

    def frame_generator():
        for epoch in range(max_epochs):
            yield epoch
            if last_loss < loss_threshold:
                print(f"Converged at epoch {epoch+1} (loss={last_loss:.4f})")
                break

    anim = FuncAnimation(fig, update, frames=frame_generator(), interval=interval_ms, repeat=False)
    anim.save(save_path, writer='ffmpeg', dpi=150)
    plt.close(fig)
    print(f"Saved TensorFlow gradient-flow animation to '{save_path}'")

In [7]:
if __name__ == '__main__':
    animate_gradient_flow(save_path=dir+'xor.mp4')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  anim = FuncAnimation(fig, update, frames=frame_generator(), interval=interval_ms, repeat=False)
  cmap = cm.get_cmap('plasma')


Saved TensorFlow gradient-flow animation to 'drive/MyDrive/MDS/ADM/xor.mp4'
