In [3]:
import numpy as np
import matplotlib.pyplot as plt

def compute_gradients(Phi, X, V, b, N, I, NT, sigma, weights, dt):
    """
    Compute gradients gradV and gradb based on the provided parameters.

    For every time step kk in range(NT - 1) and every block ii in range(I),
    the rows N*ii .. N*(ii+1)-1 of column kk are taken from Phi and X, and

        gradV += dt * outer(Phi_block, sigma(X_block))
        gradb += dt * Phi_block

    Afterwards the regularization terms dt * weights[1] * V and
    dt * weights[1] * b are added.

    Parameters:
    - Phi: Array of shape (N * I, NT), the feature matrix.
    - X: Array of shape (N * I, NT), input data.
    - V: Array of shape (N, N), weights to be updated. Not modified.
    - b: Array of shape (N,), bias to be updated. Not modified.
    - N: Integer, size of a single data block.
    - I: Integer, number of blocks.
    - NT: Integer, number of time steps. Only the first NT - 1 columns of
      Phi and X are read.
    - sigma: Function, non-linearity to apply on input data. It is called
      once per block and time step with a length-N slice of X.
    - weights: Sequence of regularization weights. Only weights[1] is used.
    - dt: Float, time step size. It scales the regularization terms as well.

    Returns:
    - gradV: Gradient of V, same shape as V.
    - gradb: Gradient of b, same shape as b.

    Both results are floating point: an inexact (float/complex) dtype of
    V / b is kept, any other dtype (e.g. integer) is accumulated as float64.
    """
    V = np.asarray(V)
    b = np.asarray(b)

    # np.zeros_like would inherit an integer dtype from V / b, and the
    # in-place float accumulation below would then fail to cast. Keep float
    # or complex dtypes untouched and fall back to float64 otherwise.
    dtype_V = V.dtype if np.issubdtype(V.dtype, np.inexact) else np.float64
    dtype_b = b.dtype if np.issubdtype(b.dtype, np.inexact) else np.float64
    gradV = np.zeros(V.shape, dtype=dtype_V)
    gradb = np.zeros(b.shape, dtype=dtype_b)

    for kk in range(NT - 1):
        for ii in range(I):
            ind = slice(N * ii, N * (ii + 1))  # Rows belonging to block ii
            phi_block = Phi[ind, kk]
            s = sigma(X[ind, kk])  # Non-linearity applied per block
            gradV += np.outer(phi_block, s)
            gradb += phi_block

    # Add regularization terms. dt multiplies every contribution (data and
    # regularization alike), so it is applied once here instead of inside
    # the loop.
    gradV += weights[1] * V
    gradb += weights[1] * b
    gradV *= dt
    gradb *= dt

    return gradV, gradb


In [8]:
# Demo activation passed as the non-linearity in the example
def sigma(x):
    """Return the hyperbolic tangent of ``x`` (element-wise for arrays)."""
    activated = np.tanh(x)
    return activated

# Example input sizes
N = 5    # size of a single block
I = 4    # number of blocks
NT = 10  # number of time steps

# Seeded generator so the demonstration inputs, and therefore the printed
# gradients, are identical on every fresh run of the notebook.
SEED = 0
rng = np.random.default_rng(SEED)

# Random inputs for demonstration, drawn uniformly from [0, 1)
Phi = rng.random((N * I, NT))
X = rng.random((N * I, NT))
V = rng.random((N, N))
b = rng.random(N)
weights = [0, 0.1]  # compute_gradients only reads weights[1]
dt = 0.01

# Compute gradients
gradV, gradb = compute_gradients(Phi, X, V, b, N, I, NT, sigma, weights, dt)

print("Gradient of V:", gradV)
print("Gradient of b:", gradb)


Gradient of V: [[0.06775705 0.07735735 0.09338362 0.07529366 0.0899381 ]
 [0.07511462 0.07930503 0.10011859 0.07639851 0.09587683]
 [0.09053116 0.09134878 0.11631591 0.085109   0.10915343]
 [0.07203    0.07199485 0.1043369  0.06364845 0.09614752]
 [0.07717726 0.07329497 0.09248259 0.06807113 0.09050142]]
Gradient of b: [0.1814054  0.19120834 0.2161243  0.19163407 0.17000363]


In [10]:
def visualize_gradients_over_time(gradV, gradb):
    """
    Plot the norms of gradV and gradb against the time-step index.

    Parameters:
    - gradV: Array of shape (N, N, T) with one matrix per time step along
      the last axis, or a single (N, N) matrix, which is treated as T = 1.
    - gradb: Array of shape (N, T) with one vector per time step along the
      last axis, or a single (N,) vector, which is treated as T = 1.

    Raises:
    - ValueError: If the inputs have an unsupported number of dimensions or
      disagree on the number of time steps.
    """
    gradV = np.asarray(gradV)
    gradb = np.asarray(gradb)

    # A single snapshot (2-D gradV / 1-D gradb) would make the norms below
    # collapse to scalars, which have no length; promote it to a series
    # containing one time step instead.
    if gradV.ndim == 2:
        gradV = gradV[:, :, np.newaxis]
    if gradb.ndim == 1:
        gradb = gradb[:, np.newaxis]

    if gradV.ndim != 3 or gradb.ndim != 2:
        raise ValueError(
            "expected gradV with 2 or 3 dimensions and gradb with 1 or 2 "
            f"dimensions, got shapes {gradV.shape} and {gradb.shape}"
        )
    if gradV.shape[2] != gradb.shape[1]:
        raise ValueError(
            "gradV and gradb disagree on the number of time steps: "
            f"{gradV.shape[2]} vs {gradb.shape[1]}"
        )

    gradV_norm = np.linalg.norm(gradV, axis=(0, 1))  # Matrix norm per time step
    gradb_norm = np.linalg.norm(gradb, axis=0)       # Vector norm per time step

    time_steps = np.arange(gradb_norm.shape[0])

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(time_steps, gradV_norm, label="Gradient Norm (V)", marker='o')
    ax.plot(time_steps, gradb_norm, label="Gradient Norm (b)", marker='x')
    ax.set_xlabel("Time Step")
    ax.set_ylabel("Gradient Norm")
    ax.set_title("Gradient Norms Over Time")
    ax.legend()
    ax.grid()
    plt.show()
