In [1]:
import numpy as np

In [2]:
a = np.array([1, 2, 3])
print( a.shape)

(3,)


In [3]:
b = np.broadcast_to( a, (3,3))
print( b)
print( b.shape)

[[1 2 3]
 [1 2 3]
 [1 2 3]]
(3, 3)


In [4]:
c = b[:,-1]
print( c)

[3 3 3]


In [5]:
d = np.mean( c, axis=0)
print( d)

3.0


In [6]:
a = np.array([[1, 2], [3, 4]])
print( np.mean(a, axis=0))

[2. 3.]


In [7]:
a = np.random.randn(3, 1)
print( a)
b = np.broadcast_to( a, (3,3))
print( b)

[[ 0.34720547]
 [ 1.23596384]
 [-1.46726515]]
[[ 0.34720547  0.34720547  0.34720547]
 [ 1.23596384  1.23596384  1.23596384]
 [-1.46726515 -1.46726515 -1.46726515]]


In [8]:
b = np.broadcast_to( a, (3,3,3))
print( b)

[[[ 0.34720547  0.34720547  0.34720547]
  [ 1.23596384  1.23596384  1.23596384]
  [-1.46726515 -1.46726515 -1.46726515]]

 [[ 0.34720547  0.34720547  0.34720547]
  [ 1.23596384  1.23596384  1.23596384]
  [-1.46726515 -1.46726515 -1.46726515]]

 [[ 0.34720547  0.34720547  0.34720547]
  [ 1.23596384  1.23596384  1.23596384]
  [-1.46726515 -1.46726515 -1.46726515]]]


In [9]:
# Original array
original = np.array([[1], [2], [3]])  # Shape (3, 1)

# Broadcasted array
broadcasted = np.broadcast_to(original, (3, 3))  # Shape (3, 3)

# Reverse operation by summing over the broadcasted axis
reversed_array = np.sum(broadcasted, axis=1, keepdims=True) / broadcasted.shape[1]
print("Reversed Array:\n", reversed_array)  # Shape (3, 1)


Reversed Array:
 [[1.]
 [2.]
 [3.]]


In [12]:
def gradient_of_broadcast_to(grad_output, original_shape):
    """
    Compute the gradient for a broadcast_to operation during backpropagation.

    Parameters:
        grad_output (np.ndarray): Gradient of the loss with respect to the broadcasted array.
        original_shape (tuple): Shape of the original array before broadcasting.

    Returns:
        np.ndarray: Gradient of the loss with respect to the original array.
    """
    print( original_shape)
    if ( len( original_shape) == 1):
        return [ np.sum( grad_output) ]
        
    # Determine the broadcasted axes
    if ( len( original_shape) != len( grad_output.shape)):
        raise ValueError( "mismatching shapes")
    ## we collect the axis which have been broadcast. assuming the original shape was 1...
    axes_to_sum = [i for i, (b, o) in enumerate(zip(grad_output.shape, original_shape)) if o == 1]
    print( "axes_to_sum: ", axes_to_sum)
    
    # Sum over broadcasted axes
    grad_input = np.sum(grad_output, axis=tuple(axes_to_sum), keepdims=True)
    
    # Reshape back to the original shape
    return grad_input.reshape(original_shape)

In [13]:
# Example
original = np.random.randn(3, 1)  # Shape (3, 1)
broadcasted = np.broadcast_to(original, (3, 3))  # Shape (3, 3)

# Reverse
reversed_array = gradient_of_broadcast_to(broadcasted, original.shape)
print("Broadcasted:\n", broadcasted)
print("Reversed Array:\n", reversed_array)

(3, 1)
axes_to_sum:  [1]
Broadcasted:
 [[-0.52321083 -0.52321083 -0.52321083]
 [-0.63144289 -0.63144289 -0.63144289]
 [ 0.16874324  0.16874324  0.16874324]]
Reversed Array:
 [[-1.5696325 ]
 [-1.89432868]
 [ 0.50622971]]


In [14]:
original = np.random.randn(1, 3)  # Shape (1, 3)
broadcasted = np.broadcast_to(original, (3, 3))  # Shape (3, 3)

# Reverse
reversed_array = gradient_of_broadcast_to(broadcasted, original.shape)
print("Broadcasted:\n", broadcasted)
print("Reversed Array:\n", reversed_array)

(1, 3)
axes_to_sum:  [0]
Broadcasted:
 [[ 1.1792175  -0.5239145  -0.26382784]
 [ 1.1792175  -0.5239145  -0.26382784]
 [ 1.1792175  -0.5239145  -0.26382784]]
Reversed Array:
 [[ 3.53765251 -1.57174349 -0.79148352]]


In [15]:
original = np.random.randn(1,)  # Shape (1, )
broadcasted = np.broadcast_to(original, (3, 3, 3))  # Shape (3, 3, 3)

# Reverse
reversed_array = gradient_of_broadcast_to(broadcasted, original.shape)
print("Original:\n", original)
print("Broadcasted:\n", broadcasted)

print("Reversed Array:\n", reversed_array)

(1,)
Original:
 [-1.74602477]
Broadcasted:
 [[[-1.74602477 -1.74602477 -1.74602477]
  [-1.74602477 -1.74602477 -1.74602477]
  [-1.74602477 -1.74602477 -1.74602477]]

 [[-1.74602477 -1.74602477 -1.74602477]
  [-1.74602477 -1.74602477 -1.74602477]
  [-1.74602477 -1.74602477 -1.74602477]]

 [[-1.74602477 -1.74602477 -1.74602477]
  [-1.74602477 -1.74602477 -1.74602477]
  [-1.74602477 -1.74602477 -1.74602477]]]
Reversed Array:
 [-47.14266876642027]


In [17]:
original = np.random.randn()
if ( np.isscalar(original)):
    original = np.array([original])
print( original)
broadcasted = np.broadcast_to(original, (3, 3, 3))  # Shape (3, 3, 3)

# Reverse
reversed_array = gradient_of_broadcast_to(broadcasted, original.shape)
print("Original:\n", original)
print("Broadcasted:\n", broadcasted)
print("Reversed Array:\n", reversed_array)

[-0.42524255]
(1,)
Original:
 [-0.42524255]
Broadcasted:
 [[[-0.42524255 -0.42524255 -0.42524255]
  [-0.42524255 -0.42524255 -0.42524255]
  [-0.42524255 -0.42524255 -0.42524255]]

 [[-0.42524255 -0.42524255 -0.42524255]
  [-0.42524255 -0.42524255 -0.42524255]
  [-0.42524255 -0.42524255 -0.42524255]]

 [[-0.42524255 -0.42524255 -0.42524255]
  [-0.42524255 -0.42524255 -0.42524255]
  [-0.42524255 -0.42524255 -0.42524255]]]
Reversed Array:
 [-11.481548970904843]


In [19]:
original = np.random.randn(5, 4, 1)
if ( np.isscalar(original)):
    original = np.array([original])
print( original)
broadcasted = np.broadcast_to(original, (5, 4, 3)) 

# Reverse
reversed_array = gradient_of_broadcast_to(broadcasted, original.shape)
print("Original:\n", original)
print("Broadcasted:\n", broadcasted)
print("Reversed Array:\n", reversed_array)

[[[ 0.29877084]
  [-0.71388179]
  [-1.66295128]
  [ 0.06550301]]

 [[ 0.74284454]
  [ 1.13423424]
  [-1.08209977]
  [ 0.16671111]]

 [[ 0.8324051 ]
  [-0.22584753]
  [-0.48674743]
  [-1.44401173]]

 [[-0.45811331]
  [-1.7979243 ]
  [ 0.8949623 ]
  [-0.91325746]]

 [[ 1.00697804]
  [ 1.19306357]
  [-0.90621837]
  [ 0.9132823 ]]]
(5, 4, 1)
axes_to_sum:  [2]
Original:
 [[[ 0.29877084]
  [-0.71388179]
  [-1.66295128]
  [ 0.06550301]]

 [[ 0.74284454]
  [ 1.13423424]
  [-1.08209977]
  [ 0.16671111]]

 [[ 0.8324051 ]
  [-0.22584753]
  [-0.48674743]
  [-1.44401173]]

 [[-0.45811331]
  [-1.7979243 ]
  [ 0.8949623 ]
  [-0.91325746]]

 [[ 1.00697804]
  [ 1.19306357]
  [-0.90621837]
  [ 0.9132823 ]]]
Broadcasted:
 [[[ 0.29877084  0.29877084  0.29877084]
  [-0.71388179 -0.71388179 -0.71388179]
  [-1.66295128 -1.66295128 -1.66295128]
  [ 0.06550301  0.06550301  0.06550301]]

 [[ 0.74284454  0.74284454  0.74284454]
  [ 1.13423424  1.13423424  1.13423424]
  [-1.08209977 -1.08209977 -1.08209977]
  [ 0

In [None]:
gradient_check(ndl.broadcast_to, ndl.Tensor(np.random.randn(3, 1)), shape=(3, 3))
    gradient_check(ndl.broadcast_to, ndl.Tensor(np.random.randn(1, 3)), shape=(3, 3))
    gradient_check(
        ndl.broadcast_to,
        ndl.Tensor(
            np.random.randn(
                1,
            )
        ),
        shape=(3, 3, 3),
    )
    gradient_check(ndl.broadcast_to, ndl.Tensor(np.random.randn()), shape=(3, 3, 3))
    gradient_check(
        ndl.broadcast_to, ndl.Tensor(np.random.randn(5, 4, 1)), shape=(5, 4, 3)
    )

In [452]:
import numpy as np

def gradient_summation(grad_output, input_shape, axis=None, keepdims=False):
    """
    Computes the gradient for a summation operation in backpropagation.

    Parameters:
        grad_output (np.ndarray): Gradient of the loss with respect to the summation result.
        input_shape (tuple): Shape of the original input to the summation operation.
        axis (int or tuple of ints, optional): Axis or axes along which the summation was performed.
        keepdims (bool, optional): Whether the summation kept the reduced dimensions.

    Returns:
        np.ndarray: Gradient of the loss with respect to the input array.
    """
    # If keepdims=False, expand dimensions of grad_output to match input_shape
    if not keepdims:
        grad_output = np.expand_dims(grad_output, axis=axis)

    # Broadcast grad_output to the input shape
    print( grad_output.shape)
    print( input_shape)
    grad_input = np.broadcast_to(grad_output, input_shape)

    return grad_input

# Example
x = np.array([[1, 2], [3, 4]])  # Shape (2, 2)

# Forward summation
y = np.sum(x, axis=1, keepdims=False)  # Shape (2,)

# Gradient of the loss w.r.t. y
grad_output = np.array([1, 2])  # Shape (2,)

# Backward: Compute gradient w.r.t. x
grad_input = gradient_summation(grad_output, input_shape=x.shape, axis=1, keepdims=False)
print("Gradient w.r.t. Input Array:\n", grad_input)


(2, 1)
(2, 2)
Gradient w.r.t. Input Array:
 [[1 1]
 [2 2]]


In [453]:
def test_summation_backward():
    gradient_check(ndl.summation, ndl.Tensor(np.random.randn(5, 4)), axes=(1,))
    gradient_check(ndl.summation, ndl.Tensor(np.random.randn(5, 4)), axes=(0,))
    gradient_check(ndl.summation, ndl.Tensor(np.random.randn(5, 4)), axes=(0, 1))
    gradient_check(ndl.summation, ndl.Tensor(np.random.randn(5, 4, 1)), axes=(0, 1))

In [454]:
# Example
x = np.array( np.random.randn(2, 2))  # Shape (2, 2)

# Forward summation
y = np.sum(x, axis=1, keepdims=False)  # Shape (2,)

# Gradient of the loss w.r.t. y
grad_output = np.array([1, 2])  # Shape (2,)

# Backward: Compute gradient w.r.t. x
grad_input = gradient_summation(grad_output, input_shape=x.shape, axis=1, keepdims=False)
print("Gradient w.r.t. Input Array:\n", grad_input)

(2, 1)
(2, 2)
Gradient w.r.t. Input Array:
 [[1 1]
 [2 2]]


In [459]:
# Example
x = np.array( np.random.randn(2, 2))  # Shape (2, 2)

# Forward summation
y = np.sum(x, axis=1, keepdims=False)  # Shape (2,)
print( y.shape)

# Gradient of the loss w.r.t. y
grad_output = np.array([1, 2])  # Shape (2,)

# Backward: Compute gradient w.r.t. x
grad_input = gradient_summation(grad_output, input_shape=x.shape, axis=0, keepdims=False)
print("Gradient w.r.t. Input Array:\n", grad_input)

(2,)
(1, 2)
(2, 2)
Gradient w.r.t. Input Array:
 [[1 2]
 [1 2]]


In [467]:
# Example
x = np.array( np.random.randn(5, 4))  
print( x.shape)

# Forward summation
y = np.sum(x, axis=tuple((0,1)), keepdims=False)  
print( y.shape)

# Gradient of the loss w.r.t. y
grad_output = 5

# Backward: Compute gradient w.r.t. x
grad_input = gradient_summation(grad_output, input_shape=x.shape, axis=tuple((0,1)), keepdims=False)
print("Gradient w.r.t. Input Array:\n", grad_input)

(5, 4)
()
(1, 1)
(5, 4)
Gradient w.r.t. Input Array:
 [[5 5 5 5]
 [5 5 5 5]
 [5 5 5 5]
 [5 5 5 5]
 [5 5 5 5]]


In [469]:
# Example
x = np.array( np.random.randn(5, 4, 1))  
print( "x.shape: ", x.shape)

# Forward summation
y = np.sum(x, axis=tuple((0,1)), keepdims=False)  
print( "y.shape: ", y.shape)

# Gradient of the loss w.r.t. y
grad_output = 5

# Backward: Compute gradient w.r.t. x
grad_input = gradient_summation(grad_output, input_shape=x.shape, axis=tuple((0,1)), keepdims=False)
print("Gradient w.r.t. Input Array:\n", grad_input)

x.shape:  (5, 4, 1)
y.shape:  (1,)
(1, 1)
(5, 4, 1)
Gradient w.r.t. Input Array:
 [[[5]
  [5]
  [5]
  [5]]

 [[5]
  [5]
  [5]
  [5]]

 [[5]
  [5]
  [5]
  [5]]

 [[5]
  [5]
  [5]
  [5]]

 [[5]
  [5]
  [5]
  [5]]]


In [470]:
import numpy as np

def gradient_power(x, n, grad_output):
    """
    Computes the gradient for an element-wise power operation in backpropagation.

    Parameters:
        x (np.ndarray): Input array.
        n (float or np.ndarray): Power to which each element is raised.
        grad_output (np.ndarray): Gradient of the loss with respect to the output (z).

    Returns:
        np.ndarray: Gradient of the loss with respect to the input array (x).
    """
    # Compute the gradient using the chain rule
    grad_input = grad_output * n * np.power(x, n - 1)

    # Handle cases where x == 0 and n <= 1 to avoid NaNs or undefined values
    grad_input = np.where(x == 0, 0, grad_input)
    
    return grad_input

# Example
x = np.array([1.0, 2.0, 3.0, 0.0])  # Input
n = 2  # Power
z = np.power(x, n)  # Forward pass: z = x^n
grad_output = np.array([1.0, 1.0, 1.0, 1.0])  # Gradient of loss w.r.t z

# Backward pass
grad_input = gradient_power(x, n, grad_output)
print("Input Array:", x)
print("Output Array (z):", z)
print("Gradient w.r.t Input (x):", grad_input)


Input Array: [1. 2. 3. 0.]
Output Array (z): [1. 4. 9. 0.]
Gradient w.r.t Input (x): [2. 4. 6. 0.]
