In [26]:
import numpy as np

In [72]:
a = np.array([1, 2, 3])
print( a.shape)

(3,)


In [73]:
b = np.broadcast_to( a, (3,3))
print( b)
print( b.shape)

[[1 2 3]
 [1 2 3]
 [1 2 3]]
(3, 3)


In [74]:
c = b[:,-1]
print( c)

[3 3 3]


In [53]:
d = np.mean( c, axis=0)
print( d)

6.0


In [18]:
a = np.array([[1, 2], [3, 4]])
print( np.mean(a, axis=0))

[2. 3.]


In [381]:
a = np.random.randn(3, 1)
print( a)
b = np.broadcast_to( a, (3,3))
print( b)

[[-1.45709782]
 [-0.42757954]
 [ 0.97437019]]
[[-1.45709782 -1.45709782 -1.45709782]
 [-0.42757954 -0.42757954 -0.42757954]
 [ 0.97437019  0.97437019  0.97437019]]


In [382]:
b = np.broadcast_to( a, (3,3,3))
print( b)

[[[-1.45709782 -1.45709782 -1.45709782]
  [-0.42757954 -0.42757954 -0.42757954]
  [ 0.97437019  0.97437019  0.97437019]]

 [[-1.45709782 -1.45709782 -1.45709782]
  [-0.42757954 -0.42757954 -0.42757954]
  [ 0.97437019  0.97437019  0.97437019]]

 [[-1.45709782 -1.45709782 -1.45709782]
  [-0.42757954 -0.42757954 -0.42757954]
  [ 0.97437019  0.97437019  0.97437019]]]


In [75]:
# Original array
original = np.array([[1], [2], [3]])  # Shape (3, 1)

# Broadcasted array
broadcasted = np.broadcast_to(original, (3, 3))  # Shape (3, 3)

# Reverse operation by summing over the broadcasted axis
reversed_array = np.sum(broadcasted, axis=1, keepdims=True) / broadcasted.shape[1]
print("Reversed Array:\n", reversed_array)  # Shape (3, 1)


Reversed Array:
 [[1.]
 [2.]
 [3.]]


In [416]:
def gradient_of_broadcast_to(grad_output, original_shape):
    """
    Compute the gradient for a broadcast_to operation during backpropagation.

    Parameters:
        grad_output (np.ndarray): Gradient of the loss with respect to the broadcasted array.
        original_shape (tuple): Shape of the original array before broadcasting.

    Returns:
        np.ndarray: Gradient of the loss with respect to the original array.
    """
    print( original_shape)
    if ( len( original_shape) == 1):
        return [ np.sum( grad_output) ]
        
    # Determine the broadcasted axes
    if ( len( original_shape) != len( grad_output.shape)):
        raise ValueError( "mismatching shapes")
    ## we collect the axis which have been broadcast. assuming the original shape was 1...
    axes_to_sum = [i for i, (b, o) in enumerate(zip(grad_output.shape, original_shape)) if o == 1]
    print( axes_to_sum)
    
    # Sum over broadcasted axes
    grad_input = np.sum(grad_output, axis=tuple(axes_to_sum), keepdims=True)
    
    # Reshape back to the original shape
    return grad_input.reshape(original_shape)

In [417]:
# Example
original = np.random.randn(3, 1)  # Shape (3, 1)
broadcasted = np.broadcast_to(original, (3, 3))  # Shape (3, 3)

# Reverse
reversed_array = gradient_of_broadcast_to(broadcasted, original.shape)
print("Broadcasted:\n", broadcasted)
print("Reversed Array:\n", reversed_array)

(3, 1)
[1]
Broadcasted:
 [[-1.17608915 -1.17608915 -1.17608915]
 [ 0.38782625  0.38782625  0.38782625]
 [-0.51772851 -0.51772851 -0.51772851]]
Reversed Array:
 [[-3.52826746]
 [ 1.16347876]
 [-1.55318554]]


In [418]:
original = np.random.randn(1, 3)  # Shape (1, 3)
broadcasted = np.broadcast_to(original, (3, 3))  # Shape (3, 3)

# Reverse
reversed_array = gradient_of_broadcast_to(broadcasted, original.shape)
print("Broadcasted:\n", broadcasted)
print("Reversed Array:\n", reversed_array)

(1, 3)
[0]
Broadcasted:
 [[-1.63659525  0.36919706 -1.68096508]
 [-1.63659525  0.36919706 -1.68096508]
 [-1.63659525  0.36919706 -1.68096508]]
Reversed Array:
 [[-4.90978576  1.10759118 -5.04289525]]


In [422]:
original = np.random.randn(1,)  # Shape (1, )
broadcasted = np.broadcast_to(original, (3, 3, 3))  # Shape (3, 3, 3)

# Reverse
reversed_array = gradient_of_broadcast_to(broadcasted, original.shape)
print("Original:\n", original)
print("Broadcasted:\n", broadcasted)

print("Reversed Array:\n", reversed_array)

(1,)
Original:
 [-0.58877852]
Broadcasted:
 [[[-0.58877852 -0.58877852 -0.58877852]
  [-0.58877852 -0.58877852 -0.58877852]
  [-0.58877852 -0.58877852 -0.58877852]]

 [[-0.58877852 -0.58877852 -0.58877852]
  [-0.58877852 -0.58877852 -0.58877852]
  [-0.58877852 -0.58877852 -0.58877852]]

 [[-0.58877852 -0.58877852 -0.58877852]
  [-0.58877852 -0.58877852 -0.58877852]
  [-0.58877852 -0.58877852 -0.58877852]]]
Reversed Array:
 [-15.897019955661571]


In [None]:
original = np.random.randn()
if ( np.isscalar(original)):
    original = np.array([original])
print( original)
broadcasted = np.broadcast_to(s, (3, 3, 3))  # Shape (3, 3, 3)

# Reverse
reversed_array = gradient_of_broadcast_to(broadcasted, original.shape)
print("Original:\n", original)
print("Broadcasted:\n", broadcasted)
print("Reversed Array:\n", reversed_array)

In [435]:
original = np.random.randn(5, 4, 1)
if ( np.isscalar(original)):
    original = np.array([original])
print( original)
broadcasted = np.broadcast_to(s, (5, 4, 3)) 

# Reverse
reversed_array = gradient_of_broadcast_to(broadcasted, original.shape)
print("Original:\n", original)
print("Broadcasted:\n", broadcasted)
print("Reversed Array:\n", reversed_array)

[[[ 0.76384899]
  [-0.38245104]
  [ 1.50120866]
  [ 0.54142751]]

 [[ 0.47968537]
  [ 0.02618534]
  [-0.71905542]
  [ 2.25566328]]

 [[-0.31420166]
  [-0.9262886 ]
  [-0.57175525]
  [-0.12369266]]

 [[ 0.7672116 ]
  [ 0.24611699]
  [ 0.21535203]
  [-1.88836109]]

 [[-0.93354901]
  [ 1.36224557]
  [-1.72469455]
  [ 0.27150522]]]
(5, 4, 1)
[2]
Original:
 [[[ 0.76384899]
  [-0.38245104]
  [ 1.50120866]
  [ 0.54142751]]

 [[ 0.47968537]
  [ 0.02618534]
  [-0.71905542]
  [ 2.25566328]]

 [[-0.31420166]
  [-0.9262886 ]
  [-0.57175525]
  [-0.12369266]]

 [[ 0.7672116 ]
  [ 0.24611699]
  [ 0.21535203]
  [-1.88836109]]

 [[-0.93354901]
  [ 1.36224557]
  [-1.72469455]
  [ 0.27150522]]]
Broadcasted:
 [[[1.17013176 1.17013176 1.17013176]
  [1.17013176 1.17013176 1.17013176]
  [1.17013176 1.17013176 1.17013176]
  [1.17013176 1.17013176 1.17013176]]

 [[1.17013176 1.17013176 1.17013176]
  [1.17013176 1.17013176 1.17013176]
  [1.17013176 1.17013176 1.17013176]
  [1.17013176 1.17013176 1.17013176]]

 

In [None]:
gradient_check(ndl.broadcast_to, ndl.Tensor(np.random.randn(3, 1)), shape=(3, 3))
    gradient_check(ndl.broadcast_to, ndl.Tensor(np.random.randn(1, 3)), shape=(3, 3))
    gradient_check(
        ndl.broadcast_to,
        ndl.Tensor(
            np.random.randn(
                1,
            )
        ),
        shape=(3, 3, 3),
    )
    gradient_check(ndl.broadcast_to, ndl.Tensor(np.random.randn()), shape=(3, 3, 3))
    gradient_check(
        ndl.broadcast_to, ndl.Tensor(np.random.randn(5, 4, 1)), shape=(5, 4, 3)
    )

In [452]:
import numpy as np

def gradient_summation(grad_output, input_shape, axis=None, keepdims=False):
    """
    Computes the gradient for a summation operation in backpropagation.

    Parameters:
        grad_output (np.ndarray): Gradient of the loss with respect to the summation result.
        input_shape (tuple): Shape of the original input to the summation operation.
        axis (int or tuple of ints, optional): Axis or axes along which the summation was performed.
        keepdims (bool, optional): Whether the summation kept the reduced dimensions.

    Returns:
        np.ndarray: Gradient of the loss with respect to the input array.
    """
    # If keepdims=False, expand dimensions of grad_output to match input_shape
    if not keepdims:
        grad_output = np.expand_dims(grad_output, axis=axis)

    # Broadcast grad_output to the input shape
    print( grad_output.shape)
    print( input_shape)
    grad_input = np.broadcast_to(grad_output, input_shape)

    return grad_input

# Example
x = np.array([[1, 2], [3, 4]])  # Shape (2, 2)

# Forward summation
y = np.sum(x, axis=1, keepdims=False)  # Shape (2,)

# Gradient of the loss w.r.t. y
grad_output = np.array([1, 2])  # Shape (2,)

# Backward: Compute gradient w.r.t. x
grad_input = gradient_summation(grad_output, input_shape=x.shape, axis=1, keepdims=False)
print("Gradient w.r.t. Input Array:\n", grad_input)


(2, 1)
(2, 2)
Gradient w.r.t. Input Array:
 [[1 1]
 [2 2]]


In [453]:
def test_summation_backward():
    gradient_check(ndl.summation, ndl.Tensor(np.random.randn(5, 4)), axes=(1,))
    gradient_check(ndl.summation, ndl.Tensor(np.random.randn(5, 4)), axes=(0,))
    gradient_check(ndl.summation, ndl.Tensor(np.random.randn(5, 4)), axes=(0, 1))
    gradient_check(ndl.summation, ndl.Tensor(np.random.randn(5, 4, 1)), axes=(0, 1))

In [454]:
# Example
x = np.array( np.random.randn(2, 2))  # Shape (2, 2)

# Forward summation
y = np.sum(x, axis=1, keepdims=False)  # Shape (2,)

# Gradient of the loss w.r.t. y
grad_output = np.array([1, 2])  # Shape (2,)

# Backward: Compute gradient w.r.t. x
grad_input = gradient_summation(grad_output, input_shape=x.shape, axis=1, keepdims=False)
print("Gradient w.r.t. Input Array:\n", grad_input)

(2, 1)
(2, 2)
Gradient w.r.t. Input Array:
 [[1 1]
 [2 2]]


In [459]:
# Example
x = np.array( np.random.randn(2, 2))  # Shape (2, 2)

# Forward summation
y = np.sum(x, axis=1, keepdims=False)  # Shape (2,)
print( y.shape)

# Gradient of the loss w.r.t. y
grad_output = np.array([1, 2])  # Shape (2,)

# Backward: Compute gradient w.r.t. x
grad_input = gradient_summation(grad_output, input_shape=x.shape, axis=0, keepdims=False)
print("Gradient w.r.t. Input Array:\n", grad_input)

(2,)
(1, 2)
(2, 2)
Gradient w.r.t. Input Array:
 [[1 2]
 [1 2]]


In [462]:
# Example
x = np.array( np.random.randn(5, 4))  # Shape (2, 2)

# Forward summation
y = np.sum(x, axis=tuple((0,1)), keepdims=False)  # Shape (2,)

# Gradient of the loss w.r.t. y
grad_output = np.array( np.random.randn(5, 4))

# Backward: Compute gradient w.r.t. x
grad_input = gradient_summation(grad_output, input_shape=x.shape, axis=tuple((0,1)), keepdims=False)
print("Gradient w.r.t. Input Array:\n", grad_input)

(1, 1, 5, 4)
(5, 4)


ValueError: input operand has more dimensions than allowed by the axis remapping