In [1]:
import numpy as np

# Array Broadcasting

Recall that for arrays of the same size, binary operations are performed on an element-by-element basis:

Broadcasting allows these types of binary operations to be performed on arrays of different sizes

In [2]:
# Two 1-D integer arrays of the same length: `+` combines them element by element.
a = np.array([0, 1, 2])
b = np.array([5, 5, 5])
a + b

array([5, 6, 7])

In [3]:
a + 5  # the scalar 5 is combined with every element of `a`

array([5, 6, 7])

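Although NumPy does not actually duplicate the value in memory, this operation can be thought of as the number 5 being stretched (broadcast) into the array `[5, 5, 5]` and then added element by element to `a`.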

In [4]:
# 10x10 grid holding the floats 1.0 .. 100.0 in row-major order.
_10by10 = np.linspace(1, 100, num=100).reshape(10, 10)
# The scalar 5 is added to every one of the 100 entries.
_10by10 + 5

array([[  6.,   7.,   8.,   9.,  10.,  11.,  12.,  13.,  14.,  15.],
       [ 16.,  17.,  18.,  19.,  20.,  21.,  22.,  23.,  24.,  25.],
       [ 26.,  27.,  28.,  29.,  30.,  31.,  32.,  33.,  34.,  35.],
       [ 36.,  37.,  38.,  39.,  40.,  41.,  42.,  43.,  44.,  45.],
       [ 46.,  47.,  48.,  49.,  50.,  51.,  52.,  53.,  54.,  55.],
       [ 56.,  57.,  58.,  59.,  60.,  61.,  62.,  63.,  64.,  65.],
       [ 66.,  67.,  68.,  69.,  70.,  71.,  72.,  73.,  74.,  75.],
       [ 76.,  77.,  78.,  79.,  80.,  81.,  82.,  83.,  84.,  85.],
       [ 86.,  87.,  88.,  89.,  90.,  91.,  92.,  93.,  94.,  95.],
       [ 96.,  97.,  98.,  99., 100., 101., 102., 103., 104., 105.]])

In [5]:
# 3x3 float array of ones, displayed next to `a` so the two shapes can be compared.
ones3by3 = np.ones(shape=(3, 3))
a, ones3by3

(array([0, 1, 2]),
 array([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]]))

In [6]:
# `a` has shape (3,) and `ones3by3` has shape (3, 3): `a` is treated as a single
# row of shape (1, 3), and that row is stretched (broadcast) down axis 0 to match (3, 3).
ones3by3 + a

array([[1., 2., 3.],
       [1., 2., 3.],
       [1., 2., 3.]])

We can also do some more intricate stuff

In [7]:
# The same values 0..2 laid out two ways: `a` as a single row, `b` as a single column.
a = np.arange(3).reshape(1, 3)  # shape (1, 3)
b = np.arange(3).reshape(3, 1)  # shape (3, 1)
a, b

(array([[0, 1, 2]]),
 array([[0],
        [1],
        [2]]))

![image.png](attachment:image.png)

## Rules of Broadcasting

Rule 1: If the two arrays differ in their number of dimensions, the shape of the one with fewer dimensions is padded with ones on its leading (left) side; the array is then stretched (broadcast) along those new dimensions to match the other array.


In [8]:
# A (5, 4, 3) array of integers in 1..9 plus a (4, 3) array of floats in [0, 1).
M = np.random.randint(low=1, high=10, size=(5, 4, 3))
a = np.random.random((4, 3))
print(M.shape, a.shape)
print(M + a)
# `a` has one dimension fewer than M, so its shape is treated as (1, 4, 3); it is then
# stretched (broadcast) along axis 0, i.e. the same `a` is added to each of the 5 blocks of M.

(5, 4, 3) (4, 3)
[[[9.91641284 1.45333295 7.00451849]
  [9.86232144 4.39636948 9.82201089]
  [4.66762997 7.61319591 3.18165647]
  [5.14947868 2.22878171 5.44337714]]

 [[2.91641284 6.45333295 4.00451849]
  [1.86232144 6.39636948 9.82201089]
  [7.66762997 5.61319591 8.18165647]
  [1.14947868 8.22878171 5.44337714]]

 [[4.91641284 6.45333295 5.00451849]
  [9.86232144 5.39636948 4.82201089]
  [2.66762997 2.61319591 8.18165647]
  [2.14947868 7.22878171 3.44337714]]

 [[8.91641284 5.45333295 1.00451849]
  [2.86232144 7.39636948 6.82201089]
  [7.66762997 2.61319591 5.18165647]
  [5.14947868 7.22878171 2.44337714]]

 [[2.91641284 7.45333295 9.00451849]
  [7.86232144 2.39636948 8.82201089]
  [7.66762997 9.61319591 1.18165647]
  [4.14947868 9.22878171 8.44337714]]]


In [9]:
# A (5, 4, 3) array of integers in 1..9 plus a 1-D array of 3 floats in [0, 1).
M = np.random.randint(1, 10, size=(5, 4, 3))
a = np.random.random(3)  # `(3)` is just the int 3, not a tuple, so it is passed plainly
print(M.shape, a.shape)
print(M + a)
# The arrays differ in their number of dimensions; only the trailing dimension (size 3) agrees.
# `a` is therefore treated as shape (1, 1, 3) and stretched (broadcast) along axes 0 and 1,
# i.e. the same 3 values are reused for each of the 5*4 = 20 rows of M.

(5, 4, 3) (3,)
[[[5.32769107 6.09203983 3.70299486]
  [5.32769107 1.09203983 3.70299486]
  [5.32769107 5.09203983 2.70299486]
  [9.32769107 9.09203983 7.70299486]]

 [[7.32769107 4.09203983 8.70299486]
  [8.32769107 1.09203983 3.70299486]
  [5.32769107 7.09203983 7.70299486]
  [6.32769107 5.09203983 6.70299486]]

 [[8.32769107 7.09203983 3.70299486]
  [8.32769107 3.09203983 5.70299486]
  [6.32769107 4.09203983 7.70299486]
  [8.32769107 4.09203983 8.70299486]]

 [[9.32769107 3.09203983 2.70299486]
  [2.32769107 7.09203983 6.70299486]
  [4.32769107 8.09203983 1.70299486]
  [8.32769107 1.09203983 2.70299486]]

 [[2.32769107 9.09203983 5.70299486]
  [2.32769107 6.09203983 4.70299486]
  [1.32769107 9.09203983 2.70299486]
  [3.32769107 1.09203983 4.70299486]]]


In [10]:
# A (5, 4, 3) array of integers in 1..9 plus a (5, 1, 3) array of floats in [0, 1).
M = np.random.randint(low=1, high=10, size=(5, 4, 3))
a = np.random.random((5, 1, 3))
print(M.shape, a.shape)
print(M + a)
# Both arrays are 3-D and disagree only on axis 1 (4 vs 1), so the single row a[i]
# is stretched (broadcast) along axis 1 and reused for all 4 rows of M[i].

(5, 4, 3) (5, 1, 3)
[[[7.37820785 5.19331348 3.11761472]
  [3.37820785 4.19331348 7.11761472]
  [5.37820785 4.19331348 9.11761472]
  [4.37820785 7.19331348 9.11761472]]

 [[7.41584958 2.75394276 3.1213525 ]
  [5.41584958 6.75394276 7.1213525 ]
  [7.41584958 9.75394276 3.1213525 ]
  [8.41584958 6.75394276 8.1213525 ]]

 [[4.23240585 4.68925762 1.98835247]
  [5.23240585 6.68925762 1.98835247]
  [7.23240585 2.68925762 7.98835247]
  [6.23240585 2.68925762 4.98835247]]

 [[9.78770536 2.13874966 2.43512224]
  [4.78770536 8.13874966 8.43512224]
  [9.78770536 8.13874966 7.43512224]
  [8.78770536 5.13874966 8.43512224]]

 [[4.90452911 4.78465092 5.03937181]
  [4.90452911 9.78465092 8.03937181]
  [8.90452911 9.78465092 4.03937181]
  [5.90452911 6.78465092 6.03937181]]]


Rule 2: If the shapes of the two arrays do not match in some dimension, the array whose size is 1 in that dimension is stretched to match the size of the other array.


In [11]:
# A (5, 1, 1) array of integers in 1..9 plus a 1-D array of 3 floats in [0, 1).
M = np.random.randint(1, 10, size=(5, 1, 1))
a = np.random.random(3)  # `(3)` is just the int 3, not a tuple, so it is passed plainly
print(M.shape, a.shape)
print(M + a)
# `a` is first treated as shape (1, 1, 3), so the shapes compared are (5, 1, 1) and (1, 1, 3).
# Wherever the sizes differ, one of the two arrays has size 1 there, and that array is stretched:
#   axis 0: `a` has size 1, so `a` is stretched to match the size of M, i.e. 5
#   axis 1: both have size 1, so nothing is stretched
#   axis 2: `M` has size 1, so `M` is stretched to match the size of a, i.e. 3
# The result therefore has shape (5, 1, 3).

(5, 1, 1) (3,)
[[[9.35732878 9.83483734 9.66364454]]

 [[7.35732878 7.83483734 7.66364454]]

 [[8.35732878 8.83483734 8.66364454]]

 [[2.35732878 2.83483734 2.66364454]]

 [[9.35732878 9.83483734 9.66364454]]]


Rule 3: If in any dimension the sizes disagree and neither is equal to 1, an error is raised.


In [12]:
# Shapes (5, 3, 1) and (4, 2): `a` is treated as (1, 4, 2), and on axis 1 the sizes
# are 3 vs 4 with neither equal to 1, so the arrays cannot be broadcast together.
M = np.random.randint(1, 10, size=(5, 3, 1))
a = np.random.random((4, 2))
print(M.shape, a.shape)
# The error is the point of this cell, so it is caught and displayed instead of being
# left uncaught; otherwise "Restart & Run All" would stop here and skip the later cells.
try:
    print(M + a)
except ValueError as err:
    print(f"ValueError: {err}")

(5, 3, 1) (4, 2)


ValueError: operands could not be broadcast together with shapes (5,3,1) (4,2) 

## Practice

### Centering an Array

In [13]:
# Synthetic data: integers in 0..99 with shape (10, 5, 3).  Try playing with the dimensions.
X = np.random.randint(0, 100, size=(10, 5, 3))
# Averaging over axis 0 collapses the 10 entries along that axis into one (5, 3) array.
# Try playing with the axis.
Xmean = X.mean(axis=0)
Xmean

array([[46.8, 52.8, 57.9],
       [51.1, 53. , 47.6],
       [46.7, 32.4, 56. ],
       [58.3, 56.6, 59.3],
       [44.7, 41.2, 40.1]])

In [14]:
# Xmean (5, 3) is broadcast along axis 0 and subtracted from each of the 10 slices of X (10, 5, 3).
Xcentered = X - Xmean

In [15]:
# Every entry is ~0 (only floating-point round-off remains), i.e. the numbers are centered around 0.
Xcentered.mean(axis=0)

array([[ 2.84217094e-15,  2.84217094e-15,  1.42108547e-15],
       [-7.10542736e-16,  0.00000000e+00,  0.00000000e+00],
       [-2.84217094e-15,  1.42108547e-15,  0.00000000e+00],
       [ 2.84217094e-15, -1.42108547e-15,  2.84217094e-15],
       [-2.84217094e-15, -2.84217094e-15, -1.42108547e-15]])