# NDAP: In-class demos and notes
For 9.24.2018.

## Binary (boolean) indexing & a caution about integer (0/1) indices

In [1]:
import numpy as np

In [5]:
# a small 1-D array of integers to practice indexing on
arr = np.array((-10, 15, 12, 45))
arr

array([-10,  15,  12,  45])

In [8]:
# a boolean mask with one entry per element of arr:
# True keeps the element at that position, False drops it
binary_inds = np.array([True, False, False, True])
arr[binary_inds]

array([-10,  45])

In [9]:
integer_inds = np.array([1, 0, 0, 1])
arr[integer_inds]
# CAUTION: an integer array is NOT treated as a mask, even if it only holds 0s and 1s.
# each entry is used as a position, so this returns arr[1], arr[0], arr[0], arr[1]
# -- not something we want if we meant to select elements the way a boolean mask does

array([ 15, -10, -10,  15])

In [11]:
# summing booleans treats True as 1 and False as 0,
# so the boolean array and the 0/1 integer array add up to the same total
# (this is Python's built-in sum, which walks over the array one element at a time)
sum(binary_inds), sum(integer_inds)

(2, 2)

## Aggregation

In [13]:
# using numpy's sum method for aggregation
# on a boolean array this counts the number of True entries
binary_inds.sum()

2

In [14]:
# mixing integers with a single float (4.3) produces an array in which
# every element is stored as a float (note the trailing dots in the output)
new_arr = np.array([-1, 15, 4.3, 22, -247])
new_arr

array([  -1. ,   15. ,    4.3,   22. , -247. ])

In [17]:
# the minimum, called as a method on the array
# (print is used because a cell only displays its last expression on its own)
print(new_arr.min())
# or, equivalently, called as a numpy function
print(np.min(new_arr))

-247.0
-247.0


In [18]:
# the maximum works the same way
new_arr.max()

22.0

In [19]:
# argmin gives you the index of the smallest element
# (not the value itself: here -247. sits at index 4)
new_arr.argmin()

4

In [20]:
# argmax gives you the index of the largest element
# (not the value itself: here 22. sits at index 3)
new_arr.argmax()

3

### By default, aggregation over a multi-dimensional array treats the array as a single list of numbers

In [21]:
# a 3 x 4 x 5 array of random floats drawn uniformly from [0, 1)
# NOTE: no random seed is set before this call, so the values (and every result
# computed from them below) will be different each time the cell is run
multidarr = np.random.rand(3,4,5)
multidarr

array([[[0.79629225, 0.68841275, 0.90252586, 0.97102866, 0.4020136 ],
        [0.60317576, 0.96181761, 0.84249761, 0.02573068, 0.0565352 ],
        [0.47630139, 0.47996907, 0.0612906 , 0.79650637, 0.96943564],
        [0.57579347, 0.81448118, 0.4904087 , 0.97746057, 0.54521238]],

       [[0.25443516, 0.6209341 , 0.84233689, 0.83313533, 0.79505772],
        [0.77018304, 0.69207702, 0.54769943, 0.59110585, 0.96973761],
        [0.43883902, 0.85364236, 0.20877702, 0.65346745, 0.04431184],
        [0.32694321, 0.78358956, 0.146451  , 0.58058277, 0.77287835]],

       [[0.54938303, 0.95775754, 0.74368556, 0.05043862, 0.37553552],
        [0.03677752, 0.60492259, 0.29897256, 0.69270464, 0.04357121],
        [0.97975705, 0.78266617, 0.45995998, 0.29842893, 0.24949076],
        [0.69761048, 0.3535772 , 0.66825135, 0.88425223, 0.2232827 ]]])

In [22]:
# shape lists the length of each dimension, in order
multidarr.shape

(3, 4, 5)

In [23]:
# a single number, the minimum over the whole array!
# (no axis is given, so all 3 * 4 * 5 = 60 values are compared together)
multidarr.min()

0.025730680225351632

In [24]:
# with no axis given, sum adds up every element into a single number
multidarr.sum()

34.114099671590154

### Adding an "axis" parameter aggregates across one dimension of the array

In [25]:
# a small 2-D integer array, written out one row per line: 2 rows, 3 columns
demo_arr = np.array([
    [0, 3, 5],
    [1, 2, -1],
])
demo_arr

array([[ 0,  3,  5],
       [ 1,  2, -1]])

In [28]:
# we can get the minimum of just the first column by indexing and then aggregating
# demo_arr[:,0] takes every row of column 0, i.e. the values 0 and 1
demo_arr[:,0].min()

0

In [29]:
# we can get the minimum over the whole array
# (no axis given, so all six values are compared together)
demo_arr.min()

-1

In [30]:
# using axis=0 aggregates ACROSS ROWS (we get one value for each column)
# i.e. min(0, 1), min(3, 2), min(5, -1)
demo_arr.min(axis=0)

array([ 0,  2, -1])

In [31]:
# using axis=1 aggregates ACROSS COLUMNS (we get one value for each row)
# i.e. min(0, 3, 5), min(1, 2, -1)
demo_arr.min(axis=1)

array([ 0, -1])

In [33]:
# aside:
# taking argmin of a multi-dimensional array gives you the index of the smallest element
# IN THE FLATTENED ARRAY. this is kind of weird and should probably be avoided most of
# the time
# here the rows laid end to end are [0, 3, 5, 1, 2, -1], and -1 is at position 5.
# if you do need it, np.unravel_index(5, demo_arr.shape) turns that position back
# into a (row, column) pair
demo_arr.argmin() # index into flattened array!

5

In [37]:
# this gives the index of the smallest element across rows (i.e. in each column)
# each entry of the result is a ROW index: column 0 has its minimum in row 0,
# columns 1 and 2 have theirs in row 1
demo_arr.argmin(axis=0)

array([0, 1, 1])

In [38]:
# same, but across columns (i.e. in each row)
# each entry of the result is a COLUMN index: row 0 has its minimum in column 0,
# row 1 has its minimum in column 2
demo_arr.argmin(axis=1)

array([0, 2])

In [36]:
# argmin with a repeated element still only returns one index: the first one!
# (the smallest value, 0, appears at index 0 and again at index 4)
np.argmin([0, 1, 2, 3, 0]) # returns the first index!

0

In [39]:
# for reference: demo_arr has 2 rows (axis 0) and 3 columns (axis 1)
demo_arr.shape

(2, 3)

In [40]:
# aggregation over an axis REMOVES THAT AXIS FROM THE ARRAY
# axis 0 (length 2) is gone, only the length-3 axis is left
demo_arr.min(axis=0).shape

(3,)

In [41]:
# aggregating over axis 1 (length 3) removes it, only the length-2 axis is left
demo_arr.min(axis=1).shape

(2,)

In [42]:
# a 5-dimensional array filled with zeros; only its shape matters for this example
big_demo_arr = np.zeros(shape=(2, 6, 3, 4, 9))
big_demo_arr.shape

(2, 6, 3, 4, 9)

In [43]:
# aggregating over axis=2 (the third dimension, of length 3) removes just that axis;
# the other four dimensions keep their lengths and their order
big_demo_arr.min(axis=2).shape

(2, 6, 4, 9)