# NumPy Indexing and Selection

In this lecture we will discuss how to select elements or groups of elements from an array.

In [1]:
import numpy as np

In [5]:
#Creating sample array
arr = np.arange(100,111)

In [6]:
#Show
arr

array([100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110])

## Bracket Indexing and Selection
The simplest way to pick one or some elements of an array looks very similar to indexing Python lists:

In [7]:
#Get a value at an index
arr[8]

np.int64(108)

In [8]:
#Get values in a range
arr[1:5]

array([101, 102, 103, 104])

In [9]:
#Get values in a range
arr[0:5]

array([100, 101, 102, 103, 104])

In [10]:
#Get values in a range
arr[:5]

array([100, 101, 102, 103, 104])

## Broadcasting

NumPy arrays differ from normal Python lists because of their ability to broadcast. With lists, slice assignment needs an iterable on the right-hand side: if you wanted to replace the first 5 elements in a list with a new value, you would have to pass in a new 5 element list (`my_list[0:5] = 1` raises a `TypeError`, and passing a list of a different length changes the size of the list). With NumPy arrays, you can broadcast a single value across a larger set of values:

In [12]:
#Setting a value with index range (Broadcasting)
arr[0:5]=1

#Show
arr

array([  1,   1,   1,   1,   1, 105, 106, 107, 108, 109, 110])

In [14]:
# Reset array; we'll see why I had to reset in a moment
arr = np.arange(100,111)

#Show
arr

array([100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110])

In [15]:
arr[0:6]

array([100, 101, 102, 103, 104, 105])

In [16]:
#Important notes on Slices
slice_of_arr = arr[0:6]

#Show slice
slice_of_arr

array([100, 101, 102, 103, 104, 105])

In [17]:
slice_of_arr[:]

array([100, 101, 102, 103, 104, 105])

In [18]:
#Change Slice
slice_of_arr[:]=99

#Show Slice again
slice_of_arr

array([99, 99, 99, 99, 99, 99])

Now note the changes also occur in our original array!

In [19]:
arr

array([ 99,  99,  99,  99,  99,  99, 106, 107, 108, 109, 110])

The data is not copied: a slice is a view of the original array! This avoids unnecessary memory use when working with large arrays.

In [20]:
#To get a copy, need to be explicit
arr = np.arange(100,111)
arr_copy = arr.copy()

arr_copy

array([100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110])

In [21]:
#Important notes on Slices: this time we slice the explicit copy, not the original array
slice_of_arr = arr_copy[0:6]

#Show slice
slice_of_arr

array([100, 101, 102, 103, 104, 105])

In [22]:
#Change Slice
slice_of_arr[:]=99

#Show Slice again
slice_of_arr

array([99, 99, 99, 99, 99, 99])

In [23]:
print(f"arr_copy: {arr_copy}")
print(f"arr: {arr}")

arr_copy: [ 99  99  99  99  99  99 106 107 108 109 110]
arr: [100 101 102 103 104 105 106 107 108 109 110]


## Indexing a 2D array (matrices)

The general format is **arr_2d[row][col]** or **arr_2d[row,col]**. I recommend usually using the comma notation for clarity.

In [24]:
#3x3 matrix built from a nested list, one inner list per row
arr_2d = np.array([
    [5, 10, 15],
    [20, 25, 30],
    [35, 40, 45],
])

#Show
arr_2d

array([[ 5, 10, 15],
       [20, 25, 30],
       [35, 40, 45]])

In [25]:
arr_2d[0]

array([ 5, 10, 15])

In [26]:
#Indexing row
arr_2d[1]


array([20, 25, 30])

In [27]:
# Format is arr_2d[row][col] or arr_2d[row,col]

# Getting individual element value
arr_2d[1][0]

np.int64(20)

In [29]:
arr_2d[1][2]

np.int64(30)

In [28]:
arr_2d

array([[ 5, 10, 15],
       [20, 25, 30],
       [35, 40, 45]])

In [30]:
# Getting individual element value
arr_2d[1,0]

np.int64(20)

In [31]:
arr_2d[1,2]

np.int64(30)

In [33]:
arr_2d

array([[ 5, 10, 15],
       [20, 25, 30],
       [35, 40, 45]])

In [35]:
# 2D array slicing

#First two rows, all columns: shape (2,3)
arr_2d[:2]

array([[ 5, 10, 15],
       [20, 25, 30]])

In [36]:
#Shape (2,2) from top right corner: first two rows, columns 1 onward
arr_2d[:2,1:]

array([[10, 15],
       [25, 30]])

In [37]:
arr_2d

array([[ 5, 10, 15],
       [20, 25, 30],
       [35, 40, 45]])

In [38]:
#Bottom row, selected with a single row index
arr_2d[2]

array([35, 40, 45])

In [39]:
#Bottom row again, with an explicit ':' selecting every column
arr_2d[2,:]

array([35, 40, 45])

In [40]:
# 6x5 matrix holding the integers 10..39, filled row by row
arr1 = np.arange(10, 40).reshape((6, 5))
arr1

array([[10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24],
       [25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34],
       [35, 36, 37, 38, 39]])

In [42]:
arr1[3:]

array([[25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34],
       [35, 36, 37, 38, 39]])

In [43]:
arr1[3:,2:4]

array([[27, 28],
       [32, 33],
       [37, 38]])

In [44]:
arr1.shape

(6, 5)

In [45]:
# Number of rows
arr1.shape[0]

6

In [46]:
# Number of columns
arr1.shape[1]

5

### Fancy Indexing

Fancy indexing allows you to select entire rows or columns out of order. To show this, let's quickly build out a NumPy array:

In [47]:
#Set up matrix
arr2d = np.zeros((10,10))
arr2d

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [48]:
arr2d.shape

(10, 10)

In [49]:
arr2d.shape[1]

10

In [50]:
#Number of rows (length of the array along its first axis)
arr_length = arr2d.shape[0]

In [54]:
#Small (3,4) demo: give each row its own index as value and print the
#array after every assignment so the row-wise broadcast is visible
arr2d = np.zeros((3,4))
arr_length = len(arr2d)  # number of rows, same value as arr2d.shape[0]
for i in range(arr_length):
    # Row index, then the row as it looks before it is overwritten
    print(i, f"arr2d[i]: {arr2d[i]}", sep="\n")
    arr2d[i] = i  # scalar is broadcast across the whole row
    print(arr2d)

0
arr2d[i]: [0. 0. 0. 0.]
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
1
arr2d[i]: [0. 0. 0. 0.]
[[0. 0. 0. 0.]
 [1. 1. 1. 1.]
 [0. 0. 0. 0.]]
2
arr2d[i]: [0. 0. 0. 0.]
[[0. 0. 0. 0.]
 [1. 1. 1. 1.]
 [2. 2. 2. 2.]]


In [None]:
#Set up array: every row of the 10x10 matrix is filled with its own row index
arr2d = np.zeros((10,10))
arr_length = len(arr2d)  # number of rows, same value as arr2d.shape[0]
for i in range(arr_length):
    arr2d[i, :] = i  # scalar is broadcast across the whole row

arr2d

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
       [3., 3., 3., 3., 3., 3., 3., 3., 3., 3.],
       [4., 4., 4., 4., 4., 4., 4., 4., 4., 4.],
       [5., 5., 5., 5., 5., 5., 5., 5., 5., 5.],
       [6., 6., 6., 6., 6., 6., 6., 6., 6., 6.],
       [7., 7., 7., 7., 7., 7., 7., 7., 7., 7.],
       [8., 8., 8., 8., 8., 8., 8., 8., 8., 8.],
       [9., 9., 9., 9., 9., 9., 9., 9., 9., 9.]])

In [None]:
arr2d[0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [57]:
len(arr2d)

10

Fancy indexing lets you pass a list of row indices and get those rows back, in whatever order you list them:

In [59]:
arr2d[1]

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [60]:
arr2d

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
       [3., 3., 3., 3., 3., 3., 3., 3., 3., 3.],
       [4., 4., 4., 4., 4., 4., 4., 4., 4., 4.],
       [5., 5., 5., 5., 5., 5., 5., 5., 5., 5.],
       [6., 6., 6., 6., 6., 6., 6., 6., 6., 6.],
       [7., 7., 7., 7., 7., 7., 7., 7., 7., 7.],
       [8., 8., 8., 8., 8., 8., 8., 8., 8., 8.],
       [9., 9., 9., 9., 9., 9., 9., 9., 9., 9.]])

In [61]:
arr2d[[2,4,6,8]]

array([[2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
       [4., 4., 4., 4., 4., 4., 4., 4., 4., 4.],
       [6., 6., 6., 6., 6., 6., 6., 6., 6., 6.],
       [8., 8., 8., 8., 8., 8., 8., 8., 8., 8.]])

In [62]:
#Allows in any order
arr2d[[6,4,2,7]]

array([[6., 6., 6., 6., 6., 6., 6., 6., 6., 6.],
       [4., 4., 4., 4., 4., 4., 4., 4., 4., 4.],
       [2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
       [7., 7., 7., 7., 7., 7., 7., 7., 7., 7.]])

## More Indexing Help
Indexing a 2D matrix can be a bit confusing at first, especially when you start to add in step size. Try a Google image search for *NumPy indexing* to find useful images, like this one:

<img src= 'numpy_indexing.png' width=500/> <br>
Image source: http://www.scipy-lectures.org/intro/numpy/numpy.html

## Conditional Selection

This is a very fundamental concept that will directly translate to pandas later on, so make sure you understand this part!

Let's briefly go over how to use brackets for selection based off of comparison operators.

In [63]:
arr = np.arange(1,11)
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [64]:
arr > 4

array([False, False, False, False,  True,  True,  True,  True,  True,
        True])

In [65]:
bool_arr = arr>4

In [66]:
bool_arr

array([False, False, False, False,  True,  True,  True,  True,  True,
        True])

In [67]:
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [68]:
arr[bool_arr]

array([ 5,  6,  7,  8,  9, 10])

In [70]:
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [69]:
arr[arr>4]

array([ 5,  6,  7,  8,  9, 10])

In [71]:
arr[arr>2]

array([ 3,  4,  5,  6,  7,  8,  9, 10])

In [73]:
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [76]:
x = 6
arr[arr>x]

array([ 7,  8,  9, 10])

In [77]:
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [None]:
arr%2==0  # Condition for even

array([False,  True, False,  True, False,  True, False,  True, False,
        True])

In [82]:
arr[arr%2==0]

array([ 2,  4,  6,  8, 10])

In [84]:
# Plain Python list holding the integers 1 through 50
input_list = [*range(1, 51)]
print(input_list)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50]


In [87]:
# Filtering even items using a list comprehension
even_items = [item for item in input_list if item % 2 == 0]
print(even_items)

[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50]


In [89]:
# Convert the Python list into a NumPy array
input_arr = np.asarray(input_list)
print(input_arr)

[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
 49 50]


In [91]:
# Boolean mask marking the even entries; indexing with it keeps only those,
# and tolist() turns the result back into a plain Python list for printing
is_even = (input_arr % 2) == 0
output_arr = input_arr[is_even]
print(output_arr.tolist())

[2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50]


# Great Job!
