In [2]:
#2 Numpy Indexing and Selection

In [3]:
import numpy as np

In [4]:
arr = np.arange(0,11)

In [5]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [6]:
# OK, let's talk about Bracket indexing and selection. The simplest
# way to pick one or some elements in an array is exactly like we
# did with a Python list. You can grab the array and use bracket
# indexing to grab an index location, so for the element at index
# 8, you ask for index 8.

In [7]:
arr[8]

8

In [8]:
# For a range of values, use slice notation, which works the same
# way as for a Python list or a Python string.
# You give the starting index, a colon, and the stop index; the
# slice runs UP TO, but does not include, the stop index.
# So, here you should get the elements at indices 1 to 4.
arr[1:5]

array([1, 2, 3, 4])

In [9]:
# to get EVERYTHING up to but not including 5
arr[:5]

array([0, 1, 2, 3, 4])

In [10]:
#likewise, you can set a starting position and go all the way to 
#the end 
arr[3:]

array([ 3,  4,  5,  6,  7,  8,  9, 10])

In [11]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [13]:
# Broadcasting is one way NumPy arrays differ from a normal Python
# list. This array holds the values 0 through 10. We can take a
# slice of it, e.g. indices 0 through 4, and assign a single number
# to that slice; NumPy "broadcasts" the scalar to every element of
# the slice. A plain Python list does not allow this: assigning a
# scalar to a list slice raises a TypeError. When we ask for the
# array back, its first five elements are all 100.

arr[0:5] = 100
arr

array([100, 100, 100, 100, 100,   5,   6,   7,   8,   9,  10])

In [14]:
# We'll reset the array. (Note that the slice assignment changed arr
# itself, so we rebuild it to get the values 0 through 10 back.)
arr = np.arange(0,11)

In [15]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [19]:
# It's important to note that if you have some sort of variable
# that's assigned to a slice of an array eg.
slice_of_arr = arr[0:6]

In [20]:
slice_of_arr 
# so, checking out slice_of_arr, we get those first elements.

array([0, 1, 2, 3, 4, 5])

In [21]:
# if we reassign this as below
# (note that [:] just means 'select everything'),
# and check out our slice of the array, 
#everything is now set to 99.

slice_of_arr[:] = 99
slice_of_arr

array([99, 99, 99, 99, 99, 99])

In [22]:
# Now, if we check back on our original array, notice that the
# slice was not a separate copy: it was a view onto the first six
# elements of arr. So, assigning through the slice changed the
# original array as well.
arr

array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])

In [24]:
# So, we can see that the data is actually not copied; the slice is
# just a view of the original array. The reason for this is to avoid
# memory problems. Often, when working with Numpy and pandas
# (which is built on top of Numpy), you're going to be working
# with pretty large datasets, and you don't want every variable
# assignment to create another copy of your dataset; otherwise
# you would quickly run out of RAM if you had really large datasets.

# Instead, if you explicitly wanted to copy, you need to say so, eg.


In [25]:
arr_copy = arr.copy()

In [26]:
arr

array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])

In [27]:
# now, if we do something to copy, eg. grab everything [:] 
# and set it equal to 200, our array copy has been changed...

arr_copy[:] = 200
arr_copy

array([200, 200, 200, 200, 200, 200, 200, 200, 200, 200, 200])

In [29]:
# ... but our original array is still the same.

In [30]:
arr

array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])

In [31]:
# So, explicitly call 'copy' if you want a copy of your array.

In [33]:
# OK, let's talk about indexing a two-dimensional array,
# otherwise known as a matrix. The general format is as follows:
# mat[row, col], e.g. mat[1, 2]: first the row you want to index,
# then the column you want to index. The other way to do this is
# to split it into two steps: mat[row][col].
# As an example, we'll create an object called mat by passing a
# nested list (one inner list per row) to np.array.

mat = np.array([[5,10,15],[20,25,30],[35,40,45]])

In [34]:
# to get back a matrix that looks like this
# lots of ways we could have done this matrix, 
# this is just the simplest, doing it manually.
mat

array([[ 5, 10, 15],
       [20, 25, 30],
       [35, 40, 45]])

In [35]:
# let's say we want to index an entire row. We use mat, square
# brackets, and ask for the index position for the row we want.
# So, if we want the first row, we just pass in [0] and it returns
# the first row. [2] for the last row etc.
mat[0]

array([ 5, 10, 15])

In [36]:
mat[2]

array([35, 40, 45])

In [37]:
# to grab an individual element value - there are two ways - eg.
# the value 25. We can do mat[row position] and then 
# enter [column position], so eg. mat[1][1]
mat [1][1]

25

In [38]:
# More commonly, you'll see this done in one step:
mat[1,1]
# You'll see this common notation used in the documentation, 
# instead of the dual brackets, because this common notation 
# will also expand to slice notation. 

25

In [40]:
# Try it again, picking a number and see if you can index it out
# using this common notation. Choosing '30'.
mat[1, 2]

30

In [41]:
# For '15', it would be:
mat[0,2]

15

In [42]:
# Let's look at 2d array slicing. Going back to our matrix again.
mat

array([[ 5, 10, 15],
       [20, 25, 30],
       [35, 40, 45]])

In [43]:
# Let's say we want to grab 10, 15 in the first row and 25, 30 in
# the second row. So, we're grabbing a little square within the
# larger matrix.

# We do this by giving the rows we want as a slice.
# We want everything from the first row (index 0)
# up to, but not including, index 2.
# That's mat[:2,]
# So, that's rows zero and one: the stop index 2 is excluded, which
# leaves indices 0 and 1.
# Then, we do the same for the columns.
# For the columns, we want to start at column index 1 and
# go all the way to the end.
# That's mat[:2, 1:]

mat[:2, 1:]

array([[10, 15],
       [25, 30]])

In [46]:
# Go ahead and pick another square and see if you can index it.
# 20,25
# 35,40

mat[1:,:2]
# first, I did mat[1:,:3] and it was too wide.

array([[20, 25],
       [35, 40]])

In [48]:
# Your Jupyter notebook will have a couple more help examples.
# We won't use this slicing syntax too often,
# because we'll use Pandas + named indices, but this was just to
# get your brain into the mode of thinking
# in two-dimensional slicing.

# Instead, what we'll be using next is something called
# conditional selection, a really fundamental concept that will
# directly translate to Pandas in the next section.

# Conditional Selection
arr = np.arange(1,11)
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [49]:
# let's imagine that I wanted to grab elements off this array,
# based on some sort of comparison operator eg. I wanted to 
# grab every element that was greater than four. 
# The process for doing this is the following: I can actually
# compare my entire array to the number four ie. 
# 'where is this array greater than four?' 
arr > 4

array([False, False, False, False,  True,  True,  True,  True,  True,
        True])

In [50]:
# It returns an array of boolean values: the first four elements
# (1 through 4, NB including 4 itself) are not greater than four,
# so they are False, and the ones after are True.
# (NB the instructor's notebook output ended with 'dtype=bool',
# which mine did not show - presumably a NumPy version difference
# in how arrays are printed.)
# Then, I can assign this result to a new boolean array.

bool_arr = arr > 4

In [51]:
# and then I can grab my original array and pass in that array
# of boolean values. What it ends up doing is it only returns 
# the array where the boolean values happen to be true. So, it 
# ends up returning only the values where the index location 
# matches 'True.' 

arr[bool_arr]

array([ 5,  6,  7,  8,  9, 10])

In [52]:
# Commonly, though, you don't separate this into three steps.
# Instead, you grab your array: arr[]
# and say 'OK, give me back where the array is greater than four':
# arr[arr > 4]
# This gives the notation where you see arr twice.
# The result is the same, but this is what is
# more commonly done when you're working with Numpy and Pandas.
# It's known as Conditional Selection.

arr[arr>4]

array([ 5,  6,  7,  8,  9, 10])

In [53]:
# You can do the same thing with any sort of comparison operator.
# eg.
arr[arr <= 9]

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [None]:
# That's the basics of selection and indexing with NumPy.