# Indexing, slicing, and iterating in NumPy

In [1]:
# import numpy
import numpy as np

In [2]:
# build a 1-D array holding the floats 0.0 through 9.0
x = np.linspace(0, 9, num=10)

x[1]                     # a single entry: index 1 is the second element (indexing is 0-based)

# slice with start:stop -- the stop index is excluded
x[0:2]                   # indices 0 and 1 only, i.e. 0 <= N < 2

# slice assignment: the scalar is written into indices 1, 2 and 3 (2nd - 4th elements)
x[1:4] = 100
print(x[1:4])

# start:stop:step -- every second element among indices 0..7
print(x[0:8:2])

# a negative step walks the array backwards, so this prints x reversed
print(x[::-1])

# looping over a 1-D array yields its elements one at a time
for value in x:
    print(value * 3)

[100. 100. 100.]
[  0. 100.   4.   6.]
[  9.   8.   7.   6.   5.   4. 100. 100. 100.   0.]
0.0
300.0
300.0
300.0
12.0
15.0
18.0
21.0
24.0
27.0


## Multidimensional array indexing, slicing, etc.

In [3]:
# 10x5 matrix of whole numbers 0..10: uniform random draws scaled by 10, then rounded
x = np.round(10 * np.random.rand(10, 5))
print(x)

# single elements are addressed as [row, column]
x[0, 0]    # 1st row, 1st column
x[2, 3]    # 3rd row, 4th column

# a bare ':' selects everything along that axis
x[:, 3]    # the whole 4th column
x[3, :]    # the whole 4th row
x[0:2, 4]  # first two entries of the 5th column
x[6, 2:4]  # 3rd and 4th entries of the 7th row

# axes left out of the index are treated as complete slices,
# so each of these three spellings selects the 7th row
x[6]
x[6,]
x[6, :]

# negative indices count back from the end
print('last row: ', x[-1, :])
print('last column: ', x[:, -1])
print('last entry: ', x[-1, -1])

# iterating over a 2-D array walks its first axis, yielding one row per step
for row in x:
    print(row)

# .flat visits every individual entry instead:
# all of the 1st row, then all of the 2nd row, and so on
for entry in x.flat:
    print(entry)

[[ 7.  9.  2.  4.  7.]
 [ 4.  6.  9.  6.  2.]
 [ 6.  4. 10.  5.  0.]
 [ 5.  8.  7.  4.  7.]
 [ 5.  3.  7.  3.  0.]
 [ 2.  2.  7.  0.  8.]
 [ 8.  4.  9.  2.  3.]
 [ 6.  4.  2.  9.  1.]
 [ 5.  2.  6.  7.  7.]
 [ 9.  7.  5.  8.  9.]]
last row:  [9. 7. 5. 8. 9.]
last column:  [7. 2. 0. 7. 0. 8. 3. 1. 7. 9.]
last entry:  9.0
[7. 9. 2. 4. 7.]
[4. 6. 9. 6. 2.]
[ 6.  4. 10.  5.  0.]
[5. 8. 7. 4. 7.]
[5. 3. 7. 3. 0.]
[2. 2. 7. 0. 8.]
[8. 4. 9. 2. 3.]
[6. 4. 2. 9. 1.]
[5. 2. 6. 7. 7.]
[9. 7. 5. 8. 9.]
7.0
9.0
2.0
4.0
7.0
4.0
6.0
9.0
6.0
2.0
6.0
4.0
10.0
5.0
0.0
5.0
8.0
7.0
4.0
7.0
5.0
3.0
7.0
3.0
0.0
2.0
2.0
7.0
0.0
8.0
8.0
4.0
9.0
2.0
3.0
6.0
4.0
2.0
9.0
1.0
5.0
2.0
6.0
7.0
7.0
9.0
7.0
5.0
8.0
9.0


## pull out subset of rows and columns

In [4]:
# 4x3 matrix of random numbers between 0 and 1
x = np.random.rand(4, 3)
print(x)

# rows at index 0 and 1: the stop value 2 is excluded, and the column axis
# can be left out entirely when every column is wanted
y = x[:2]
print('\n', y)

# an open-ended slice runs to the end: index 2 onward, i.e. the 3rd and 4th rows
y = x[2:]
print('\n', y)

# 1st column of the first two rows
y = x[:2, 0]
print('\n', y)

# 3rd row onward, 2nd column onward
y = x[2:, 1:]
print('\n', y)

[[0.60881378 0.36379067 0.16523446]
 [0.94525931 0.46399113 0.4232727 ]
 [0.25378117 0.63952801 0.17401094]
 [0.64854694 0.41173331 0.37854892]]

 [[0.60881378 0.36379067 0.16523446]
 [0.94525931 0.46399113 0.4232727 ]]

 [[0.25378117 0.63952801 0.17401094]
 [0.64854694 0.41173331 0.37854892]]

 [0.60881378 0.94525931]

 [[0.63952801 0.17401094]
 [0.41173331 0.37854892]]


<div class="alert alert-info">
Important: slicing an array creates a view of it! If you change the view, you also change the original data!
</div>

In [5]:
# z comes from slicing x, so it is a view onto x's data rather than a separate copy
z = x[:,]
print(z.shape)

# assigning through [:] overwrites every value of z in place ...
z[:] = 100

# ... and because z shares its data with x, x shows the new values too
print(x)

(4, 3)
[[100. 100. 100.]
 [100. 100. 100.]
 [100. 100. 100.]
 [100. 100. 100.]]


## Fancy indexing...using arrays to index arrays - used all the time in data analysis...

<div class="alert alert-info">
fancy indexing always makes a COPY of the data (unlike slicing which creates a view)!!!
</div>
using arrays to index arrays

In [6]:
# define a 3x4 array of random values
x = np.random.rand(3,4)

# a tuple index supplies one index per axis, so x[(2,3)] means x[2,3]
# (row index 2, column index 3) and picks out a single element
y = (2,3)

# show the whole array, then the element selected by the tuple
print(x)
print('\n x indexed at tuple y: ', x[y]) # x[y] is one value taken from x (a scalar), not a sub-array

[[0.61063419 0.67862135 0.87999456 0.96400224]
 [0.70059787 0.0939637  0.84425218 0.60929271]
 [0.57866315 0.40497119 0.14374871 0.63453529]]

 x indexed at tuple y:  0.6345352897041595


In [9]:
# an index list can name rows or columns in any order; the result follows that order
print(x)

# rows taken in the order 3rd, 2nd, 1st
# (this result is not shown: a cell only echoes its final expression)
x[[2, 1, 0]]

# every row, with the columns taken in the order 2nd, 3rd, 1st
x[:, [1, 2, 0]]

[[0.61063419 0.67862135 0.87999456 0.96400224]
 [0.70059787 0.0939637  0.84425218 0.60929271]
 [0.57866315 0.40497119 0.14374871 0.63453529]]


array([[0.67862135, 0.87999456, 0.61063419],
       [0.0939637 , 0.84425218, 0.70059787],
       [0.40497119, 0.14374871, 0.57866315]])

In [10]:
# with one index list per axis, the lists are paired up element by element:
# here the entries at (1,1) and (2,2) are looked up and returned as a 1D array
print(x)
x[[1, 2], [1, 2]]

[[0.61063419 0.67862135 0.87999456 0.96400224]
 [0.70059787 0.0939637  0.84425218 0.60929271]
 [0.57866315 0.40497119 0.14374871 0.63453529]]


array([0.0939637 , 0.14374871])

<div class="alert alert-info">
As opposed to selecting a set of tuples, you can also select a block of indices from a matrix. 
</div>

In [12]:
# grab the lower right chunk of data: the last two rows crossed with the last two columns
print(x, '\n')

# np.ix_ combines the row list and the column list so that every listed row is paired
# with every listed column. The result equals the chained form x[[1,2]][:,[2,3]], but it
# is selected in one indexing step (no intermediate copy of the chosen rows), and the
# same expression also works on the left-hand side of an assignment.
print(x[np.ix_([1, 2], [2, 3])])

[[0.61063419 0.67862135 0.87999456 0.96400224]
 [0.70059787 0.0939637  0.84425218 0.60929271]
 [0.57866315 0.40497119 0.14374871 0.63453529]] 

[[0.84425218 0.60929271]
 [0.14374871 0.63453529]]
