# indexing, slicing, stacking/splitting, and boolean masking

[numpy tutorial - slicing/stacking arrays, indexing with boolean arrays by codebasics](https://www.youtube.com/watch?v=_d_Ka-ks2a0)

<div align="center"><img src="img/httpatomoreillycomsourceoreillyimages1346880.png" width="50%"></div>

https://www.safaribooksonline.com/library/view/python-for-data/9781449323592/httpatomoreillycomsourceoreillyimages1346880.png

In [1]:
import numpy as np

# indexing

In [2]:
u = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
#             0, 1, 2, 3, 4, 5, 6, 7, 8, 9 : positive index
#           -10,-9,-8,-7,-6,-5,-4,-3,-2,-1 : negative index

In [3]:
u[0]

0

In [4]:
u[1]

1

In [5]:
u[2]

2

In [6]:
u[-1]

9

In [7]:
u[-2]

8

In [8]:
u[-10]

0

In [9]:
A = np.array([[0, 1, 2, 3],
              [1, 2, 3, 4],
              [2, 3, 4, 5],
              [3, 4, 5, 6]])

In [10]:
A[1,1]

2

In [11]:
A[1,-1]

4

# slicing

``` python
x[start:stop_exclusive:step]
```

In [12]:
u = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [13]:
# first five elements
u[:5] 

array([0, 1, 2, 3, 4])

In [14]:
# every other element
u[::2] 

array([0, 2, 4, 6, 8])

In [15]:
# every other element, starting at index 1
u[1::2] 

array([1, 3, 5, 7, 9])

In [16]:
# all elements, reversed
u[::-1] 

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [17]:
# all elements from index 5 back to the start, reversed
u[5::-1] 

array([5, 4, 3, 2, 1, 0])

In [18]:
A = np.array([[0, 1, 2, 3],
              [1, 2, 3, 4],
              [2, 3, 4, 5],
              [3, 4, 5, 6]])

In [19]:
# two rows, three columns
A[:2, :3] 

array([[0, 1, 2],
       [1, 2, 3]])

In [20]:
# all rows, every other column
A[:, ::2] 

array([[0, 2],
       [1, 3],
       [2, 4],
       [3, 5]])

In [21]:
# second column (index 1)
A[:,1] 

array([1, 2, 3, 4])

In [22]:
# second row (index 1)
A[1,:] 

array([1, 2, 3, 4])

In [23]:
# Finally, subarray dimensions can even be reversed together
A[::-1, ::-1] 

array([[6, 5, 4, 3],
       [5, 4, 3, 2],
       [4, 3, 2, 1],
       [3, 2, 1, 0]])

# A[1] is equivalent to A[1,:]

In [24]:
A[1]

array([1, 2, 3, 4])

In [25]:
A[1,:]

array([1, 2, 3, 4])

In [26]:
for row in A:
    print(row)

[0 1 2 3]
[1 2 3 4]
[2 3 4 5]
[3 4 5 6]


# np.concatenate, np.vstack((a,b)), np.hstack((a,b)), and np.dstack((a,b))

In [27]:
u = np.array([1, 2, 3])
v = np.array([3, 2, 1])
w = np.concatenate((u,v))

print(u)
print(v)
print(w)

[1 2 3]
[3 2 1]
[1 2 3 3 2 1]


In [28]:
u = np.array([[1, 2, 3]])
v = np.array([[4, 5, 6]])
w = np.concatenate((u, v)) # default: axis=0

print(u)
print()
print(v)
print()
print(w)

[[1 2 3]]

[[4 5 6]]

[[1 2 3]
 [4 5 6]]


In [29]:
u = np.array([[1, 2, 3]])
v = np.array([[4, 5, 6]])
w = np.concatenate((u, v), axis=0)

print(u)
print()
print(v)
print()
print(w)

[[1 2 3]]

[[4 5 6]]

[[1 2 3]
 [4 5 6]]


In [30]:
u = np.array([[1, 2, 3]])
v = np.array([[4, 5, 6]])
w = np.concatenate((u, v), axis=1)

print(u)
print()
print(v)
print()
print(w)

[[1 2 3]]

[[4 5 6]]

[[1 2 3 4 5 6]]


In [31]:
a = np.arange(6).reshape(2,3)
a

array([[0, 1, 2],
       [3, 4, 5]])

In [32]:
b = np.arange(6,12).reshape(2,3)
b

array([[ 6,  7,  8],
       [ 9, 10, 11]])

In [33]:
try:
    np.vstack(a,b)
except Exception as e:
    print(e)

vstack() takes 1 positional argument but 2 were given


In [34]:
np.vstack((a,b))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [35]:
np.hstack((a,b))

array([[ 0,  1,  2,  6,  7,  8],
       [ 3,  4,  5,  9, 10, 11]])

np.dstack will stack arrays along the third axis.

In [36]:
np.dstack((a,b))

array([[[ 0,  6],
        [ 1,  7],
        [ 2,  8]],

       [[ 3,  9],
        [ 4, 10],
        [ 5, 11]]])

# np.split, np.hsplit(a,3) and np.vsplit(a,3)

In [37]:
u = np.arange(10)
u1, u2, u3 = np.split(u, [3, 5])

print(u1)
print(u2)
print(u3)

[0 1 2]
[3 4]
[5 6 7 8 9]


In [38]:
a = np.arange(30).reshape(2,15)
a

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]])

In [39]:
np.hsplit(a,3)

[array([[ 0,  1,  2,  3,  4],
        [15, 16, 17, 18, 19]]), array([[ 5,  6,  7,  8,  9],
        [20, 21, 22, 23, 24]]), array([[10, 11, 12, 13, 14],
        [25, 26, 27, 28, 29]])]

In [40]:
for i in np.hsplit(a,3):
    print(i)
    print()

[[ 0  1  2  3  4]
 [15 16 17 18 19]]

[[ 5  6  7  8  9]
 [20 21 22 23 24]]

[[10 11 12 13 14]
 [25 26 27 28 29]]



In [41]:
a = np.arange(30).reshape(15,2)
a

array([[ 0,  1],
       [ 2,  3],
       [ 4,  5],
       [ 6,  7],
       [ 8,  9],
       [10, 11],
       [12, 13],
       [14, 15],
       [16, 17],
       [18, 19],
       [20, 21],
       [22, 23],
       [24, 25],
       [26, 27],
       [28, 29]])

In [42]:
np.vsplit(a,3)

[array([[0, 1],
        [2, 3],
        [4, 5],
        [6, 7],
        [8, 9]]), array([[10, 11],
        [12, 13],
        [14, 15],
        [16, 17],
        [18, 19]]), array([[20, 21],
        [22, 23],
        [24, 25],
        [26, 27],
        [28, 29]])]

In [43]:
for i in np.vsplit(a,3):
    print(i)
    print()

[[0 1]
 [2 3]
 [4 5]
 [6 7]
 [8 9]]

[[10 11]
 [12 13]
 [14 15]
 [16 17]
 [18 19]]

[[20 21]
 [22 23]
 [24 25]
 [26 27]
 [28 29]]



# boolean masking

In [44]:
a = np.arange(12).reshape(3,4)
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [45]:
b = (a > 4)
b

array([[False, False, False, False],
       [False,  True,  True,  True],
       [ True,  True,  True,  True]])

a[a>4]는 a의 모양을 유지할 수 없다.

In [46]:
a[b]

array([ 5,  6,  7,  8,  9, 10, 11])

In [47]:
a[a>4]

array([ 5,  6,  7,  8,  9, 10, 11])

a[a>4] = 4 는 a의 모양을 유지할 수 있다.

In [48]:
a[a>4] = 4
a

array([[0, 1, 2, 3],
       [4, 4, 4, 4],
       [4, 4, 4, 4]])

In [49]:
c = np.random.normal(0.,1.,(2,15))
c

array([[ 0.19286736, -1.54443018,  1.17647628, -0.46721771, -0.89677739,
         0.91338736, -0.93091168,  0.18712925, -0.03554622, -0.11359022,
         0.17510211, -3.2478737 ,  0.28637078, -0.03550957,  0.69956367],
       [ 2.62713844,  0.11134041,  1.65145434,  0.40013713, -1.82139982,
        -0.8769723 ,  0.19591175,  1.11892286, -0.41520874, -0.23608349,
        -1.28553582, -2.55081865, -0.37306344,  1.20945311, -1.11499724]])

c에서 음수인 멤버의 개수를 세어보자.

In [50]:
np.sum(c<0)

16

In [51]:
(c<0).sum()

16

In [52]:
a = np.random.normal(0.,1.,10)

print(a)
print((a > 0).sum())             # counts number of positive values
print((a > 0).any())             # checks if any value is True
print((a > 0).all())             # checks if all values are True

[ 0.94877131 -1.59699365 -0.67729007  0.91004895 -2.0479681  -1.63868948
  0.11731676  0.97724701 -0.08706098 -2.02174207]
4
True
False


# np.ix_

[np.ix_](https://docs.scipy.org/doc/numpy/reference/generated/numpy.ix_.html)

In [53]:
a = np.arange(200).reshape(10,20)
a

array([[  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
         13,  14,  15,  16,  17,  18,  19],
       [ 20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,
         33,  34,  35,  36,  37,  38,  39],
       [ 40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
         53,  54,  55,  56,  57,  58,  59],
       [ 60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,
         73,  74,  75,  76,  77,  78,  79],
       [ 80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,
         93,  94,  95,  96,  97,  98,  99],
       [100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112,
        113, 114, 115, 116, 117, 118, 119],
       [120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132,
        133, 134, 135, 136, 137, 138, 139],
       [140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152,
        153, 154, 155, 156, 157, 158, 159],
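       [160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172,
        173, 174, 175, 176, 177, 178, 179],
       [180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192,
        193, 194, 195, 196, 197, 198, 199]])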

In [54]:
a[::3,::6]

array([[  0,   6,  12,  18],
       [ 60,  66,  72,  78],
       [120, 126, 132, 138],
       [180, 186, 192, 198]])

In [55]:
a[np.ix_(np.arange(0,10,3),np.arange(0,20,6))]

array([[  0,   6,  12,  18],
       [ 60,  66,  72,  78],
       [120, 126, 132, 138],
       [180, 186, 192, 198]])

In [56]:
a[np.ix_([0,3,6,9],[0,6,12,18])]

array([[  0,   6,  12,  18],
       [ 60,  66,  72,  78],
       [120, 126, 132, 138],
       [180, 186, 192, 198]])

In [57]:
a[np.ix_([0,3,6,8,9],[0,6,12,17,18])]

array([[  0,   6,  12,  17,  18],
       [ 60,  66,  72,  77,  78],
       [120, 126, 132, 137, 138],
       [160, 166, 172, 177, 178],
       [180, 186, 192, 197, 198]])

In [58]:
np.ix_([0,3,6,8,9],[0,6,12,17,18])

(array([[0],
        [3],
        [6],
        [8],
        [9]]), array([[ 0,  6, 12, 17, 18]]))

# Subarrays as no-copy views

One important—and extremely useful—thing to know about array slices is that they return *views* rather than *copies* of the array data.
This is one area in which NumPy array slicing differs from Python list slicing: in lists, slices will be copies.
Consider our two-dimensional array from before:

Python Data Science Handbook by Jake Vanderplas [github](https://github.com/jakevdp/PythonDataScienceHandbook) 

In [59]:
a = np.random.randint(10, size=(3, 4))  # Two-dimensional array
print(a)

[[9 9 2 7]
 [8 9 9 6]
 [4 2 1 0]]


In [60]:
print("Let's extract a 2×2 subarray from this:")
a_sub = a[:2, :2]
print(a_sub)

Let's extract a 2×2 subarray from this:
[[9 9]
 [8 9]]


In [61]:
print("Now if we modify this subarray, we'll see that the original array is changed! Observe:")
a_sub[0, 0] = 99
print(a_sub)
print(a)

Now if we modify this subarray, we'll see that the original array is changed! Observe:
[[99  9]
 [ 8  9]]
[[99  9  2  7]
 [ 8  9  9  6]
 [ 4  2  1  0]]


This default behavior is actually quite useful: 
it means that when we work with large datasets, 
we can access and process pieces of these datasets without the need to copy the underlying data buffer.

Python Data Science Handbook by Jake Vanderplas [github](https://github.com/jakevdp/PythonDataScienceHandbook) 

딥 카피(deep copy)는 다음과 같다.

In [62]:
a = np.random.randint(10, size=(3, 4))  # Two-dimensional array
print(a)

[[7 7 8 8]
 [3 1 9 0]
 [6 4 9 9]]


In [63]:
print("Let's copy a 2×2 subarray from this:")
a_sub = a[:2, :2].copy()
print(a_sub)

Let's copy a 2×2 subarray from this:
[[7 7]
 [3 1]]


In [64]:
print("Now if we modify this subarray, we'll see that the original array is not changed! Observe:")
a_sub[0, 0] = 99
print(a_sub)
print(a)

Now if we modify this subarray, we'll see that the original array is not changed! Observe:
[[99  7]
 [ 3  1]]
[[7 7 8 8]
 [3 1 9 0]
 [6 4 9 9]]


# each integer index reduces ndim by 1, whereas slicing doesn't

In [65]:
A.ndim

2

In [66]:
A[:2, :3].ndim

2

In [67]:
A[2, :3].ndim

1

In [68]:
A[:2, 3].ndim

1

In [69]:
A[2, 3].ndim

0