## numpy slicing
### boolean slicing

In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

def pprint(arr):
    print(f'type: {type(arr)}')
    print(f'shape: {arr.shape}, dimension: {arr.ndim}, dtype: {arr.dtype}')
    print('Arrays Data:\n', arr)

In [2]:
a1 = np.arange(1, 25).reshape((4, 6))
pprint(a1)

type: <class 'numpy.ndarray'>
shape: (4, 6), dimension: 2, dtype: int32
Arrays Data:
 [[ 1  2  3  4  5  6]
 [ 7  8  9 10 11 12]
 [13 14 15 16 17 18]
 [19 20 21 22 23 24]]


In [3]:
even_arr = a1 % 2 == 0
pprint(even_arr)
a1[even_arr]

type: <class 'numpy.ndarray'>
shape: (4, 6), dimension: 2, dtype: bool
Arrays Data:
 [[False  True False  True False  True]
 [False  True False  True False  True]
 [False  True False  True False  True]
 [False  True False  True False  True]]


array([ 2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22, 24])

In [4]:
np.sum(a1)

300

In [5]:
import pandas as pd
rains_in_seatle = pd.read_csv('./Sample/Seattle2014.csv')
rains_arr = rains_in_seatle['PRCP'].values
print('Data size:', len(rains_arr))

Data size: 365


In [6]:
days_arr = np.arange(0, 365)

In [7]:
# 1월의 날수
condition_jan = days_arr < 31

In [10]:
# 31일 까지만 True가 나옴
condition_jan[:40]

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True, False, False, False, False, False,
       False, False, False, False])

In [12]:
rains_jan = rains_arr[condition_jan]

In [115]:
len(rains_jan), np.sum(rains_jan), np.mean(rains_jan)

(30, 917, 30.566666666666666)

In [118]:
import pandas as pd
rains_in_seatle = pd.read_csv('./Sample/Seattle2014.csv')
rains_arr = rains_in_seatle['PRCP'].values
rains_arr[:360]

def rains(self):
    if self > 12 or self < 1:
        print("Error")
    else:
        days_arr = np.arange(0, 365)
        start = (self - 1)*30
        end = self*30

        rains_mon = days_arr[start:end]
        # print(rains_mon)
        print(f'{self}월 내린 총 강우량은: ', np.sum(rains_mon))
        print(f'{self}월 내린 평균 강우량은: ', np.mean(rains_mon))

# 테스트
rains(12)

12월 내린 총 강우량은:  10335
12월 내린 평균 강우량은:  344.5


## 팬시 인덱싱(Fancy Indexing)

In [52]:
arr = np.arange(1, 25).reshape((4, 6))
pprint(arr)

type: <class 'numpy.ndarray'>
shape: (4, 6), dimension: 2, dtype: int32
Arrays Data:
 [[ 1  2  3  4  5  6]
 [ 7  8  9 10 11 12]
 [13 14 15 16 17 18]
 [19 20 21 22 23 24]]


In [53]:
[arr[0, 0], arr[1, 1], arr[2, 2], arr[3, 3]]

[1, 8, 15, 22]

In [54]:
arr[[0, 1, 2, 3], [0, 1, 2, 3]]

array([ 1,  8, 15, 22])

In [55]:
arr[:, [1, 2]]

array([[ 2,  3],
       [ 8,  9],
       [14, 15],
       [20, 21]])

# 배열 변환
## np.ravel()

In [69]:
a = np.random.randint(1, 10, (2, 3))
pprint(a)

type: <class 'numpy.ndarray'>
shape: (2, 3), dimension: 2, dtype: int32
Arrays Data:
 [[9 8 5]
 [3 4 6]]


In [70]:
a.ravel()

array([9, 8, 5, 3, 4, 6])

In [72]:
b = a.ravel()
pprint(b)

type: <class 'numpy.ndarray'>
shape: (6,), dimension: 1, dtype: int32
Arrays Data:
 [9 8 5 3 4 6]


In [73]:
b[0] = 99
pprint(b)

type: <class 'numpy.ndarray'>
shape: (6,), dimension: 1, dtype: int32
Arrays Data:
 [99  8  5  3  4  6]


In [74]:
# ravel은 a 행렬의 view이므로 b를 수정시 같이 반영
pprint(a)

type: <class 'numpy.ndarray'>
shape: (2, 3), dimension: 2, dtype: int32
Arrays Data:
 [[99  8  5]
 [ 3  4  6]]


## np.reshape()

In [81]:
a = np.random.randint(1, 10, (2, 3))
pprint(a)

type: <class 'numpy.ndarray'>
shape: (2, 3), dimension: 2, dtype: int32
Arrays Data:
 [[7 3 3]
 [4 5 4]]


In [82]:
result = a.reshape((3, 1, 2))
pprint(result)

type: <class 'numpy.ndarray'>
shape: (3, 1, 2), dimension: 3, dtype: int32
Arrays Data:
 [[[7 3]]

 [[3 4]]

 [[5 4]]]


## np.resize()
- 사이즈가 안맞는 경우, 0 추가하거나 뒤에 element 삭제

In [130]:
a = np.random.randint(1, 10, (2, 6))
pprint(a)

type: <class 'numpy.ndarray'>
shape: (2, 6), dimension: 2, dtype: int32
Arrays Data:
 [[1 8 8 6 3 4]
 [5 3 7 6 8 3]]


In [131]:
a.resize((6, 2))
pprint(a)

type: <class 'numpy.ndarray'>
shape: (6, 2), dimension: 2, dtype: int32
Arrays Data:
 [[1 8]
 [8 6]
 [3 4]
 [5 3]
 [7 6]
 [8 3]]


In [132]:
a.resize((2, 10))
pprint(a)

a.resize((2, 5))
pprint(a)

type: <class 'numpy.ndarray'>
shape: (2, 10), dimension: 2, dtype: int32
Arrays Data:
 [[1 8 8 6 3 4 5 3 7 6]
 [8 3 0 0 0 0 0 0 0 0]]
type: <class 'numpy.ndarray'>
shape: (2, 5), dimension: 2, dtype: int32
Arrays Data:
 [[1 8 8 6 3]
 [4 5 3 7 6]]


## np.append()
- axis가 없는 경우 1차원 배열로 변형되어 결합
- axis 값이 있는 경우, shape이 **꼭** 같아야함

In [133]:
a = np.arange(1, 10).reshape((3, 3))
b = np.arange(10, 19).reshape((3, 3))
pprint(a)
pprint(b)

type: <class 'numpy.ndarray'>
shape: (3, 3), dimension: 2, dtype: int32
Arrays Data:
 [[1 2 3]
 [4 5 6]
 [7 8 9]]
type: <class 'numpy.ndarray'>
shape: (3, 3), dimension: 2, dtype: int32
Arrays Data:
 [[10 11 12]
 [13 14 15]
 [16 17 18]]


In [135]:
# axis가 없는 경우 1차원 배열로 변형되어 결합
result = np.append(a, b)
pprint(result)
pprint(a)

type: <class 'numpy.ndarray'>
shape: (18,), dimension: 1, dtype: int32
Arrays Data:
 [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
type: <class 'numpy.ndarray'>
shape: (3, 3), dimension: 2, dtype: int32
Arrays Data:
 [[1 2 3]
 [4 5 6]
 [7 8 9]]


In [137]:
# axis = 0
result = np.append(a, b, axis=0)
pprint(result)

type: <class 'numpy.ndarray'>
shape: (6, 3), dimension: 2, dtype: int32
Arrays Data:
 [[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]
 [13 14 15]
 [16 17 18]]


In [139]:
# axis 값이 있는 경우, shape이 같아야함
different_shape_arr = np.arange(10, 20).reshape(2, 5)
pprint(different_shape_arr)
np.append(a, different_shape_arr, axis=0)

type: <class 'numpy.ndarray'>
shape: (2, 5), dimension: 2, dtype: int32
Arrays Data:
 [[10 11 12 13 14]
 [15 16 17 18 19]]


ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 3 and the array at index 1 has size 5

In [141]:
# axis = 1
result = np.append(a, b, axis=1)
pprint(result)

type: <class 'numpy.ndarray'>
shape: (3, 6), dimension: 2, dtype: int32
Arrays Data:
 [[ 1  2  3 10 11 12]
 [ 4  5  6 13 14 15]
 [ 7  8  9 16 17 18]]


In [142]:
# size가 동일해야함
different_shape_arr = np.arange(10, 20).reshape(5, 2)
pprint(different_shape_arr)
np.append(a, different_shape_arr, axis=1)

type: <class 'numpy.ndarray'>
shape: (5, 2), dimension: 2, dtype: int32
Arrays Data:
 [[10 11]
 [12 13]
 [14 15]
 [16 17]
 [18 19]]


ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 0, the array at index 0 has size 3 and the array at index 1 has size 5

## np.insert
- axis를 지정하지 않을 시 1차원으로 배열 변환
- 지정 index에 값 추가
- axis 추가 시 해당 index에 값을 1열 또는 1행으로 추가 또는 튜플 형태로 값 지정해서 추가

In [143]:
a = np.arange(1, 10).reshape(3, 3)
pprint(a)

type: <class 'numpy.ndarray'>
shape: (3, 3), dimension: 2, dtype: int32
Arrays Data:
 [[1 2 3]
 [4 5 6]
 [7 8 9]]


In [144]:
np.insert(a, 1, 999)

array([  1, 999,   2,   3,   4,   5,   6,   7,   8,   9])

In [149]:
np.insert(a, 1, 999, axis=0)

array([[  1,   2,   3],
       [999, 999, 999],
       [  4,   5,   6],
       [  7,   8,   9]])

In [150]:
np.insert(a, 1, (9,9,9), axis=0)

array([[1, 2, 3],
       [9, 9, 9],
       [4, 5, 6],
       [7, 8, 9]])

In [146]:
np.insert(a, 1, 999, axis=1)

array([[  1, 999,   2,   3],
       [  4, 999,   5,   6],
       [  7, 999,   8,   9]])

## np.delete()
- axis 지정되지 않을 시 1차원 배열로 변환
- 삭제할 방향(행 또는 열)을 axis로 지정

In [151]:
a = np.arange(1, 10).reshape(3, 3)
pprint(a)

type: <class 'numpy.ndarray'>
shape: (3, 3), dimension: 2, dtype: int32
Arrays Data:
 [[1 2 3]
 [4 5 6]
 [7 8 9]]


In [152]:
np.delete(a, 1)

array([1, 3, 4, 5, 6, 7, 8, 9])

In [153]:
np.delete(a, 1, axis=0)

array([[1, 2, 3],
       [7, 8, 9]])

In [154]:
np.delete(a, 1, axis=1)

array([[1, 3],
       [4, 6],
       [7, 9]])

# 배열 결합
## np.concatenate
- default : 'axis=0'

In [155]:
a = np.arange(1, 7).reshape((2, 3))
b = np.arange(7, 13).reshape((2, 3))
pprint(a)
pprint(b)

type: <class 'numpy.ndarray'>
shape: (2, 3), dimension: 2, dtype: int32
Arrays Data:
 [[1 2 3]
 [4 5 6]]
type: <class 'numpy.ndarray'>
shape: (2, 3), dimension: 2, dtype: int32
Arrays Data:
 [[ 7  8  9]
 [10 11 12]]


In [156]:
# default, axis=0
result = np.concatenate((a, b))
result

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [159]:
result = np.concatenate((a, b), axis=0)
result

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [158]:
result = np.concatenate((a, b), axis=1)
result

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

## np.vstack

In [164]:
a = np.arange(1, 7).reshape(2, 3)
b = np.arange(7, 13).reshape(2, 3)
pprint(a)
pprint(b)

type: <class 'numpy.ndarray'>
shape: (2, 3), dimension: 2, dtype: int32
Arrays Data:
 [[1 2 3]
 [4 5 6]]
type: <class 'numpy.ndarray'>
shape: (2, 3), dimension: 2, dtype: int32
Arrays Data:
 [[ 7  8  9]
 [10 11 12]]


In [168]:
np.vstack((a, b))

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [169]:
np.vstack((a, b, a, b))

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12],
       [ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [165]:
a = np.arange(1, 7).reshape(2, 3)
b = np.arange(7, 13).reshape(2, 3)
pprint(a)
pprint(b)

type: <class 'numpy.ndarray'>
shape: (2, 3), dimension: 2, dtype: int32
Arrays Data:
 [[1 2 3]
 [4 5 6]]
type: <class 'numpy.ndarray'>
shape: (2, 3), dimension: 2, dtype: int32
Arrays Data:
 [[ 7  8  9]
 [10 11 12]]


In [166]:
np.hstack((a, b))

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [170]:
np.hstack((a, b, a, b))

array([[ 1,  2,  3,  7,  8,  9,  1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12,  4,  5,  6, 10, 11, 12]])

# 배열 분리
## np.hsplit()

In [171]:
a = np.arange(1, 25).reshape((4, 6))
pprint(a)

type: <class 'numpy.ndarray'>
shape: (4, 6), dimension: 2, dtype: int32
Arrays Data:
 [[ 1  2  3  4  5  6]
 [ 7  8  9 10 11 12]
 [13 14 15 16 17 18]
 [19 20 21 22 23 24]]


In [172]:
result = np.hsplit(a, 3)
result

[array([[ 1,  2],
        [ 7,  8],
        [13, 14],
        [19, 20]]),
 array([[ 3,  4],
        [ 9, 10],
        [15, 16],
        [21, 22]]),
 array([[ 5,  6],
        [11, 12],
        [17, 18],
        [23, 24]])]

In [173]:
np.hsplit(a, [1, 3, 5])

[array([[ 1],
        [ 7],
        [13],
        [19]]),
 array([[ 2,  3],
        [ 8,  9],
        [14, 15],
        [20, 21]]),
 array([[ 4,  5],
        [10, 11],
        [16, 17],
        [22, 23]]),
 array([[ 6],
        [12],
        [18],
        [24]])]

## np.vsplit

In [174]:
a = np.arange(1, 25).reshape((4, 6))
pprint(a)

type: <class 'numpy.ndarray'>
shape: (4, 6), dimension: 2, dtype: int32
Arrays Data:
 [[ 1  2  3  4  5  6]
 [ 7  8  9 10 11 12]
 [13 14 15 16 17 18]
 [19 20 21 22 23 24]]


In [176]:
result = np.vsplit(a, 2)
result

[array([[ 1,  2,  3,  4,  5,  6],
        [ 7,  8,  9, 10, 11, 12]]),
 array([[13, 14, 15, 16, 17, 18],
        [19, 20, 21, 22, 23, 24]])]

In [178]:
np.array(result).shape

(2, 2, 6)

In [180]:
result = np.vsplit(a, 4)
result

[array([[1, 2, 3, 4, 5, 6]]),
 array([[ 7,  8,  9, 10, 11, 12]]),
 array([[13, 14, 15, 16, 17, 18]]),
 array([[19, 20, 21, 22, 23, 24]])]

In [182]:
np.array(result).shape

(4, 1, 6)

In [183]:
np.vsplit(a, [1, 3])

[array([[1, 2, 3, 4, 5, 6]]),
 array([[ 7,  8,  9, 10, 11, 12],
        [13, 14, 15, 16, 17, 18]]),
 array([[19, 20, 21, 22, 23, 24]])]