# Python -numpy-

## Lists

In [2]:
height = [1.75, 1.60, 1.84, 1.67, 1.76]
weight = [62.0, 55.2, 70.2, 60.2, 85.4]

weight/height**2

TypeError: unsupported operand type(s) for ** or pow(): 'list' and 'int'

## Numpy example

In [3]:
import numpy as np

np_height = np.array(height)
np_weight = np.array(weight)

bmi = np_weight/np_height ** 2
bmi

array([20.24489796, 21.5625    , 20.73487713, 21.58557137, 27.5697314 ])

## Creating ndarray

In [6]:
import numpy as np

data1 = [6, 7.5, 8, 0, 1]
arr1 = np.array(data1)
arr1

array([6. , 7.5, 8. , 0. , 1. ])

In [5]:
data2 = [[1,2,3,4],[5,6,7,8]]
arr2 = np.array(data2)
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [8]:
print(np.zeros(5))
print(np.zeros((3, 5)))
print(np.ones((3,5)))

[0. 0. 0. 0. 0.]
[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]
[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]


In [10]:
data = [[1, 2, 3,4], [5, 6, 7, 8]]
print(np.zeros_like(data))
print(np.ones_like(data))

[[0 0 0 0]
 [0 0 0 0]]
[[1 1 1 1]
 [1 1 1 1]]


In [12]:
print(np.arange(6))
print(np.arange(2, 6))
print(np.arange(2, 6, 2))

[0 1 2 3 4 5]
[2 3 4 5]
[2 4]


In [13]:
arr = np.arange(1, 7).reshape(2, 3)
arr

array([[1, 2, 3],
       [4, 5, 6]])

In [14]:
print(np.full((2, 3), 5))
print(np.eye(5))
print(np.random.random((3,5)))

[[5 5 5]
 [5 5 5]]
[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]
[[0.75062139 0.77811008 0.71412344 0.68126178 0.71104185]
 [0.56708435 0.44720077 0.36954269 0.91784547 0.54720945]
 [0.03980811 0.990028   0.69495361 0.62289267 0.98371986]]


## Dimension, shape, data types

In [18]:
data = np.empty((2, 3, 3))

print(data)

print(data.ndim)
print(data.shape)
print(data.dtype)

[[[1.28256533e-311 5.28650241e-322 0.00000000e+000]
  [0.00000000e+000 1.11258582e-306 1.16095484e-028]
  [4.41988634e+222 3.09927492e-115 8.44244083e-053]]

 [[7.35874688e+223 6.58612145e+180 1.75300433e+243]
  [1.01484449e+242 2.55613623e+161 8.96862507e-096]
  [1.47278596e+179 9.08367237e+223 1.16466228e-028]]]
3
(2, 3, 3)
float64


## Operations between Array and Scalars

In [20]:
arr = np.array([[1,2,3], [4,5,6]])
print(arr)

print(1/arr)

print(arr ** 0.5)

print(arr * arr)

print(arr - arr)

[[1 2 3]
 [4 5 6]]
[[1.         0.5        0.33333333]
 [0.25       0.2        0.16666667]]
[[1.         1.41421356 1.73205081]
 [2.         2.23606798 2.44948974]]
[[ 1  4  9]
 [16 25 36]]
[[0 0 0]
 [0 0 0]]


## Universal Functions: Fast Element-wise Array Function

In [21]:
arr = np.arange(10)
print(arr)

print(np.sqrt(arr))
print(np.exp(arr))

[0 1 2 3 4 5 6 7 8 9]
[0.         1.         1.41421356 1.73205081 2.         2.23606798
 2.44948974 2.64575131 2.82842712 3.        ]
[1.00000000e+00 2.71828183e+00 7.38905610e+00 2.00855369e+01
 5.45981500e+01 1.48413159e+02 4.03428793e+02 1.09663316e+03
 2.98095799e+03 8.10308393e+03]


In [23]:
x = np.array([1.5063, 0.2736, -1.893])
y = np.array([2.9252, -0.7416, -0.7768])

np.maximum(x, y) # element-wise maximum

array([ 2.9252,  0.2736, -0.7768])

## Basic Indexing and Slicing

In [24]:
arr = np.arange(10)
print(arr)

print(arr[5])

print(arr[5:8])

print(arr[:])

arr[5:8] = 12
print(arr)

[0 1 2 3 4 5 6 7 8 9]
5
[5 6 7]
[0 1 2 3 4 5 6 7 8 9]
[ 0  1  2  3  4 12 12 12  8  9]


## Basic Indexing and Slicing

In [34]:
arr

array([ 0,  1,  2,  3,  4, 64, 64, 64,  8,  9])

In [35]:
arr_slice = arr[5:8]
arr_slice

array([64, 64, 64])

In [36]:
arr_slice[1] = 12345
arr_slice

array([   64, 12345,    64])

In [37]:
arr

array([    0,     1,     2,     3,     4,    64, 12345,    64,     8,
           9])

In [38]:
k=arr[5:8].copy()
arr_slice[:] = 64
print(k)
print(arr)



[   64 12345    64]
[ 0  1  2  3  4 64 64 64  8  9]


## Basic Indexing and Slicing

In [40]:
arr2d = np.array([[1,2,3], [4,5,6], [7,8,9]])
print(arr2d[2])
print(arr2d[0][2])
print(0,2)

[7 8 9]
3
0 2


In [41]:
print(arr2d[1, 1:3])
print(arr2d[:2])
print(arr2d[:2, 1:])


print(arr2d[1, :2])
print(arr2d[2, :1])
print(arr2d[:, :1])
arr2d[:2, 1:] = 0
print(arr2d)

[5 6]
[[1 2 3]
 [4 5 6]]
[[2 3]
 [5 6]]
[4 5]
[7]
[[1]
 [4]
 [7]]
[[1 0 0]
 [4 0 0]
 [7 8 9]]


## Boolean Indexing

In [48]:
data = np.random.randn(4, 3) # np.random.random((4,3))
print(data)
tflist = [True, True, False, False] # row에 대한 것
print(data[tflist])

[[ 0.76178428  0.86573842  0.56100851]
 [-1.71633428  0.92827505 -0.0066343 ]
 [-0.81341766 -0.4201354  -0.21634581]
 [ 0.27623735  0.82686524  0.07636162]]
[[ 0.76178428  0.86573842  0.56100851]
 [-1.71633428  0.92827505 -0.0066343 ]]


In [49]:
data>0.5

array([[ True,  True,  True],
       [False,  True, False],
       [False, False, False],
       [False,  True, False]])

In [50]:
data[data>0.5]

array([0.76178428, 0.86573842, 0.56100851, 0.92827505, 0.82686524])

In [52]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
data = np.random.randn(7,4)
data

array([[-0.73447   , -0.39506586, -0.95771482, -0.59591615],
       [ 1.02232782, -1.62927202,  2.27465377, -0.58701634],
       [-0.79616741,  0.19154877, -0.3727195 ,  0.8663173 ],
       [-0.34248113, -0.05236472,  0.05769066,  1.69887523],
       [ 0.76579214, -0.49072152, -0.20222484,  0.58348004],
       [-0.0582089 , -0.2710421 , -1.42976261, -1.79231671],
       [-0.87150311,  0.7665467 ,  0.10013505,  0.72336209]])

In [54]:
print(names=='Bob')
data[names=='Bob']

[ True False False  True False False False]


array([[-0.73447   , -0.39506586, -0.95771482, -0.59591615],
       [-0.34248113, -0.05236472,  0.05769066,  1.69887523]])

In [55]:
print(names!= 'Bob')

print(data[names != 'Bob'])

data[~(names == 'Bob')]

[False  True  True False  True  True  True]
[[ 1.02232782 -1.62927202  2.27465377 -0.58701634]
 [-0.79616741  0.19154877 -0.3727195   0.8663173 ]
 [ 0.76579214 -0.49072152 -0.20222484  0.58348004]
 [-0.0582089  -0.2710421  -1.42976261 -1.79231671]
 [-0.87150311  0.7665467   0.10013505  0.72336209]]


array([[ 1.02232782, -1.62927202,  2.27465377, -0.58701634],
       [-0.79616741,  0.19154877, -0.3727195 ,  0.8663173 ],
       [ 0.76579214, -0.49072152, -0.20222484,  0.58348004],
       [-0.0582089 , -0.2710421 , -1.42976261, -1.79231671],
       [-0.87150311,  0.7665467 ,  0.10013505,  0.72336209]])

In [56]:
mask = (names == 'Bob') | (names == 'Will')

print(mask)
data[mask]

[ True False  True  True  True False False]


array([[-0.73447   , -0.39506586, -0.95771482, -0.59591615],
       [-0.79616741,  0.19154877, -0.3727195 ,  0.8663173 ],
       [-0.34248113, -0.05236472,  0.05769066,  1.69887523],
       [ 0.76579214, -0.49072152, -0.20222484,  0.58348004]])

## Expressing Conditional logic as Array Operations

In [60]:
xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
cond = np.array([True, False, True, True, False])

result = np.where(cond, xarr, yarr)
result

array([1.1, 2.2, 1.3, 1.4, 2.5])

In [61]:
arr = np.random.randn(4, 4)
np.where(arr>0, 2, -2)

array([[ 2, -2,  2,  2],
       [ 2, -2, -2,  2],
       [ 2, -2,  2, -2],
       [ 2, -2,  2,  2]])

In [62]:
np.where(arr>0, 2, arr) # set only positive values to 2

array([[ 2.        , -1.94510258,  2.        ,  2.        ],
       [ 2.        , -0.91996896, -0.38151579,  2.        ],
       [ 2.        , -1.67533545,  2.        , -0.66879576],
       [ 2.        , -0.63736421,  2.        ,  2.        ]])

In [63]:
arr = np.random.randn(5, 4) # normally-distributed data

print(arr.mean())
print(np.mean(arr))
print(arr.mean(axis = 0)) # (각 열의 평균)
print(arr.mean(axis=1)) # (각 행의 평균)

-0.2412994285004221
-0.2412994285004221
[-0.41619804 -0.63773596 -0.18083004  0.26956633]
[ 0.35569268 -0.24687907 -0.19509271 -0.29466697 -0.82555107]


In [67]:
print(arr.sum())
print(arr.sum(0)) #column
print(arr.sum(1)) #row

-4.825988570008442
[-2.0809902  -3.18867982 -0.9041502   1.34783164]
[ 1.42277071 -0.98751629 -0.78037083 -1.17866787 -3.30220429]


In [68]:
print(arr.cumsum())
print(arr.cumsum(0))
print(arr.cumsum(1))

print(arr.cumprod())
print(arr.cumprod(0))
print(arr.cumprod(1))

[ 0.81587642  1.05333762  0.06481288  1.42277071  1.82095745  0.50346128
  1.00178294  0.43525442 -0.5669368  -1.72887615 -0.71714718 -0.34511641
 -0.73124017 -1.05216393 -1.87248674 -1.52378428 -3.43052267 -4.05630441
 -4.66165768 -4.82598857]
[[ 0.81587642  0.2374612  -0.98852474  1.35795783]
 [ 1.21406316 -1.08003497 -0.49020308  0.79142931]
 [ 0.21187194 -2.24197432  0.52152588  1.16346008]
 [-0.17425181 -2.56289808 -0.29879693  1.51216254]
 [-2.0809902  -3.18867982 -0.9041502   1.34783164]]
[[ 0.81587642  1.05333762  0.06481288  1.42277071]
 [ 0.39818674 -0.91930943 -0.42098777 -0.98751629]
 [-1.00219122 -2.16413057 -1.1524016  -0.78037083]
 [-0.38612376 -0.70704752 -1.52737033 -1.17866787]
 [-1.90673838 -2.53252012 -3.13787339 -3.30220429]]
[ 8.15876423e-01  1.93738992e-01 -1.91515786e-01 -2.60070362e-01
 -1.03556569e-01  1.36435383e-01  6.79887058e-02 -3.85175410e-02
  3.86019414e-02 -4.48531146e-02 -4.53791954e-02 -1.68824571e-02
  6.51871777e-03 -2.09201141e-03  1.71612469e-03

## Methods for Boolean Arrays

In [72]:
arr = np.random.randn(100)
print((arr>0).sum()) # Number of positive values
print(arr>0)

45
[ True False  True False  True False  True  True False False  True False
 False  True False  True  True False  True False  True  True False  True
  True  True False  True  True False False False  True  True  True False
 False  True  True  True False False False False False False  True False
  True False False  True False  True False False False False  True False
  True False False  True  True  True False False  True  True False False
 False  True  True False  True  True False False  True  True False False
 False False False False  True False  True False False  True False False
  True  True False False]


In [74]:
print(arr[arr>0].sum())
print(np.where(arr>0, arr, 0).sum())

35.18286315809388
35.18286315809388


## Sorting

In [76]:
arr.sort()
arr

array([-2.49681055, -2.32401387, -2.2802024 , -2.2480342 , -1.86066298,
       -1.61136943, -1.60772307, -1.3979155 , -1.36781693, -1.0681049 ,
       -1.0657221 , -1.02313528, -1.02065552, -1.00830122, -0.96803021,
       -0.91433165, -0.86871932, -0.85022953, -0.77697172, -0.76829435,
       -0.7619057 , -0.74805918, -0.74001438, -0.69342196, -0.68685624,
       -0.67298878, -0.65166463, -0.65076258, -0.61203027, -0.61078008,
       -0.53795469, -0.53486133, -0.5225812 , -0.49155064, -0.47464762,
       -0.46762109, -0.46373858, -0.46130746, -0.42588391, -0.37579326,
       -0.36210709, -0.31219329, -0.30849964, -0.27017714, -0.24875761,
       -0.20881116, -0.19044305, -0.17564184, -0.16899249, -0.16492348,
       -0.16003575, -0.14716701, -0.12510368, -0.12364244, -0.02235848,
        0.00524466,  0.00655866,  0.02461271,  0.04031127,  0.14770092,
        0.14883407,  0.16164071,  0.17483758,  0.20509022,  0.24467458,
        0.25875409,  0.26626995,  0.36466217,  0.41056993,  0.41

In [83]:
arr = np.random.randn(5, 3)
np.sort(arr, 0)

array([[-1.72510017, -1.21873171, -0.10623613],
       [-0.23554728, -0.8621305 ,  0.06326425],
       [ 0.00851762,  0.0359954 ,  0.21550148],
       [ 0.39776211,  0.14924813,  0.48845656],
       [ 0.72665605,  0.66649987,  2.89677067]])

In [84]:
print(np.sort(arr, 1))
print(arr)

[[-1.21873171  0.48845656  0.72665605]
 [ 0.0359954   0.39776211  2.89677067]
 [-0.23554728 -0.10623613  0.66649987]
 [-1.72510017  0.14924813  0.21550148]
 [-0.8621305   0.00851762  0.06326425]]
[[ 0.72665605 -1.21873171  0.48845656]
 [ 0.39776211  0.0359954   2.89677067]
 [-0.23554728  0.66649987 -0.10623613]
 [-1.72510017  0.14924813  0.21550148]
 [ 0.00851762 -0.8621305   0.06326425]]


In [80]:
arr.sort(0)
print(arr)
arr.sort(1)
print(arr)

[[-2.17505359 -0.74771623 -0.4656736 ]
 [-1.49257401 -0.71240966 -0.40206293]
 [-0.4714405  -0.44957321  0.48655433]
 [-0.11473078 -0.03645908  0.87851501]
 [ 0.82537085  1.06363473  1.52128944]]
[[-2.17505359 -0.74771623 -0.4656736 ]
 [-1.49257401 -0.71240966 -0.40206293]
 [-0.4714405  -0.44957321  0.48655433]
 [-0.11473078 -0.03645908  0.87851501]
 [ 0.82537085  1.06363473  1.52128944]]


## Unique and Other Set logic

In [85]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
np.unique(names)

array(['Bob', 'Joe', 'Will'], dtype='<U4')

In [86]:
ints = np.array([3, 3, 3, 2, 2, 1, 1, 4, 4])
np.unique(ints)

array([1, 2, 3, 4])

In [87]:
values = np.array([6,0,0,3,2,5,6])
np.in1d(values, [2, 3, 6]) # values가 오른쪽 리스트에 포함되어 있느냐?

array([ True, False, False,  True,  True, False,  True])

## File Input and Output with Arrays

In [88]:
arr = np.arange(10)

np.save('some_array', arr)

In [89]:
np.load('some_array.npy')

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [91]:
np.savez('array_archive.npz', a = arr1, b= arr2)

In [92]:
arch = np.load('array_archive.npz')
arch['b']

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [93]:
!HinJ.txt

In [94]:
arr = np.loadtxt('HinJ.txt', delimiter = ',')
arr

array([[  1.,   2.,   3.,   4.,   5.],
       [  6.,   7.,   8.,   9., 120.]])

## Random Number Generator

In [95]:
samples = np.random.normal(size = (4,4))
samples

array([[-0.1881553 ,  0.7970649 , -0.95516396, -2.66917656],
       [-1.04891634,  0.11461628, -0.10725148, -0.08151856],
       [-1.23896088, -0.4305237 ,  1.00336149,  0.69327068],
       [-0.3234864 ,  0.22111474,  0.25028294, -0.10429597]])

In [96]:
samples = np.random.binomial(10, 0.5, size = (4,4))
    # number of trials, probability of each trial

samples

array([[5, 6, 5, 7],
       [5, 8, 7, 9],
       [3, 3, 6, 5],
       [3, 5, 5, 7]])

In [97]:
np.random.seed(12345) # seed 값에 따라 random이 생성됨. seed가 같으면 random값 같음
draws = np.random.randint(0, 10, 100) # 100 numbers range from 0 to 10
draws

array([2, 5, 1, 4, 9, 5, 2, 1, 6, 1, 9, 7, 6, 0, 2, 9, 1, 2, 6, 7, 7, 7,
       8, 7, 1, 7, 4, 0, 3, 5, 7, 3, 1, 5, 2, 5, 3, 8, 5, 2, 5, 3, 0, 6,
       8, 0, 5, 6, 8, 9, 2, 2, 2, 9, 7, 5, 7, 1, 0, 9, 3, 0, 3, 0, 6, 2,
       1, 5, 8, 6, 5, 1, 0, 5, 8, 2, 9, 4, 7, 9, 5, 2, 4, 8, 2, 5, 6, 5,
       9, 6, 1, 9, 5, 0, 8, 8, 2, 0, 2, 0])