In [1]:
import numpy as np

In [2]:
# NumPy vs. plain Python: hold the same one million integers in a list and
# in an ndarray so the two timing cells that follow compare like with like.
my_list = [*range(1_000_000)]

my_arr = np.arange(1_000_000)

In [3]:
# NumPy: time ten element-wise doublings of the whole array.
%time for _ in range(10): my_arr2 = my_arr * 2

Wall time: 22.9 ms


In [4]:
# Pure Python: the same ten doublings, using a list comprehension over my_list.
%time for _ in range(10): my_list2 = [x * 2 for x in my_list]

Wall time: 839 ms


In [5]:
# Applying computations to ndarrays
# Random data: a 2x3 array of samples from the standard normal distribution.
# NOTE(review): no seed is set, so a re-run will not reproduce the numbers
# recorded in the outputs.
data = np.random.randn(2, 3)

In [6]:
# The original values, printed as plain text.
print(data)

# Arithmetic with a scalar is applied to every element.
print(data * 10)

# Arithmetic between equal-shape arrays is applied element by element;
# as the last expression, this result is what the cell displays.
data + data

[[-0.43720044  1.29506539 -1.64519931]
 [-0.71695512 -1.12656005  1.23226073]]
[[ -4.37200437  12.95065391 -16.45199313]
 [ -7.16955116 -11.2656005   12.3226073 ]]


array([[-0.87440087,  2.59013078, -3.29039863],
       [-1.43391023, -2.2531201 ,  2.46452146]])

In [7]:
# Size of each dimension as a tuple: (rows, columns).
data.shape

(2, 3)

In [8]:
# A list mixing ints with one float: np.array() picks a single dtype that
# can hold every element, so all values come out as floats (see output).
d1 = [6, 7, 8, 9, 0, 2.2]

arr1 = np.array(d1)

arr1

array([6. , 7. , 8. , 9. , 0. , 2.2])

In [9]:
# A list of equal-length lists becomes a two-dimensional array.
d2 = [[4, 3, 2, 1], [9, 8, 7, 6]]

arr2 = np.array(d2)

# Other attributes worth inspecting (left disabled):
# arr2.shape
# arr2.ndim

# Only the last expression in a cell is displayed, so arr1.dtype is
# evaluated and discarded here; the output below belongs to arr2.dtype.
arr1.dtype

# The default integer dtype is platform dependent (int32 in this recorded run).
arr2.dtype

dtype('int32')

In [10]:
# Other numpy functions for ndarray creation
# (only the final expression's value is displayed by the cell)

# 2x3 array filled with 0.0
np.zeros((2, 3))

# 3x4 array filled with 1.0
np.ones((3, 4))

# 2x3x2 array whose contents are uninitialized (arbitrary values)
np.empty((2, 3, 2))

# arange returns an ndarray, not a Python range or list
type(np.arange(5))

numpy.ndarray

In [11]:
# Messing around with type

int_array = np.arange(10)

# Not displayed: only the cell's last expression is shown.
int_array

# An array created with an explicit float64 dtype.
decimals = np.array([.11, .22], dtype=np.float64)

# Casting the dtype of another array: astype returns a new array converted
# to decimals' dtype (float64); int_array itself is left unchanged.
int_array.astype(decimals.dtype)

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [12]:
# Indexing/slicing
arr5 = np.arange(10)

# Slice of the first three elements (evaluated, but not displayed because
# it is not the cell's last expression).
arr5[:3]

# Assigning a scalar to a slice broadcasts it: every selected element
# becomes 50, in place.
arr5[:3] = 50

arr5

array([50, 50, 50,  3,  4,  5,  6,  7,  8,  9])

In [13]:
# array slices as views of original array

# Basic slicing returns a view that shares memory with arr5, not a copy.
arr_slice = arr5[0:3]
arr_slice

# Because arr_slice is just a 'view' of arr5[0:3],
# a change to arr_slice[1] also modifies arr5[1].
arr_slice[1] = 123

# arr5[1] is now 123 even though arr5 was never assigned to directly.
arr5

# For an independent array, take an explicit copy with .copy()

arr_slice2 = arr5[0:3].copy()

# Overwrite every element of the copy.
arr_slice2[:] = 999

arr_slice2

# arr5 is unaffected by the writes to the copy (see the output below).
arr5

array([ 50, 123,  50,   3,   4,   5,   6,   7,   8,   9])

In [14]:
# On to two dimension arrays!
# Each inner list is one row of the resulting 3x3 array.
arr_md = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])

arr_md

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [15]:
# Two ways to read the element in row 0, column 2.
# arr_md[0][2] indexes twice: arr_md[0] first builds a temporary row array,
# which is then indexed with [2].
arr_md[0][2]
# arr_md[0, 2] performs the same lookup in a single indexing operation and
# is the preferred NumPy spelling. (.loc/.iloc are pandas accessors;
# ndarrays do not have them.)
arr_md[0, 2]

# Visit every element by (row, column). Both loop bounds come from .shape:
# len(arr_md) is only the row count, so using it for the column loop would
# be wrong for any non-square array.
n_rows, n_cols = arr_md.shape
for row in range(n_rows):
    for column in range(n_cols):
        print(arr_md[row, column])

1
2
3
4
5
6
7
8
9


In [16]:
# N Dimensional arrays
# Two 2x3 blocks stacked along the first axis -> shape (2, 2, 3).
arr_3d = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[7, 8, 9], [10, 11, 12]],
])

# One index per axis picks a single element: block 0, row 1, column 1.
arr_3d[0, 1, 1]

# A single index selects along the first axis only, giving the second
# 2x3 block (this is the value the cell displays).
arr_3d[1]

array([[ 7,  8,  9],
       [10, 11, 12]])

In [17]:
# Show arr_md again as a reference for the slicing examples that follow.
arr_md

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [18]:
# Same idea, but with slices instead of single indexes.
# (Only the final expression of the cell is displayed.)
arr_md
# First two rows.
arr_md[0:2]

# Indexes and slices can be mixed: first three rows, column 0 (1-D result).
arr_md[:3, 0]

# First two rows, column 1 kept as a width-1 slice (result stays 2-D).
arr_md[:2, 1:2]

# Every row, first two columns.
arr_md[:, :2]

# Lower-right 2x2 block.
arr_md[1:3, 1:3]

# Modifying the original through a slice

# Arithmetic on a slice produces a new array; arr_md is not changed here.
arr_md[1:3, 1:3] * 3

# Assigning to a slice writes into arr_md itself: the 2x2 block becomes 11.
arr_md[1:3, 1:3] = 11

arr_md

array([[ 1,  2,  3],
       [ 4, 11, 11],
       [ 7, 11, 11]])

In [19]:
# Boolean Indexing!
# Six labels, one for each row (and each column) of the 6x6 data array.
names = np.array(['Dog', 'Cat', 'Frog', 'Frog', 'Cat', 'Dog'])

# NOTE(review): this rebinds `data` (previously the 2x3 array) and draws
# unseeded random values, so a re-run will not reproduce the outputs below.
data = np.random.randn(6, 6)

data

array([[-0.37446181,  1.97983472,  1.30259044, -0.76068853, -0.38403333,
        -0.58921021],
       [ 0.86904303, -0.55828223,  0.58721247,  0.85384219, -0.44331003,
         0.36832809],
       [ 0.6618543 ,  2.00916752,  0.30178932,  0.71261075, -0.61082369,
         0.4436623 ],
       [ 0.51627589,  1.14575161, -0.80097744, -1.08740015, -0.88365621,
        -0.62028114],
       [-1.38023211, -1.23913218,  0.78733943,  1.38221026, -0.51527413,
        -0.30460805],
       [ 0.25880582, -0.04217327,  0.92321458,  1.2608482 ,  0.44009483,
         0.00794235]])

In [20]:
# Element-wise comparison: a boolean array that is True where names is 'Cat'.
# (Evaluated but not displayed; only the last expression is shown.)
names == 'Cat'

# Indexing the first axis with that mask keeps only the matching rows:
# data[names == 'Cat']

# The same mask on the second axis keeps only the matching columns.
data[:, names == 'Cat']

array([[ 1.97983472, -0.38403333],
       [-0.55828223, -0.44331003],
       [ 2.00916752, -0.61082369],
       [ 1.14575161, -0.88365621],
       [-1.23913218, -0.51527413],
       [-0.04217327,  0.44009483]])

In [21]:
# Boolean masks can be combined with slices or integer indexes.

# 'Cat' rows, first two columns.
data[names == 'Cat', :2]

# First two rows, 'Cat' columns.
data[:2, names == 'Cat']

# 'Cat' rows, column 0 only (1-D result).
data[names == 'Cat', 0]

array([ 0.86904303, -1.38023211])

In [22]:
# Negated conditions

# True wherever the name is anything other than 'Cat'.
names != 'Cat'

# Columns whose label is not 'Cat'.
data[:, names != 'Cat']

# Equivalent: build the == mask and invert it with ~.
data[:, ~(names == 'Cat')]

array([[-0.37446181,  1.30259044, -0.76068853, -0.58921021],
       [ 0.86904303,  0.58721247,  0.85384219,  0.36832809],
       [ 0.6618543 ,  0.30178932,  0.71261075,  0.4436623 ],
       [ 0.51627589, -0.80097744, -1.08740015, -0.62028114],
       [-1.38023211,  0.78733943,  1.38221026, -0.30460805],
       [ 0.25880582,  0.92321458,  1.2608482 ,  0.00794235]])

In [23]:
# Store the mask in a variable so it can be named and reused.

condition = names == 'Cat'

# Rows of data where condition is True.
data[condition]

array([[ 0.86904303, -0.55828223,  0.58721247,  0.85384219, -0.44331003,
         0.36832809],
       [-1.38023211, -1.23913218,  0.78733943,  1.38221026, -0.51527413,
        -0.30460805]])

In [24]:
# Multiple conditions? Sure
# Combine masks element-wise with | (or) / & (and). Each comparison needs
# its own parentheses because | binds more tightly than ==.

cond = (names == 'Cat') | (names == 'Dog')

data[cond]

array([[-0.37446181,  1.97983472,  1.30259044, -0.76068853, -0.38403333,
        -0.58921021],
       [ 0.86904303, -0.55828223,  0.58721247,  0.85384219, -0.44331003,
         0.36832809],
       [-1.38023211, -1.23913218,  0.78733943,  1.38221026, -0.51527413,
        -0.30460805],
       [ 0.25880582, -0.04217327,  0.92321458,  1.2608482 ,  0.44009483,
         0.00794235]])

In [25]:
# Element-wise value assignment!

# data < 0 is a boolean array with data's shape; every element it selects
# is overwritten with 0, in place.
data[data < 0] = 0

data

array([[0.        , 1.97983472, 1.30259044, 0.        , 0.        ,
        0.        ],
       [0.86904303, 0.        , 0.58721247, 0.85384219, 0.        ,
        0.36832809],
       [0.6618543 , 2.00916752, 0.30178932, 0.71261075, 0.        ,
        0.4436623 ],
       [0.51627589, 1.14575161, 0.        , 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.78733943, 1.38221026, 0.        ,
        0.        ],
       [0.25880582, 0.        , 0.92321458, 1.2608482 , 0.44009483,
        0.00794235]])

In [26]:
# Assigning through a 1-D row mask sets every value in the matching
# ('Cat') rows to 7, in place.
data[names == 'Cat'] = 7

data

array([[0.        , 1.97983472, 1.30259044, 0.        , 0.        ,
        0.        ],
       [7.        , 7.        , 7.        , 7.        , 7.        ,
        7.        ],
       [0.6618543 , 2.00916752, 0.30178932, 0.71261075, 0.        ,
        0.4436623 ],
       [0.51627589, 1.14575161, 0.        , 0.        , 0.        ,
        0.        ],
       [7.        , 7.        , 7.        , 7.        , 7.        ,
        7.        ],
       [0.25880582, 0.        , 0.92321458, 1.2608482 , 0.44009483,
        0.00794235]])

In [31]:
# Fancy indexing
# 8x4 array of random integers from 1 to 9 (`high` is exclusive).
# NOTE(review): unseeded, so the values differ on every run.
arr6 = np.random.randint(low=1, high=10, size=(8, 4))

arr6

array([[1, 9, 8, 3],
       [3, 3, 7, 6],
       [2, 8, 5, 9],
       [3, 7, 2, 2],
       [3, 4, 3, 4],
       [4, 3, 9, 1],
       [3, 7, 6, 6],
       [9, 2, 6, 9]])

In [33]:
# A list of integers selects whole rows in the given order (rows 0, 2, 4).
# Not displayed: only the last expression of the cell is shown.
arr6[[0,2,4]]

# Negative indexes count from the end; the one-element list keeps the
# result two-dimensional (a single row, third from the last).
arr6[[-3]]

array([[4, 3, 9, 1]])

In [37]:
# Numbers 0..31 laid out row by row as 4 rows of 8 columns.
arr7 = np.arange(32).reshape(4, 8)

arr7

array([[ 0,  1,  2,  3,  4,  5,  6,  7],
       [ 8,  9, 10, 11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29, 30, 31]])

In [40]:
# Passing one index list per axis pairs them up element by element, so this
# selects the positions (0, 0), (1, 1), (2, 3), (3, 2) and returns a 1-D
# array, not a rectangular block.
arr7[[0,1,2,3], [0,1,3,2]]


array([ 0,  9, 19, 26])

In [46]:
# To take the full rectangular block (every listed row crossed with every
# listed column) instead of element-wise pairs, build an open mesh with
# np.ix_. This selects the block in one indexing step, whereas chaining
# arr7[rows][:, cols] first copies the selected rows and then indexes that
# temporary copy again (and so cannot be used to assign back into arr7).

arr7[np.ix_([0, 1, 2, 3], [0, 1, 3, 2])]

array([[ 0,  1,  3,  2],
       [ 8,  9, 11, 10],
       [16, 17, 19, 18],
       [24, 25, 27, 26]])

In [65]:
# Transposing arrays
# Start from a 2-row, 10-column array holding 0..19.
arr8 = np.arange(20).reshape(2, 10)

arr8

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]])

In [64]:
# .T is the transpose: the axes are swapped, so for the (2, 10) arr8
# defined above the result has shape (10, 2).
# NOTE(review): the recorded output below is 5x3, i.e. it predates the
# current definition of arr8 (execution count 64 vs. 65) -- re-run to refresh.
arr8.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [61]:
# 6x3 array of standard-normal samples.
# NOTE(review): unseeded, and arr_dot is not used by the dot-product cell
# that follows (it operates on arr8) -- confirm which array was intended.
arr_dot = np.random.randn(6, 3)

arr_dot

array([[-1.25077398,  0.42938874, -0.59245356],
       [-1.76468315,  1.02323777, -1.17314513],
       [ 0.05385263, -0.43707569,  0.88298124],
       [ 1.60284383, -0.04241801,  0.15510463],
       [ 0.65469495, -1.91710305, -0.37604837],
       [-0.72742158, -0.49010607, -0.58642533]])

In [66]:
# Matrix product of arr8's transpose (10x2) with arr8 (2x10) -> 10x10.
np.dot(arr8.T, arr8)

array([[100, 110, 120, 130, 140, 150, 160, 170, 180, 190],
       [110, 122, 134, 146, 158, 170, 182, 194, 206, 218],
       [120, 134, 148, 162, 176, 190, 204, 218, 232, 246],
       [130, 146, 162, 178, 194, 210, 226, 242, 258, 274],
       [140, 158, 176, 194, 212, 230, 248, 266, 284, 302],
       [150, 170, 190, 210, 230, 250, 270, 290, 310, 330],
       [160, 182, 204, 226, 248, 270, 292, 314, 336, 358],
       [170, 194, 218, 242, 266, 290, 314, 338, 362, 386],
       [180, 206, 232, 258, 284, 310, 336, 362, 388, 414],
       [190, 218, 246, 274, 302, 330, 358, 386, 414, 442]])

In [70]:
# Two spellings of the same 2-D transpose, timed with %timeit:
# swapaxes(0, 1) exchanges the two named axes; .T reverses the axis order.
%timeit arr8.swapaxes(0, 1)

%timeit arr8.T

287 ns ± 6.92 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
149 ns ± 1.11 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)
