# NumPy tutorial

### Import libraries

In [1]:
import numpy as np
import string

### Creating 1 x d array

In [2]:
# integers 1 through 10 as a plain Python list
sample_list = list(range(1, 11))
# when no dtype is given, it is inferred from the elements of the list
np.array(sample_list)

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [3]:
# arrays are not limited to numbers - here each element is a one-character string
np.array([letter for letter in string.ascii_letters])

array(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
       'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
       'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
       'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'],
      dtype='<U1')

In [4]:
# pass dtype explicitly to override the type inferred from the list
np.array(sample_list, dtype='float32')

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.], dtype=float32)

### Creating an array from a sequence of numbers

In [5]:
# builds the sequence 0, 1, ..., n-1 (like Python's "range", but returned as an array)
np.arange(0, 11)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [6]:
# start, stop (exclusive) and step size can all be chosen
np.arange(start=2, stop=11, step=2)

array([ 2,  4,  6,  8, 10])

### Creating custom n x d array

In [7]:
# a second row of the same length (10..19) so the two lists stack into a 2 x 10 array
sample_list_2 = [value for value in range(10, 20)]
array_2d = np.array((sample_list, sample_list_2))

### Specialized preset array set-ups

In [8]:
# array filled with zeros; shape is given as a (rows, columns) tuple
np.zeros(shape=(4, 5))

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [9]:
# array filled with ones; the dtype argument picks the element type
np.ones(shape=(2, 4), dtype='int32')

array([[1, 1, 1, 1],
       [1, 1, 1, 1]])

In [10]:
# array in which every element equals the given fill value
np.full(shape=(2, 3), fill_value=4)

array([[4, 4, 4],
       [4, 4, 4]])

In [11]:
# random integers drawn from [low, high) - the upper bound is exclusive
np.random.randint(low=5, high=25, size=(2, 4))

array([[ 6,  8,  9, 17],
       [10,  6, 24, 17]])

In [12]:
# random floats drawn uniformly from [0, 1)
np.random.rand(5)

array([0.98922097, 0.6099171 , 0.65066929, 0.85471729, 0.61422071])

In [13]:
# create array of random numbers (from the standard Normal distribution: mean 0, std 1)
print(np.random.randn(3,4),'\n')

# draw ONE large sample and check both statistics on it, so the mean and the
# standard deviation describe the same data (and the 9M values are generated once)
normal_sample = np.random.randn(3000,3000)
print('Check mean: ', normal_sample.mean())
print('Check standard deviation: ', normal_sample.std())

[[ 0.47901532  2.04948849 -1.14890001  0.52533713]
 [ 1.35549787 -0.8768839   1.10023188  0.07130925]
 [-0.59767039  1.34448327  0.25086005 -0.29280297]] 

Check mean:  0.0005427387464689994
Check standard deviation:  0.9998245946834824


In [14]:
# draw a random (3,4) array from a list of choices; when p is omitted every choice
# is equally likely, here each choice is given its own probability (they sum to 1)
np.random.choice(
    ['dog', 'cat', 'rabbit', 'turtle', 'lizard'],
    size=(3, 4),
    replace=True,
    p=[0.05, 0.15, 0.4, 0.2, 0.2],
)

array([['cat', 'lizard', 'rabbit', 'rabbit'],
       ['lizard', 'lizard', 'lizard', 'lizard'],
       ['rabbit', 'turtle', 'cat', 'lizard']], dtype='<U6')

### Array properties

In [15]:
# shape is a (rows, columns) tuple for a 2-D array; unpack it once and reuse the parts
array_2d_shape = array_2d.shape
n_rows, n_cols = array_2d_shape
print('Shape:', array_2d_shape)
print('Row:', n_rows)
print('Columns:', n_cols)
# size is the total element count, ndim is the number of axes
print('Size (n * d):', array_2d.size)
print('Dimensions (n):', array_2d.ndim)

Shape: (2, 10)
Row: 2
Columns: 10
Size (n * d): 20
Dimensions (n): 2


### Changing dimensions of array

In [16]:
# 4 x 6 array of random integers in [30, 94); reused by the following cells
arr_reshape = np.random.randint(low=30, high=94, size=(4, 6))
print(arr_reshape)
# the new shape must hold the same number of elements (4*6 == 3*8)
arr_reshape.reshape((3, 8))

[[69 54 58 41 40 50]
 [93 33 40 32 54 51]
 [51 90 79 62 69 75]
 [63 35 54 80 30 42]]


array([[69, 54, 58, 41, 40, 50, 93, 33],
       [40, 32, 54, 51, 51, 90, 79, 62],
       [69, 75, 63, 35, 54, 80, 30, 42]])

In [17]:
# pass -1 for one dimension and NumPy infers its size from the element count
reshaped_two_rows = arr_reshape.reshape((2, -1))
reshaped_three_cols = arr_reshape.reshape((-1, 3))
print('Reshaped as (2,12):\n', reshaped_two_rows)
print('Reshaped as (8,3):\n', reshaped_three_cols)

Reshaped as (2,12):
 [[69 54 58 41 40 50 93 33 40 32 54 51]
 [51 90 79 62 69 75 63 35 54 80 30 42]]
Reshaped as (8,3):
 [[69 54 58]
 [41 40 50]
 [93 33 40]
 [32 54 51]
 [51 90 79]
 [62 69 75]
 [63 35 54]
 [80 30 42]]


In [18]:
# collapsing an (n x d) array into a 1-D array of n*d elements
flat_copy = arr_reshape.flatten()   # flatten always returns a new copy of the data
flat_view = arr_reshape.ravel()     # ravel returns a view of the original whenever possible
print('To 1-D (deep copy): ', flat_copy)
print('To 1-D (shallow copy): ', flat_view)

To 1-D (deep copy):  [69 54 58 41 40 50 93 33 40 32 54 51 51 90 79 62 69 75 63 35 54 80 30 42]
To 1-D (shallow copy):  [69 54 58 41 40 50 93 33 40 32 54 51 51 90 79 62 69 75 63 35 54 80 30 42]


In [19]:
# transpose swaps rows and columns, turning an (n x d) array into (d x n)
np.transpose(arr_reshape)

array([[69, 93, 51, 63],
       [54, 33, 90, 35],
       [58, 40, 79, 54],
       [41, 32, 62, 80],
       [40, 54, 69, 30],
       [50, 51, 75, 42]])

In [20]:
# np.expand_dims inserts a new axis of length 1 at the given position
shape_before_expand = arr_reshape.shape
print('Current shape: ', shape_before_expand)
arr_reshape_new = np.expand_dims(arr_reshape, 0)
shape_after_expand = arr_reshape_new.shape
print('New shape (with new axis made at index of axis): ', shape_after_expand)

Current shape:  (4, 6)
New shape (with new axis made at index of axis):  (1, 4, 6)


In [21]:
# removing an axis of length 1 from an array (the inverse of np.expand_dims)
print('Current shape: ', arr_reshape_new.shape)
# axis=0 drops the leading singleton axis; np.squeeze raises ValueError when the
# selected axis does not have length 1
arr_reshape_remove = np.squeeze(arr_reshape_new, axis=0)
# label fixed: this cell removes an axis (the old text was copied from the expand_dims cell)
print('New shape (with axis removed at index of axis): ', arr_reshape_remove.shape)

Current shape:  (1, 4, 6)
New shape (with new axis made at index of axis):  (4, 6)


### Combining arrays

In [22]:
# vertical stacking: the new row needs as many elements as arr_reshape has columns
new_arr_v = np.arange(arr_reshape.shape[-1])
np.vstack([new_arr_v, arr_reshape])

array([[ 0,  1,  2,  3,  4,  5],
       [69, 54, 58, 41, 40, 50],
       [93, 33, 40, 32, 54, 51],
       [51, 90, 79, 62, 69, 75],
       [63, 35, 54, 80, 30, 42]])

In [23]:
# horizontal stacking: the new column needs one element per row of arr_reshape,
# shaped as (rows, 1) so it lines up next to the existing columns
new_arr_h = np.arange(len(arr_reshape)).reshape((-1, 1))
np.hstack([arr_reshape, new_arr_h])

array([[69, 54, 58, 41, 40, 50,  0],
       [93, 33, 40, 32, 54, 51,  1],
       [51, 90, 79, 62, 69, 75,  2],
       [63, 35, 54, 80, 30, 42,  3]])

In [24]:
# depth-wise stacking: the two (4,6) arrays are paired element by element along a new third axis
arr_reshape_2 = np.random.randint(2, size=(4, 6))
arr_3d = np.dstack([arr_reshape, arr_reshape_2])
print('New array after combination:\n', arr_3d, '\n')
# reuse the stacked array instead of building it a second time
print('Size: ', arr_3d.shape)

New array after combination:
 [[[69  0]
  [54  0]
  [58  0]
  [41  0]
  [40  1]
  [50  1]]

 [[93  1]
  [33  1]
  [40  0]
  [32  1]
  [54  1]
  [51  1]]

 [[51  0]
  [90  1]
  [79  1]
  [62  0]
  [69  1]
  [75  1]]

 [[63  1]
  [35  1]
  [54  1]
  [80  0]
  [30  0]
  [42  0]]] 

Size:  (4, 6, 2)


In [25]:
# concatenate arrays - stacked on top of each other (axis=0, the default) or side-by-side (axis=1)
rand_arr1 = np.random.randint(low=200, high=300, size=(1, 5))
rand_arr2 = np.random.randint(low=100, high=200, size=(1, 5))
rand_arr3 = np.random.randint(low=0, high=100, size=(1, 5))

rand_arrays = (rand_arr1, rand_arr2, rand_arr3)
print('Axis=0:\n', np.concatenate(rand_arrays, axis=0), '\n')
print('Axis=1:\n', np.concatenate(rand_arrays, axis=1))

Axis=0:
 [[241 288 248 215 239]
 [188 125 128 175 124]
 [ 96  51  29  16  18]] 

Axis=1:
 [[241 288 248 215 239 188 125 128 175 124  96  51  29  16  18]]


In [26]:
# np.append adds an array after the end of an existing one along the chosen axis
rand_arr12 = np.concatenate((rand_arr1, rand_arr2), axis=0)
np.append(rand_arr12, rand_arr3, 0)

array([[241, 288, 248, 215, 239],
       [188, 125, 128, 175, 124],
       [ 96,  51,  29,  16,  18]])

### Slicing arrays

In [27]:
# slicing arrays works much like slicing Python lists
# original array
print('Original array:\n', array_2d, '\n')
# 1) access 2nd row (index 1, since indexing starts at 0)
second_row = array_2d[1]
print('2nd row: ', second_row)

Original array:
 [[ 1  2  3  4  5  6  7  8  9 10]
 [10 11 12 13 14 15 16 17 18 19]] 

2nd row:  [10 11 12 13 14 15 16 17 18 19]


In [28]:
# 2) access 5th column (index 4): ':' keeps every row, 4 picks the column
fifth_column = array_2d[:, 4]
print('5th column:', fifth_column)

5th column: [ 5 14]


In [29]:
# 3) access rows 1 and 2 (indices 0-1) & the columns at indices 2 to 3
# reshape reads the elements in row-major order, which is the same order ravel() produces
array_2d_2 = array_2d.reshape(4, 5)
print('Original array:\n', array_2d_2, '\n')
rows_cols_slice = array_2d_2[0:2, 2:4]
print('Rows 1,2 & Columns 2-3:\n', rows_cols_slice)

Original array:
 [[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [10 11 12 13 14]
 [15 16 17 18 19]] 

Rows 1,2 & Columns 2-3:
 [[3 4]
 [8 9]]


In [30]:
# 4) 3-D slicing: the index order used here is [depth, row, column]
#    -> depth 1-2 (1:3), row 2, columns 0-1 (0:2)
# reshape already reads the elements in row-major order, so no ravel() is needed first
array_3d_2 = arr_3d.reshape(4,4,3)
print('Original array:\n', array_3d_2, '\n')
# label fixed to match the slice: the last axis takes 0:2, i.e. columns 0-1 (not 2-3)
print('Rows 2, columns 0-1, depth 1-2:\n', array_3d_2[1:3, 2, 0:2])

Original array:
 [[[69  0 54]
  [ 0 58  0]
  [41  0 40]
  [ 1 50  1]]

 [[93  1 33]
  [ 1 40  0]
  [32  1 54]
  [ 1 51  1]]

 [[51  0 90]
  [ 1 79  1]
  [62  0 69]
  [ 1 75  1]]

 [[63  1 35]
  [ 1 54  1]
  [80  0 30]
  [ 0 42  0]]] 

Rows 2, columns 2-3, depth 1-2:
 [[32  1]
 [62  0]]


### Arithmetic with arrays

In [31]:
# a1: random (3,5) integers in [4, 12); a2: one value per row; a3: one value per column
a1 = np.random.randint(low=4, high=12, size=(3, 5))
a2 = np.array([[1], [2], [4]])
a3 = np.array([[1, 3, 5, 7, 9]])
print('Size of a1: ', a1.shape)
print('Size of a2: ', a2.shape)
print('Size of a3: ', a3.shape)

# addition via broadcasting - a1 and a2 have the same number of rows,
# so the single column of a2 is added to every column of a1
a1_plus_a2 = a1 + a2
print('Original a1:\n', a1, '\n')
print('After adding a2:\n', a1_plus_a2)

Size of a1:  (3, 5)
Size of a2:  (3, 1)
Size of a3:  (1, 5)
Original a1:
 [[10  6 11  6  4]
 [11 11  8  9  9]
 [ 9  5  6 11  8]] 

After adding a2:
 [[11  7 12  7  5]
 [13 13 10 11 11]
 [13  9 10 15 12]]


In [32]:
# multiplication via broadcasting - a1 and a3 have the same number of columns,
# so the single row of a3 scales every row of a1 element by element
a1_times_a3 = a1 * a3
print('Original a1:\n', a1, '\n')
print('After multiplying a3:\n', a1_times_a3)

Original a1:
 [[10  6 11  6  4]
 [11 11  8  9  9]
 [ 9  5  6 11  8]] 

After multiplying a3:
 [[10 18 55 42 36]
 [11 33 40 63 81]
 [ 9 15 30 77 72]]


### Descriptive statistics

In [33]:
# axis=1 reduces across the columns (one result per row);
# axis=0 reduces across the rows (one result per column)

# smallest value
print('Min (row-wise): ', np.min(a1, axis=1))
print('Min (column-wise): ', np.min(a1, axis=0))
# largest value
print('Max (row-wise): ', np.max(a1, axis=1))
print('Max (column-wise): ', np.max(a1, axis=0))
# position of the smallest value (argmin)
print('Index of Min (row-wise): ', np.argmin(a1, axis=1))
print('Index of Min (column-wise): ', np.argmin(a1, axis=0))
# position of the largest value (argmax)
print('Index of Max (row-wise): ', np.argmax(a1, axis=1))
print('Index of Max (column-wise): ', np.argmax(a1, axis=0))
# arithmetic mean
print('Mean (row-wise): ', np.mean(a1, axis=1))
print('Mean (column-wise): ', np.mean(a1, axis=0))
# median (available as a function only, not as an array method)
print('Median (row-wise): ', np.median(a1, axis=1))
print('Median (column-wise): ', np.median(a1, axis=0))
# standard deviation
print('Std Dev (row-wise): ', a1.std(axis=1))
print('Std Dev (column-wise): ', a1.std(axis=0))

Min (row-wise):  [4 8 5]
Min (column-wise):  [9 5 6 6 4]
Max (row-wise):  [11 11 11]
Max (column-wise):  [11 11 11 11  9]
Index of Min (row-wise):  [4 2 1]
Index of Min (column-wise):  [2 2 2 0 0]
Index of Max (row-wise):  [2 0 3]
Index of Max (column-wise):  [1 1 0 2 1]
Mean (row-wise):  [7.4 9.6 7.8]
Mean (column-wise):  [10.          7.33333333  8.33333333  8.66666667  7.        ]
Median (row-wise):  [6. 9. 8.]
Median (column-wise):  [10.  6.  8.  9.  8.]
Std Dev (row-wise):  [2.65329983 1.2        2.13541565]
Std Dev (column-wise):  [0.81649658 2.62466929 2.05480467 2.05480467 2.1602469 ]
