# Numpy: part 1

These are my notes on NumPy basics: creating arrays, indexing, slicing, reshaping, joining/splitting, and sorting.



In [1]:
import numpy as np
# Build a plain Python list holding the integers 0 through 9
L = [*range(10)]
L

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [2]:
# this is a list
type(L)

list

In [3]:
# first element is integer
type(L[0])

int

In [4]:
# heterogeneous list: every element may have its own type
L2 = [True, '2', 3.0, 4]
[type(element) for element in L2]  # list comprehension over the elements

[bool, str, float, int]

In [11]:
# without a list comprehension, the same result needs an explicit loop
types = []  # collects the type of each element of L2, in order
for i in L2:
    types.append(type(i))
print(types)

[<class 'bool'>, <class 'str'>, <class 'float'>, <class 'int'>]


## Arrays
If the data does not need to mix types, a NumPy array (`np.array`) is a better fit than a list: every element of the array shares a single data type.

In [14]:
# integer array: all elements of a NumPy array share one data type
# (numpy is already imported as np in the first cell, so it is not re-imported here)
np.array([1, 4, 2, 5, 3])

array([1, 4, 2, 5, 3])

In [15]:
# force data type: dtype
np.array([1,2,3,4,5], dtype = 'float32')

array([1., 2., 3., 4., 5.], dtype=float32)

In [16]:
# nested arrays
np.array([range(i, i+3) for i in [2,4,6]])

array([[2, 3, 4],
       [4, 5, 6],
       [6, 7, 8]])

In [17]:
# length 10 integer array with zeros
np.zeros(10, dtype=int)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [18]:
# array holding an evenly spaced sequence
np.arange(0, 20, 2) # start, stop (not included), step

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [22]:
# range(3) # this does not print out the list
[i for i in range(3)] # use list comprehension

[0, 1, 2]

In [21]:
[i for i in range(0, 20, 2)] # this is not an array, it is a list

[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]

In [None]:
list(range(0, 20, 2)) # a second way to print out the list

In [23]:
np.linspace(0, 1, 5) # from, end, number of elements

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

## Generate random numbers



In [36]:
# uniformly distributed rv between 0, 1
np.random.random((3, 3))

array([[0.92337536, 0.78182167, 0.35270943],
       [0.07314655, 0.52112199, 0.72371597],
       [0.40264526, 0.1598753 , 0.07403676]])

In [35]:
# normally distributed rv with mean 0 sd 1
np.random.normal(0, 1, (3, 3))

array([[-0.58112598, -0.66739173, -0.03223013],
       [-2.92262658, -0.61697541,  5.412998  ],
       [ 2.10372867, -0.73515018, -0.87912893]])

In [26]:
# random integers in the interval between 0 and 10
np.random.randint(0, 10, (3 ,3))

array([[2, 9, 2],
       [1, 0, 0],
       [2, 2, 1]])

# Manipulation with numpy

- size, shape, data types
- indexing
- slicing
- reshaping
- joining and splitting

In [39]:
# Seed the global generator so the three example arrays are reproducible
np.random.seed(0)

x1 = np.random.randint(0, 10, size=6)          # one-dimensional
x2 = np.random.randint(0, 10, size=(3, 4))     # two-dimensional
x3 = np.random.randint(0, 10, size=(3, 4, 5))  # three-dimensional

In [42]:
# Report the basic attributes of x3: number of axes, shape, element count, dtype
for label, value in (
    ("x3 ndim ", x3.ndim),        # number of axes, not rows/columns
    ("x3 shape ", x3.shape),
    ("x3 size ", x3.size),        # 3 * 4 * 5 elements in total
    ("x3 data type ", x3.dtype),
):
    print(label, value)

x3 ndim  3
x3 shape  (3, 4, 5)
x3 size  60
x3 data type  int64


## Array indexing
Indexing starts at zero; access an element by putting its index in square brackets.

In [43]:
x1

array([5, 0, 3, 3, 7, 9])

In [44]:
x1[0] # for multiple, use [index1, index2]

np.int64(5)

In [45]:
# negative indexing
x1[-1]

np.int64(9)

In [47]:
# multi-dimensional array
x2

array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])

In [48]:
x2[0, 0] # row id, col id

np.int64(3)

In [50]:
# entire row (array slicing)
x2[0, :]

array([3, 5, 2, 4])

In [51]:
# update value in place
x2[0, 0] = 100
x2

array([[100,   5,   2,   4],
       [  7,   6,   8,   8],
       [  1,   6,   7,   7]])

### Array slicing

In [52]:
# integers 0..9, used for the slicing examples that follow
x = np.arange(0, 10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [53]:
x[:5] # up to 5 (not including)

array([0, 1, 2, 3, 4])

In [54]:
# x[5:] # from 5
x[4:7]

array([4, 5, 6])

In [55]:
# every other element (using steps, default is 1)
x[::2]

array([0, 2, 4, 6, 8])

In [56]:
x[1::2] # start from index 1

array([1, 3, 5, 7, 9])

In [58]:
# reverse the array
x[::-1]

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [59]:
x2

array([[100,   5,   2,   4],
       [  7,   6,   8,   8],
       [  1,   6,   7,   7]])

In [60]:
x2[:2, :3] # equiv. 0:2, 0:3

array([[100,   5,   2],
       [  7,   6,   8]])

In [62]:
x2[0]# first row, equiv. x2[0, :]

array([100,   5,   2,   4])

# Subarrays, copy and no-copy

Observe whether the original array changes!!

In [64]:
print(x2)

[[100   5   2   4]
 [  7   6   8   8]
 [  1   6   7   7]]


In [66]:
# take the top-left 2 x 2 block of x2
x2_sub = x2[:2, :2]
print(x2_sub)

[[100   5]
 [  7   6]]


In [69]:
# overwrite the first row of the subarray with zeros
x2_sub[0] = 0
print(x2_sub) # modified

[[0 0]
 [7 6]]


In [70]:
# check original data, first 2 also modified!
print(x2)

[[0 0 2 4]
 [7 6 8 8]
 [1 6 7 7]]


In [71]:
# take an independent copy of the same 2 x 2 block
x2_sub_copy = x2[:2, :2].copy()
# overwrite the copy's first row with a different value
x2_sub_copy[0] = 100
print(x2_sub_copy) # the copy has changed
print(x2) # check the original again

[[100 100]
 [  7   6]]
[[0 0 2 4]
 [7 6 8 8]
 [1 6 7 7]]


# Reshaping

Rearrange the elements of an array into a matrix or a higher-dimensional array.

In [74]:
# lay the integers 1..9 out as a 3 x 3 grid
grid = np.reshape(np.arange(1, 10), (3, 3))
grid

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [81]:
x = np.array([1, 2, 3])
# row vector via reshape
x.reshape((1, 3)) # one row, 3 column

array([[1, 2, 3]])


In [87]:
# column vector via reshape: three rows, one column
xr = np.reshape(x, (3, 1))
print(xr)

[[1]
 [2]
 [3]]


### Concatenate and splitting arrays

In [88]:
# array concatenation: join two 1-D arrays end to end
x = np.array([1, 2, 3])
y = np.array([4, 5, 6])
np.concatenate((x, y)) # the arrays are passed together as a single sequence

array([1, 2, 3, 4, 5, 6])

In [91]:
# two-dimensional array: an outer list of rows, each row itself a list
grid = np.array([[1, 2, 3], [4, 5, 6]])

In [93]:
print(np.concatenate([grid, grid]))

[[1 2 3]
 [4 5 6]
 [1 2 3]
 [4 5 6]]


In [94]:
# concatenate along a chosen axis
print(np.concatenate([grid, grid], axis = 0))  # axis 0: stack vertically (more rows)

[[1 2 3]
 [4 5 6]
 [1 2 3]
 [4 5 6]]


In [95]:
print(np.concatenate([grid, grid], axis = 1))  # axis 1: join side by side (more columns)

[[1 2 3 1 2 3]
 [4 5 6 4 5 6]]


In [97]:
# alternatively, np.vstack stacks the inputs as rows (np.hstack is the horizontal counterpart)
# NOTE: x2 is rebound here to the 1-D array x, replacing the 2-D array from earlier cells
x2 = x
grid2 = np.array([11, 12, 13])
print(np.vstack((x2, grid2)))

[[ 1  2  3]
 [11 12 13]]


In [102]:
# splitting: cut x at indices 2 and 4 into three pieces
x = np.arange(10)
pieces = np.split(x, [2, 4])
x1, x2, x3 = pieces
print(*pieces)

[0 1] [2 3] [4 5 6 7 8 9]


# Indexing


In [118]:
# dedicated generator with a fixed seed, so the draws are reproducible
rand = np.random.RandomState(42)
x = rand.randint(0, 100, size=10)
print(x)

[51 92 14 71 60 20 82 86 74 74]


In [119]:
# access three elements
[x[3], x[7], x[2]]

[np.int64(71), np.int64(86), np.int64(14)]

In [121]:
# use index
ind = [3, 7, 2]
x[ind]

array([71, 86, 14])

In [124]:
# higher dimension indexing
ind = np.array([[3, 7],[4, 5]])
print(x[ind])

[[71 86]
 [60 20]]


# Sorting


In [130]:
# np.sort, np.argsort
x = np.array([2, 1, 4,3,5])
np.sort(x)

array([1, 2, 3, 4, 5])

In [128]:
# sort in-place, the sorted values will be saved
x.sort()
print(x)

[1 2 3 4 5]


In [131]:
# if want to keep the index instead of values
# note that the values returned are the indices in ascending order
x = np.array([2, 1, 4, 3, 5])
i = np.argsort(x)
print(i)
# combine with indexing
print(x[i])

[1 0 3 2 4]
[1 2 3 4 5]


In [133]:
# 3 x 5 matrix of random integers, used to sort along rows and columns
X = np.reshape(rand.randint(0, 20, size=15), (3, 5))

In [135]:
print(X)

[[ 3  7  2  1 11]
 [ 5  1  0 11 11]
 [16  9 15 14 14]]


In [137]:
# sort each column
print(np.sort(X, axis = 0))

[[ 3  1  0  1 11]
 [ 5  7  2 11 11]
 [16  9 15 14 14]]


In [138]:
print(np.sort(X, axis = 1)) # by row

[[ 1  2  3  7 11]
 [ 0  1  5 11 11]
 [ 9 14 14 15 16]]
