# NumPy Introduction

In [1]:
import numpy as np

### Creating arrays

In [2]:
# build a 1-D numpy array from a plain Python list
L = [1,2,3,4]
np.array(L)

array([1, 2, 3, 4])

In [3]:
np.arange(1,10)

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [4]:
# arange(start, stop) returns a numpy array of consecutive integers;
# start is included, stop is excluded
np.arange(4,10)

array([4, 5, 6, 7, 8, 9])

In [5]:
# arange(start, stop, step_size) returns a numpy array that begins at start and
# advances by step_size, stopping before stop is reached (stop is excluded)
np.arange(4,10,2)

array([4, 6, 8])

In [6]:
# multidimensional array: a nested Python list (one inner list per row)
# is converted into a 2-D numpy array
array_of_arrays = [[1,2,3], [4,5,6], [7,8,9]]
np.array(array_of_arrays)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [7]:
# reshape(n_rows, n_columns) changes the shape of a numpy array;
# here the nine values 1..9 are laid out row by row as a 3x3 matrix

np.arange(1,10).reshape(3,3)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

### Useful methods

In [8]:
arr = np.array([17, 9, 3, -5, -1])

In [9]:
arr.max()

17

In [10]:
arr.min()

-5

In [11]:
arr.mean()

4.6

In [12]:
arr.std()

7.735631842325487

In [13]:
# the built-in sorted() returns a new Python list; arr itself is left unchanged
sorted(arr)

[-5, -1, 3, 9, 17]

In [14]:
arr

array([17,  9,  3, -5, -1])

In [15]:
# ndarray.sort() sorts arr in place and returns None, so the cell shows no output
arr.sort()

In [16]:
arr

array([-5, -1,  3,  9, 17])

In [17]:
# 5x5 matrix holding the integers 1..25, filled row by row
X = np.reshape(np.arange(1, 26), (5, 5))
X

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20],
       [21, 22, 23, 24, 25]])

In [21]:
# axis=1 averages across the columns, giving one mean per row
X.mean(axis=1)

array([ 3.,  8., 13., 18., 23.])

In [19]:
# axis=0 averages down the rows, giving one mean per column
X.mean(axis=0)

array([11., 12., 13., 14., 15.])

In [20]:
mean = X.mean(axis=0)
mean

array([11., 12., 13., 14., 15.])

In [22]:
# broadcasting: the length-5 vector of column means is subtracted from every row of X
X - mean

array([[-10., -10., -10., -10., -10.],
       [ -5.,  -5.,  -5.,  -5.,  -5.],
       [  0.,   0.,   0.,   0.,   0.],
       [  5.,   5.,   5.,   5.,   5.],
       [ 10.,  10.,  10.,  10.,  10.]])

### Random numbers

In [24]:
# rand(n_rows, n_columns) returns an array of that shape filled with uniform
# random floats in [0, 1); the values change on every run unless a seed is set
np.random.rand(5,5)

array([[0.55271604, 0.45925238, 0.42476098, 0.89080298, 0.3117506 ],
       [0.14699619, 0.77412789, 0.42695314, 0.38062672, 0.73983069],
       [0.0201803 , 0.0272383 , 0.16629684, 0.67599114, 0.8368625 ],
       [0.44836919, 0.87782616, 0.26043322, 0.9128664 , 0.4856928 ],
       [0.12180708, 0.21589375, 0.4394631 , 0.73139155, 0.9195354 ]])

In [25]:
# randint(start, exclusive stop, (shape)) returns random integers between start and stop, with a given shape
np.random.randint(0, 100, (5,5))

array([[81, 89, 97, 63, 97],
       [91, 46, 18, 76, 39],
       [27, 67, 65, 86, 26],
       [38, 24,  2, 65, 80],
       [32, 63, 86, 54, 31]])

In [27]:
# uniform(start, stop) without a size argument returns a single random float
# drawn uniformly between start (inclusive) and stop (exclusive)
np.random.uniform(1,10)

2.6089538013193363

### Array indexing

In [28]:
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [29]:
arr[8]

8

In [30]:
# slice from index 3 up to, but not including, index 8
arr[3:8]

array([3, 4, 5, 6, 7])

In [31]:
# everything except the last element
arr[:-1]

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [32]:
arr[-1]

9

In [33]:
# a step of -1 walks the array backwards, i.e. returns it in reversed order
arr[::-1]

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

### Array broadcasting

In [None]:
# the scalar 25 is broadcast to each of the first five elements; arr is modified in place
arr[:5] = 25
arr

In [None]:
arr = np.arange(10)
arr

In [None]:
# a slice is a view, not a copy: arr2 shares its data with arr,
# so writing through arr2 also overwrites the first five elements of arr
arr2 = arr[:5]
arr2[:] = 99
arr2

In [None]:
arr

In [None]:
# using a copy of arr: .copy() gives arr3 its own data,
# so overwriting arr3 leaves arr untouched
arr3 = arr.copy()
arr3[:] = 0
arr3

In [None]:
arr

### 2d Array (Matrix) indexing

In [34]:
# 4x3 matrix holding the integers 0..11, filled row by row
arr_2d = np.reshape(np.arange(12), (4, 3))
arr_2d

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [35]:
# chained indexing: arr_2d[2] selects row 2, then [2] picks the element at index 2 of that row
arr_2d[2][2]

8

In [36]:
# arr[row_index, col_index] indexes both axes in a single step;
# it returns the same element as arr_2d[2][2]

arr_2d[2,2]

8

In [37]:
arr_2d[0]          # get a row (a sample)

array([0, 1, 2])

In [38]:
# ':' selects every row, so this works for an array with any number of rows
arr_2d[:, 0]  # get a column (a single feature)

array([0, 3, 6, 9])

In [39]:
arr_2d

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [40]:
# rows 1 and 2, columns 1 onward -> a 2x2 sub-matrix
arr_2d[1:3, 1:]   # get a slice

array([[4, 5],
       [7, 8]])

### Appending arrays

In [41]:
a = np.arange(9).reshape(3,3)
a

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [42]:
# 3x3 array of random integers from 9 to 17 (the upper bound 18 is exclusive)
b = np.random.randint(9,18, (3,3))
b

array([[16, 15, 13],
       [10, 15, 15],
       [17, 16, 12]])

In [43]:
# vstack stacks the arrays vertically: the rows of b are appended below the rows of a
c = np.vstack((a,b))
c

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [16, 15, 13],
       [10, 15, 15],
       [17, 16, 12]])

In [44]:
# concatenating along axis=0 (rows) gives the same result as vstack
np.concatenate((a,b),axis=0)

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [16, 15, 13],
       [10, 15, 15],
       [17, 16, 12]])

In [45]:
# hstack stacks the arrays horizontally: the columns of b are appended to the right of a
np.hstack((a,b))

array([[ 0,  1,  2, 16, 15, 13],
       [ 3,  4,  5, 10, 15, 15],
       [ 6,  7,  8, 17, 16, 12]])

In [46]:
# concatenating along axis=1 (columns) gives the same result as hstack
np.concatenate((a,b),axis=1)

array([[ 0,  1,  2, 16, 15, 13],
       [ 3,  4,  5, 10, 15, 15],
       [ 6,  7,  8, 17, 16, 12]])

### Boolean (conditional) selection

In [47]:
arr = np.arange(1,11)
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [48]:
arr > 5      # boolean array

array([False, False, False, False, False,  True,  True,  True,  True,
        True])

In [49]:
# indexing with a boolean array keeps only the elements where the condition is True
arr[arr>5]      # you will use this A LOT in pandas!

array([ 6,  7,  8,  9, 10])

### Element-wise operations

In [50]:
# a plain Python list ...
L = [1, 2, 3, 4, 5]

# ... and a numpy array holding the same five values
arr = np.array(L)

In [51]:
# for a Python list, * means repetition: the list is concatenated with itself
L*2

[1, 2, 3, 4, 5, 1, 2, 3, 4, 5]

In [52]:
# for a numpy array, * is arithmetic: every element is multiplied by 2
arr*2

array([ 2,  4,  6,  8, 10])

In [53]:
arr**2

array([ 1,  4,  9, 16, 25], dtype=int32)

In [54]:
arr + 100

array([101, 102, 103, 104, 105])

In [55]:
arr2 = np.arange(6,11)
arr2

array([ 6,  7,  8,  9, 10])

In [56]:
arr + arr2

array([ 7,  9, 11, 13, 15])

In [57]:
arr*arr2

array([ 6, 14, 24, 36, 50])

### Universal and aggregation functions

In [None]:
# a negative step counts down: 10, 9, ..., 0 (the stop value -1 is excluded)
arr_ordered = np.arange(10,-1, -1)
arr_ordered

In [None]:
# returns the 25th and 75th percentiles (Q1 and Q3); the interquartile range is Q3 - Q1
np.percentile(arr_ordered, [25, 75])   # first and third quartiles

In [None]:
np.median(arr_ordered)

In [None]:
np.sqrt(arr_ordered)

In [None]:
np.min(arr_ordered)

In [None]:
# index position of the smallest value
np.argmin(arr_ordered)

In [None]:
np.max(arr_ordered)

In [None]:
# index position of the largest value
np.argmax(arr_ordered)

In [None]:
an_arr = np.array([12, 3, 2, 8])
an_arr

In [None]:
# index position of the smallest to the largest values
np.argsort(an_arr)

# Linear Algebra

In [58]:
# Matrix (2d): 3 rows x 4 columns
A = np.array([[1, 2, 1, 1], [1, 3, 0, 3], [1, 0, 1, 0]])
A

array([[1, 2, 1, 1],
       [1, 3, 0, 3],
       [1, 0, 1, 0]])

In [59]:
# 1d array (vector) with 4 entries, one per column of A
# NOTE: this rebinds the name b, which held the random 3x3 array in the "Appending arrays" section
b = np.array([225, 10, 3, -7])
b

array([225,  10,   3,  -7])

In [60]:
# dot product of the 3x4 matrix A with the length-4 vector b: each entry of the
# result is the dot product of one row of A with b, giving a length-3 array
A.dot(b)

array([241, 234, 228])