# Numpy Fundamentals
NumPy is a numerical computing and linear algebra library for Python. It is essential for data science because many libraries (e.g. pandas) are built on top of it and rely on NumPy as one of their main building blocks. NumPy is very fast and well suited to mathematical operations involving vectors, matrices, etc. This notebook provides example code for basic NumPy operations.

# Import numpy library

In [1]:
import numpy as np

# Creating numpy arrays

In [2]:
my_list = [1,2,3]
my_list

[1, 2, 3]

In [3]:
np.array(my_list)

array([1, 2, 3])

In [4]:
# Note that there is only 1 square bracket at each end, indicating the array is one-dimensional

In [5]:
my_matrix = [[1,2,3],[4,5,6],[7,8,9]]
my_matrix

[[1, 2, 3], [4, 5, 6], [7, 8, 9]]

In [6]:
np.array(my_matrix)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [7]:
# Note that there are 2 square brackets at each end, indicating the array is two-dimensional

# Some built-in Methods

In [8]:
# arange: return evenly spaced values within a given interval

In [9]:
np.arange(0,10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [10]:
np.arange(0,11,2)

array([ 0,  2,  4,  6,  8, 10])

In [11]:
# zeros and ones: generate arrays of zeros or ones

In [12]:
np.zeros(3)

array([0., 0., 0.])

In [13]:
np.zeros((5,5))

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [14]:
np.ones(3)

array([1., 1., 1.])

In [15]:
np.ones((3,3))

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [16]:
# Create an array of 10 fives
# np.full builds the filled array directly, instead of allocating an array
# of ones and multiplying it; 5.0 keeps the float dtype shown in the output
np.full(10, 5.0)

array([5., 5., 5., 5., 5., 5., 5., 5., 5., 5.])

In [17]:
# linspace: return evenly spaced numbers over a specified interval.

In [18]:
np.linspace(0,10,3)

array([ 0.,  5., 10.])

In [19]:
np.linspace(0,10,50)

array([ 0.        ,  0.20408163,  0.40816327,  0.6122449 ,  0.81632653,
        1.02040816,  1.2244898 ,  1.42857143,  1.63265306,  1.83673469,
        2.04081633,  2.24489796,  2.44897959,  2.65306122,  2.85714286,
        3.06122449,  3.26530612,  3.46938776,  3.67346939,  3.87755102,
        4.08163265,  4.28571429,  4.48979592,  4.69387755,  4.89795918,
        5.10204082,  5.30612245,  5.51020408,  5.71428571,  5.91836735,
        6.12244898,  6.32653061,  6.53061224,  6.73469388,  6.93877551,
        7.14285714,  7.34693878,  7.55102041,  7.75510204,  7.95918367,
        8.16326531,  8.36734694,  8.57142857,  8.7755102 ,  8.97959184,
        9.18367347,  9.3877551 ,  9.59183673,  9.79591837, 10.        ])

In [20]:
# eye: creates an identity matrix

In [21]:
np.eye(4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

# For random number generation

In [22]:
# rand: create an array of the given shape and populate it with random samples from a uniform distribution over [0, 1)

In [23]:
np.random.rand(2)

array([0.36514791, 0.85746433])

In [24]:
np.random.rand(5,5)

array([[0.77289536, 0.06579335, 0.79272497, 0.44267994, 0.59224605],
       [0.30712307, 0.26765236, 0.15175342, 0.63713774, 0.52157758],
       [0.23667757, 0.03275247, 0.75298289, 0.249874  , 0.77688436],
       [0.56356974, 0.92451372, 0.3400061 , 0.84316396, 0.33868625],
       [0.8303543 , 0.78837432, 0.29464704, 0.7877713 , 0.26499746]])

In [25]:
# randn: return a sample (or samples) from the "standard normal" distribution (mean 0, variance 1), unlike rand, which samples from a uniform distribution

In [26]:
np.random.randn(2)

array([-1.14644573, -0.04089596])

In [27]:
np.random.randn(5,5)

array([[ 1.73726801, -0.47695611,  1.07931597,  1.68803812, -0.34585656],
       [ 1.33525045,  0.45979417, -1.496107  , -0.45546143,  0.21071864],
       [-1.85574568,  0.57868999, -1.44100515,  0.57207211, -1.15187791],
       [ 0.11524428,  0.05048802, -1.06731524, -1.01513708,  0.37512817],
       [ 1.08845654,  0.86102561, -0.31367841,  0.06284673,  0.12470768]])

In [28]:
# randint: return random integers from low (inclusive) to high (exclusive).

In [29]:
np.random.randint(1,100)

54

In [30]:
np.random.randint(1,100,10)

array([93, 71, 40, 43, 77, 26, 27, 68, 73, 77])

# Array Attributes and Methods

In [31]:
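# shape: an attribute that arrays have (not a method), so it is accessed without parentheses
# Note: the example below uses np.matrix, which NumPy no longer recommends; prefer np.array for new code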

In [32]:
arr = np.matrix([[1,2,3],[4,5,6],[7,8,9]])
arr

matrix([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [33]:
type(arr)

numpy.matrix

In [34]:
arr.dtype

dtype('int32')

In [35]:
arr.shape

(3, 3)

In [36]:
# Number of elements in the matrix
arr.size

9

In [37]:
# Number of array dimensions
np.ndim(arr)

2

In [38]:
# Sum of the whole matrix
np.sum(arr)

45

In [39]:
# Sum of each column
np.sum(arr,axis=0)

matrix([[12, 15, 18]])

In [40]:
# Sum of each row
np.sum(arr,axis=1)

matrix([[ 6],
        [15],
        [24]])

In [41]:
# Reshape: Returns an array containing the same data with a new shape

In [42]:
arr

matrix([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [43]:
arr.reshape(1,-1)

matrix([[1, 2, 3, 4, 5, 6, 7, 8, 9]])

In [44]:
arr.reshape(-1,1)

matrix([[1],
        [2],
        [3],
        [4],
        [5],
        [6],
        [7],
        [8],
        [9]])

In [45]:
# reshape() returns the reshaped result; arr itself keeps its original shape
# reshape() is useful when inputs must be reshaped before being fed into a machine learning algorithm

In [46]:
# max, min, argmax, argmin: useful methods for finding the max or min values, or
# (using argmax or argmin) their index locations in the flattened array

In [47]:
arr

matrix([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [48]:
type(arr)

numpy.matrix

In [49]:
arr.max()

9

In [50]:
arr.min()

1

In [51]:
arr.argmax()

8

In [52]:
arr.argmin()

0

# Indexing and Selection

In [53]:
# Resetting arr to a one-dimensional array
arr = np.arange(0,20,2)
arr

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [54]:
# Bracket indexing and selection

In [55]:
# Select a value from arr based on index
arr[1]

2

In [56]:
# Select a range of values from arr based on index slicing
arr[2:6]

array([ 4,  6,  8, 10])

In [57]:
# Select the last 3 elements using negative indexes
arr[-3:]

array([14, 16, 18])

# Broadcasting

In [58]:
# NumPy arrays differ from normal Python lists in their ability to broadcast, e.g. assigning a single value to every element of a slice

In [59]:
arr = np.arange(0,11)
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [60]:
# Get a slice of arr
slice_of_arr = arr[0:6]
slice_of_arr

array([0, 1, 2, 3, 4, 5])

In [61]:
# Modify the content inside slice_of_arr
slice_of_arr[:] = 99
slice_of_arr

array([99, 99, 99, 99, 99, 99])

In [62]:
# From the above, it may seem that only the content of slice_of_arr was changed, and not the original arr.
# Let's check whether this is the case.
arr

array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])

In [63]:
# The above shows that the original arr has been changed as well! This is because slicing returns a view, not a copy:
# the data is not copied, so the broadcast assignment wrote straight into the original array. Views avoid the memory cost of copying large arrays.

In [64]:
# To avoid this and get an independent copy of the original arr, we need to request one explicitly with copy()
arr_copy = arr.copy()

# Indexing for 2D array (matrices)

In [65]:
# The general format is arr_2d[row][col] or arr_2d[row,col], but the latter (with comma) is recommended

In [66]:
arr_2d = np.array(([5,10,15],[20,25,30],[35,40,45]))
arr_2d

array([[ 5, 10, 15],
       [20, 25, 30],
       [35, 40, 45]])

In [67]:
# Indexing for getting the first row
arr_2d[0,:]

array([ 5, 10, 15])

In [68]:
# Indexing for getting the second column
arr_2d[:,1]

array([10, 25, 40])

In [69]:
# Indexing for getting 25
arr_2d[1,1]

25

In [70]:
# Indexing for getting the bottom right 2x2 subset
arr_2d[1:,1:]

array([[25, 30],
       [40, 45]])

# Selection using comparison operators

In [71]:
arr = np.arange(1,11)
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [72]:
# Select only those > 5
arr[arr > 5]

array([ 6,  7,  8,  9, 10])

In [73]:
# Select only even numbers
arr[arr % 2 == 0]

array([ 2,  4,  6,  8, 10])

# Arithmetic operations

In [74]:
arr_1 = np.arange(1,21)
arr_1

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20])

In [75]:
arr_2 = np.arange(21,41)
arr_2

array([21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
       38, 39, 40])

In [76]:
arr_1 + arr_2

array([22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54,
       56, 58, 60])

In [77]:
arr_1 - arr_2

array([-20, -20, -20, -20, -20, -20, -20, -20, -20, -20, -20, -20, -20,
       -20, -20, -20, -20, -20, -20, -20])

In [78]:
arr_1 * arr_2

array([ 21,  44,  69,  96, 125, 156, 189, 224, 261, 300, 341, 384, 429,
       476, 525, 576, 629, 684, 741, 800])

In [79]:
arr_1 / arr_2

array([0.04761905, 0.09090909, 0.13043478, 0.16666667, 0.2       ,
       0.23076923, 0.25925926, 0.28571429, 0.31034483, 0.33333333,
       0.35483871, 0.375     , 0.39393939, 0.41176471, 0.42857143,
       0.44444444, 0.45945946, 0.47368421, 0.48717949, 0.5       ])

# Universal Array Functions

In [80]:
arr_1

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20])

In [81]:
np.sqrt(arr_1)

array([1.        , 1.41421356, 1.73205081, 2.        , 2.23606798,
       2.44948974, 2.64575131, 2.82842712, 3.        , 3.16227766,
       3.31662479, 3.46410162, 3.60555128, 3.74165739, 3.87298335,
       4.        , 4.12310563, 4.24264069, 4.35889894, 4.47213595])

In [82]:
np.exp(arr_1)

array([2.71828183e+00, 7.38905610e+00, 2.00855369e+01, 5.45981500e+01,
       1.48413159e+02, 4.03428793e+02, 1.09663316e+03, 2.98095799e+03,
       8.10308393e+03, 2.20264658e+04, 5.98741417e+04, 1.62754791e+05,
       4.42413392e+05, 1.20260428e+06, 3.26901737e+06, 8.88611052e+06,
       2.41549528e+07, 6.56599691e+07, 1.78482301e+08, 4.85165195e+08])

In [83]:
np.max(arr_1)

20

In [84]:
np.min(arr_1)

1

In [85]:
np.log(arr_1)

array([0.        , 0.69314718, 1.09861229, 1.38629436, 1.60943791,
       1.79175947, 1.94591015, 2.07944154, 2.19722458, 2.30258509,
       2.39789527, 2.48490665, 2.56494936, 2.63905733, 2.7080502 ,
       2.77258872, 2.83321334, 2.89037176, 2.94443898, 2.99573227])