# 🔢 Numpy

In [None]:
import numpy as np
import time
import matplotlib.pyplot as plt
import seaborn as sns

SEED = 42  # one place to change the seed used by the whole notebook
np.random.seed(SEED)  # seeds NumPy's global generator so a fresh top-to-bottom run is repeatable

## 📖 Theory
An `ndarray` is a multidimensional array that provides *fast array-oriented arithmetic* and *flexible broadcasting capabilities*.

Allows fast vectorized array operations.

NumPy is extremely efficient on large arrays of data because it internally stores the data in a contiguous block of memory. NumPy arrays also use much less memory than the equivalent Python lists.

## ⌚Performance difference between numpy array and Python lists

In [6]:
def multiplyByTwoNumpy(size=10_000_000):
    """Time how long NumPy takes to double every element of an integer array.

    Args:
        size: Number of elements in the array. Defaults to 10,000,000, the
            size this benchmark has always used.

    Returns:
        None. The elapsed time in milliseconds is printed.
    """
    my_arr = np.arange(size)
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short durations; time.time() can jump if the system clock
    # is adjusted while the benchmark runs.
    time_before_operation = time.perf_counter()
    _ = my_arr * 2  # vectorized: applied to the whole array at once, no Python loop
    elapsed_ms = (time.perf_counter() - time_before_operation) * 1000
    print(f"Numpy array took {elapsed_ms} ms to complete the task")

def multiplyByTwoList(size=10_000_000):
    """Time how long a pure-Python list comprehension takes to double every element.

    Args:
        size: Number of elements in the list. Defaults to 10,000,000, the
            size this benchmark has always used.

    Returns:
        None. The elapsed time in milliseconds is printed.
    """
    my_arr = list(range(size))
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short durations; time.time() can jump if the system clock
    # is adjusted while the benchmark runs.
    time_before_operation = time.perf_counter()
    _ = [x * 2 for x in my_arr]  # one Python-level multiplication per element
    elapsed_ms = (time.perf_counter() - time_before_operation) * 1000
    print(f"Python list took {elapsed_ms} ms to complete the task")

# Run both benchmarks back to back so their printed timings can be compared.
multiplyByTwoNumpy()
multiplyByTwoList()


Numpy array took 38.47837448120117 ms to complete the task
Python list took 866.0182952880859 ms to complete the task


## ❓"Random" feature of numpy

 The NumPy Random Module provides functions to *generate random numbers*: integers, floats, samples from distributions, and more. Useful for *simulations, testing and ML initialization*.

In [47]:
print(
    np.random.rand(),      # a single float drawn uniformly from [0, 1)
    "-----------------------------------------------",
    np.random.rand(2, 3),  # a 2 x 3 matrix of such floats
    sep="\n",
)

0.4065199163762486
-----------------------------------------------
[[0.46894025 0.26923558 0.29179277]
 [0.4576864  0.86053391 0.5862529 ]]


In [None]:
print(
    np.random.randint(1, 10, 5),       # 1D array of 5 integers drawn from 1..9 (10 is excluded)
    "------------------------------------------------",
    np.random.randint(1, 10, (2, 3)),  # 2 x 3 matrix drawn from the same range
    sep="\n",
)


[2 3 1 5 1]
------------------------------------------------
[[8 1 1]
 [2 2 6]]


array([[1, 3, 1, 5, 7],
       [1, 2, 3, 4, 5]])

In [198]:
print(
    np.random.randn(3),               # 1D array of 3 standard-normal samples (mean = 0, std = 1)
    '--------------------------------------\n',
    np.random.randn(7, 4),            # 7 x 4 matrix of standard-normal samples
    '--------------------------------------\n',
    np.random.normal(10, 2, 5),       # 1D array of 5 samples with mean = 10, std = 2
    '--------------------------------------\n',
    np.random.normal(10, 2, (2, 3)),  # 2D matrix of shape (2, 3) with mean = 10, std = 2
    sep='\n',
)



[-0.18330409  0.3050598   1.51568847]
--------------------------------------

[[-0.90987231  3.18937146 -0.35647359 -1.43992989]
 [ 0.39014283 -0.53965906 -1.99139901  1.14576499]
 [ 0.24342141  1.5467417  -0.21941362 -0.99450663]
 [-1.158662   -1.27055774  0.57156368  0.38106529]
 [-0.93666508 -0.5952576   1.35816371  0.38792939]
 [ 1.09095673  0.38378993  0.75614904  0.17088587]
 [-2.30728252  1.04651988 -0.26743478 -0.19158513]]
--------------------------------------

[ 9.0765099   9.85942242  8.34238848 13.41495746 10.17806823]
--------------------------------------

[[ 8.64011131  8.53114359  9.1803686 ]
 [11.04896958 13.67711361  9.01179736]]


In [129]:
# np.random.shuffle(x) reorders x in place along its first axis only:
# a 1D array gets its elements reordered, while a 2D matrix gets its rows
# reordered and the contents of each row stay intact.
arr = np.arange(1, 6)
np.random.shuffle(arr)

mat = np.arange(1, 11).reshape(2, 5)
np.random.shuffle(mat)

print(arr, '---------------------------\n', mat, sep='\n')

[5 4 2 3 1]
---------------------------

[[ 6  7  8  9 10]
 [ 1  2  3  4  5]]


## 📦 The NumPy ndarray: A multidimensional Array Object for homogeneous data.

#### 🧮 Batch Computations

In [84]:
data = np.random.randn(2, 3)

print(
    data * 10,    # the scalar is applied to every element
    '\n--------------------------------------\n',
    data + data,  # element-wise addition of two equally shaped arrays
    sep='\n',
)

[[ -9.08024076 -14.12303701  14.65648769]
 [ -2.257763     0.67528205 -14.24748186]]

--------------------------------------

[[-1.81604815 -2.8246074   2.93129754]
 [-0.4515526   0.13505641 -2.84949637]]


#### 📐 Every N-D-Array has a shape (a tuple) and a fixed datatype

In [86]:
# shape is a tuple with the size of each dimension; dtype is the single element type.
print(data.shape, data.dtype, sep='\n')

(2, 3)
float64


#### 🛠️ Creating ndArrays

In [None]:
# 1D array (arr1.ndim = 1) (ndim = number of dimensions)
data1 = [6, 7.5, 8, 0 , 1] # the list mixes ints with one float
arr1 = np.array(data1)
arr1 # an ndarray holds a single dtype, so the ints are upcast to float (see 6., 8., ... in the output)

array([6. , 7.5, 8. , 0. , 1. ])

In [None]:
# 2D array (arr2.ndim = 2): a list of two equal-length rows becomes a matrix
data2 = [[1, 2, 3], [4, 5, 6]]
arr2 = np.array(data2) # Matrix of shape (2,3)
arr2

array([[1, 2, 3],
       [4, 5, 6]])

In [101]:
# Zero-filled arrays: pass an int for a 1D array, a tuple for an N-D shape.
zeroArr = np.zeros(5)       # 1D, dtype float64 by default
zeroMat = np.zeros((3, 3))  # 2D, shape (3, 3)
print(zeroArr, '\n--------------------------------------\n', zeroMat, sep='\n')

[0. 0. 0. 0. 0.]

--------------------------------------

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


In [108]:
# Evenly spaced integers: np.arange(start, stop, step) excludes `stop`; `step` defaults to 1.
arranged = np.arange(1, 10)
# reshape(3, 3) lays the same 9 values out row by row as a 3 x 3 matrix.
print(arranged, '-------------------------------------------\n', arranged.reshape(3, 3), sep='\n')


[1 2 3 4 5 6 7 8 9]
-------------------------------------------

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [110]:
# One-filled arrays: pass an int for a 1D array, a tuple for an N-D shape.
oneArr = np.ones(5)       # 1D, dtype float64 by default
oneMat = np.ones((3, 3))  # 2D, shape (3, 3)
print(oneArr, '--------------------------------------\n', oneMat, sep='\n')

[1. 1. 1. 1. 1.]
--------------------------------------

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]


#### 📊 Data Types of N-D-Arrays 

In [116]:
# Request a dtype at creation time: the ints are stored as 32-bit floats.
arr1 = np.array([1, 2, 3], dtype=np.float32)
print(arr1, arr1.dtype)

# astype() returns a new array of the requested dtype; converting float -> int
# drops the fractional part (3.7 -> 3, -1.2 -> -1).
arr2 = np.array([3.7, -1.2, -2.6, 0.5, 12.9, 10.1])
arr2_int = arr2.astype(np.int32)
print(arr2_int, arr2_int.dtype)


[1. 2. 3.] float32
[ 3 -1 -2  0 12 10] int32


#### 📏 Arithmetic with NumPy Arrays

In [130]:
# 2 x 3 float matrix [[1., 2., 3.], [4., 5., 6.]] used by the arithmetic examples.
arr = np.arange(1., 7.).reshape(2, 3)
arr

array([[1., 2., 3.],
       [4., 5., 6.]])

In [131]:
# Element-wise product: every entry is multiplied by itself (this is not a matrix product).
arr * arr

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [132]:
# Element-wise subtraction of arr from itself gives an all-zero array of the same shape.
arr - arr

array([[0., 0., 0.],
       [0., 0., 0.]])

In [133]:
# The scalar 1 is applied against every element, giving the element-wise reciprocal.
1 / arr

array([[1.        , 0.5       , 0.33333333],
       [0.25      , 0.2       , 0.16666667]])

In [134]:
# Raising to the power 0.5 takes the square root of every element.
arr ** 0.5

array([[1.        , 1.41421356, 1.73205081],
       [2.        , 2.23606798, 2.44948974]])

In [135]:
# Comparison between two arrays of the same shape yields a boolean array of that shape
arr2 = np.array([[0., 4., 1.], [7., 2., 12.]])
arr2 > arr 

array([[False,  True, False],
       [ True, False,  True]])

#### 🪓 Basic Indexing and Slicing

*array[start : stop : step]* => 1D {stop is exclusive}

*array[row_start : row_stop : row_step, col_start : col_stop : col_step]* => 2D (there can also be a boolean expression either in the row or the column parameters)

{row_stop and col_stop is exclusive}

##### Slicing of 1D array

In [None]:
# 1D array of the integers 0 through 9, used by the slicing examples that follow.
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [174]:
# A single integer index returns one element (indexing starts at 0).
arr[5]

5

In [175]:
arr[5:8] # stop is exclusive: indices 5, 6 and 7, i.e. 8 - 5 = 3 elements

array([5, 6, 7])

In [None]:
arr[5:8] = 12
arr
# Assigning a scalar to a slice, as in arr[5:8] = 12, propagates ("broadcasts") the value to every element of the selection.

array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

 An important distinction between Python lists and NumPy arrays is that *a slice of a NumPy array is a view of the original array, i.e., any change made to the slice also changes the original array*.

In [192]:
# Plain assignment only binds a second name to the same array;
# .copy() allocates new, independent data.
a = np.array([1, 2, 3])
b, c = a, a.copy()

b[0] = 10  # b is the same object as a, so a[0] becomes 10 as well
c[1] = 20  # c owns its own data, so a is not affected

In [180]:
sample = np.arange(10)
sub_sample = sample[2:5]  # a view onto elements 2, 3 and 4 of `sample`
sub_sample[:] = 1         # writes through the view: sample[2:5] becomes [1, 1, 1]
sub_sample[0] = 99        # likewise changes sample[2]
sample

array([ 0,  1, 99,  1,  1,  5,  6,  7,  8,  9])

In [183]:
# The bare slice [:] selects every element, so the assignment overwrites the whole array
sample2 = np.arange(5)
sample2[:] = 20
sample2

array([20, 20, 20, 20, 20])

##### Slicing of 2D arrays

In [184]:
# 3 x 3 matrix [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
arr2d = np.arange(1, 10).reshape(3, 3)
arr2d[1]  # a single index on a 2D array selects a whole row

array([4, 5, 6])

In [None]:
arr2d[0][1] # row 0, column 1; equivalent to the single-step form arr2d[0,1]

2

In [None]:
arr2d[: 2] # first two rows, all columns; same as arr2d[0:2:1, 0:3:1]

array([[1, 2, 3],
       [4, 5, 6]])

In [194]:
arr2d[:2, 1:] # rows 0-1, columns 1-2; same as arr2d[0:2:1, 1:3:1]

array([[2, 3],
       [5, 6]])

In [None]:
arr2d[1, :2] # only row index 1, columns 0 and 1; same as arr2d[1, 0:2:1]

array([4, 5])

#### 💭 Boolean Indexing


Selecting data from an array by boolean indexing *always creates a copy of the data*,
even if the returned array is unchanged.

In [226]:
integer = np.arange(10).reshape(2, 5)
condition = (integer == 1)  # boolean array with the same shape as `integer`

print(
    integer,
    '-------------------\n',
    condition,
    '-------------------\n',
    integer[condition],  # 1D array of the elements where the mask is True
    sep='\n',
)

[[0 1 2 3 4]
 [5 6 7 8 9]]
-------------------

[[False  True False False False]
 [False False False False False]]
-------------------

[1]


In [None]:
# One label per row of the data matrix created in the next cell.
names = np.array('Bob Joe Will Bob Will Joe Joe'.split())
names

array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')

In [201]:
# 7 x 4 matrix of standard-normal samples: one row per entry of `names`.
data = np.random.randn(7,4)
data

array([[-0.06706405, -2.05666503,  0.23917196, -0.21399184],
       [-1.05107072, -0.82316898,  0.31313814,  0.7097705 ],
       [-0.98122351,  0.03116026, -0.40082686, -0.51478689],
       [ 0.33877324, -0.24637294, -0.1205141 ,  0.64073562],
       [-0.64596713, -0.85457936,  1.50407098,  1.06802066],
       [-0.0779158 , -0.83451059,  1.20115444, -0.24373551],
       [-0.4249995 ,  1.4041934 , -0.42193846,  0.23250828]])

In [236]:
print(names == 'Bob')
data[names == 'Bob'] # selects rows 0 and 3, the positions where the mask is True

# ML analogy: treat `names` as a column vector. The comparison returns a True/False vector of the same length, and indexing with that vector (as in df[mask]) keeps only the rows where it is True.

[ True False False  True False False False]


array([[-0.06706405, -2.05666503,  0.23917196, -0.21399184],
       [ 0.33877324, -0.24637294, -0.1205141 ,  0.64073562]])

In [209]:
data[names == 'Bob', 2:] # rows where names == 'Bob', columns from index 2 to the last

array([[ 0.23917196, -0.21399184],
       [-0.1205141 ,  0.64073562]])

In [211]:
data[names == 'Bob', 3] # rows where names == 'Bob', only the column at index 3

array([-0.21399184,  0.64073562])

In [215]:
# Negating a condition: ~ flips every True/False in the boolean array
condition = ~(names == 'Bob')
condition

array([False,  True,  True, False,  True,  True,  True])

In [None]:
# Keep only the rows where `condition` is True.
data[condition]

array([[-1.05107072, -0.82316898,  0.31313814,  0.7097705 ],
       [-0.98122351,  0.03116026, -0.40082686, -0.51478689],
       [-0.64596713, -0.85457936,  1.50407098,  1.06802066],
       [-0.0779158 , -0.83451059,  1.20115444, -0.24373551],
       [-0.4249995 ,  1.4041934 , -0.42193846,  0.23250828]])

In [229]:
# Combine two boolean masks element-wise with | (or); the parentheses are required because | binds tighter than ==.
mask = (names == 'Bob') | (names == 'Will')
print(mask)

[ True False  True  True  True False False]


In [230]:
# Rows whose name is 'Bob' or 'Will'.
data[mask]

array([[-0.06706405, -2.05666503,  0.23917196, -0.21399184],
       [-0.98122351,  0.03116026, -0.40082686, -0.51478689],
       [ 0.33877324, -0.24637294, -0.1205141 ,  0.64073562],
       [-0.64596713, -0.85457936,  1.50407098,  1.06802066]])

In [222]:
# Comparing a 2 x 5 array with a scalar gives a boolean array of the same shape.
np.arange(10).reshape(2,5) == 1

array([[False,  True, False, False, False],
       [False, False, False, False, False]])

In [None]:
# All rows, column index 3: a 1D array with one value per row.
data[:, 3]

array([-0.21399184,  0.7097705 , -0.51478689,  0.64073562,  1.06802066,
       -0.24373551,  0.23250828])

In [245]:
# Show the matrix as it is before the replacement.
print(data)
print('------------------------------------------------------\n')
# Set all negatives = 0
condition = data < 0 # boolean mask, True wherever an entry is negative
print(condition)
print('------------------------------------------------------\n')
data[condition] = 0 # assigns through the mask, modifying `data` in place
data
# NOTE(review): because `data` is modified in place, re-running this cell prints the already-zeroed matrix and an all-False mask; re-create `data` first to see the before/after difference.

[[0.         0.         0.23917196 0.        ]
 [0.         0.         0.31313814 0.7097705 ]
 [0.         0.03116026 0.         0.        ]
 [0.33877324 0.         0.         0.64073562]
 [0.         0.         1.50407098 1.06802066]
 [0.         0.         1.20115444 0.        ]
 [0.         1.4041934  0.         0.23250828]]
------------------------------------------------------

[[False False False False]
 [False False False False]
 [False False False False]
 [False False False False]
 [False False False False]
 [False False False False]
 [False False False False]]
------------------------------------------------------



array([[0.        , 0.        , 0.23917196, 0.        ],
       [0.        , 0.        , 0.31313814, 0.7097705 ],
       [0.        , 0.03116026, 0.        , 0.        ],
       [0.33877324, 0.        , 0.        , 0.64073562],
       [0.        , 0.        , 1.50407098, 1.06802066],
       [0.        , 0.        , 1.20115444, 0.        ],
       [0.        , 1.4041934 , 0.        , 0.23250828]])