## Installing Libraries

In [1]:
!pip install numpy



## DataTypes & Attributes

In [2]:
import numpy as np

# Three NumPy arrays of increasing rank
a1 = np.array([1, 2, 3])                      # 1-D => vector
a2 = np.array([[1, 2, 3.3], [4, 5, 6.5]])     # 2-D => matrix
a3 = np.array([[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
               [[10, 11, 12], [13, 14, 15], [16, 17, 18]]])  # 3-D => a stack of two 3x3 matrices

# For each attribute, report it for all three arrays, then print a separator.
# The printed text is the same as writing one print() call per array and attribute.
arrays = (("a1", a1), ("a2", a2), ("a3", a3))
attribute_readers = (
    ("dtype", lambda arr: arr.dtype),      # element type (mixed int/float input is upcast to float)
    ("type", type),                        # Python type of the object itself
    ("shape", lambda arr: arr.shape),      # length of each axis
    ("dimension", lambda arr: arr.ndim),   # number of axes
)
for label, read in attribute_readers:
    for name, arr in arrays:
        print("{}.{} : ".format(name, label), read(arr))
    print("--------------------------")

a3

a1.dtype :  int32
a2.dtype :  float64
a3.dtype :  int32
--------------------------
a1.type :  <class 'numpy.ndarray'>
a2.type :  <class 'numpy.ndarray'>
a3.type :  <class 'numpy.ndarray'>
--------------------------
a1.shape :  (3,)
a2.shape :  (2, 3)
a3.shape :  (2, 3, 3)
--------------------------
a1.dimension :  1
a2.dimension :  2
a3.dimension :  3
--------------------------


array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [3]:
# Build a pandas DataFrame from a 2-D NumPy array
import pandas as pd

# Rows of the array become DataFrame rows; row and column labels default to 0, 1, 2, ...
a2 = np.array([[1, 2, 3.3],
               [4, 5, 6.5]])
df = pd.DataFrame(data=a2)
df

Unnamed: 0,0,1,2
0,1.0,2.0,3.3
1,4.0,5.0,6.5


## Creating Arrays

In [4]:
# Trick to remember: the dtype is inferred from the literals.
# A trailing dot (1.) gives a float array, plain integers give an integer array.
sample_array_int = np.array([1, 1, 1])
sample_array_float = np.array([1., 1., 1.])

print("sample_array_float.dtype : ", sample_array_float.dtype)
print("sample_array_int.dtype : ", sample_array_int.dtype)

sample_array_float.dtype :  float64
sample_array_int.dtype :  int32


In [5]:
# 2x3 array filled with ones (the printed dtype shows np.ones produces floats by default)
ones = np.ones(shape=(2, 3))

print("ones.dtype : ", ones.dtype)
print("ones.type : ", type(ones))
ones

ones.dtype :  float64
ones.type :  <class 'numpy.ndarray'>


array([[1., 1., 1.],
       [1., 1., 1.]])

In [6]:
# 2x3 array filled with zeros (the printed dtype shows np.zeros produces floats by default)
zeros = np.zeros(shape=(2, 3))

print("zeros.dtype : ", zeros.dtype)
print("zeros.type : ", type(zeros))
zeros

zeros.dtype :  float64
zeros.type :  <class 'numpy.ndarray'>


array([[0., 0., 0.],
       [0., 0., 0.]])

In [7]:
# arange() function
# np.arange(start, stop, step): evenly spaced values from start up to, but not including, stop
range_array = np.arange(0, 10, step=2)
range_array

array([0, 2, 4, 6, 8])

In [8]:
# random() function #1
# np.random.randint(low, high, size): random integers drawn from [low, high)
random_array = np.random.randint(low=0, high=10, size=(3, 5))

print("random_array.dtype : ", random_array.dtype)
print("random_array.shape : ", random_array.shape)
random_array

random_array.dtype :  int32
random_array.shape :  (3, 5)


array([[6, 8, 0, 9, 0],
       [6, 4, 5, 2, 5],
       [0, 4, 1, 1, 4]])

In [9]:
# random() function #2
# np.random.random(size) draws floats from the half-open interval [0.0, 1.0).
# np.random.rand(3, 5) is an alternative spelling that takes the dimensions as separate arguments.
np.random.random((3, 5)) # each value lies in [0.0, 1.0)

array([[0.72209938, 0.59365526, 0.62551049, 0.14691954, 0.82324295],
       [0.10789865, 0.05891894, 0.60437841, 0.27247466, 0.04125331],
       [0.2028377 , 0.35177067, 0.28995858, 0.05756617, 0.04548066]])

In [10]:
# seed() function
# Seeding fixes the starting state of NumPy's global random generator, so the same
# "random" numbers are produced every time this code runs with the same seed value.
np.random.seed(0)
seed_random = np.random.randint(low=0, high=10, size=(2, 10))
seed_random

array([[5, 0, 3, 3, 7, 9, 3, 5, 2, 4],
       [7, 6, 8, 8, 1, 6, 7, 7, 8, 1]])

In [11]:
# unique() function
# Find the unique elements of an array
unique = np.unique(seed_random)
unique

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

## Viewing Arrays & Matrices

![alt text](matrix.png "Matrix")

##### Source: "https://www.onlinemathlearning.com/"

In [12]:
# Viewing array #1
a1

array([1, 2, 3])

In [13]:
a1

array([1, 2, 3])

In [14]:
a1.shape

(3,)

In [15]:
a1[:2]

array([1, 2])

In [16]:
# Viewing array #2
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [17]:
a3.shape

(2, 3, 3)

In [18]:
a3[0]

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [19]:
a3[:2]

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [20]:
a3[:2, :2, :2]

array([[[ 1,  2],
        [ 4,  5]],

       [[10, 11],
        [13, 14]]])

In [21]:
# Viewing array #3
different_size_random_array = np.random.randint(0, 10, size=(2, 3, 4, 5))
different_size_random_array # 4-D array: a 2 x 3 grid whose entries are matrices of shape (4, 5)

array([[[[5, 9, 8, 9, 4],
         [3, 0, 3, 5, 0],
         [2, 3, 8, 1, 3],
         [3, 3, 7, 0, 1]],

        [[9, 9, 0, 4, 7],
         [3, 2, 7, 2, 0],
         [0, 4, 5, 5, 6],
         [8, 4, 1, 4, 9]],

        [[8, 1, 1, 7, 9],
         [9, 3, 6, 7, 2],
         [0, 3, 5, 9, 4],
         [4, 6, 4, 4, 3]]],


       [[[4, 4, 8, 4, 3],
         [7, 5, 5, 0, 1],
         [5, 9, 3, 0, 5],
         [0, 1, 2, 4, 2]],

        [[0, 3, 2, 0, 7],
         [5, 9, 0, 2, 7],
         [2, 9, 2, 3, 3],
         [2, 3, 4, 1, 2]],

        [[9, 1, 4, 6, 8],
         [2, 3, 0, 0, 6],
         [0, 6, 3, 3, 8],
         [8, 8, 2, 3, 2]]]])

In [22]:
different_size_random_array[:, :, :, :2]

array([[[[5, 9],
         [3, 0],
         [2, 3],
         [3, 3]],

        [[9, 9],
         [3, 2],
         [0, 4],
         [8, 4]],

        [[8, 1],
         [9, 3],
         [0, 3],
         [4, 6]]],


       [[[4, 4],
         [7, 5],
         [5, 9],
         [0, 1]],

        [[0, 3],
         [5, 9],
         [2, 9],
         [2, 3]],

        [[9, 1],
         [2, 3],
         [0, 6],
         [8, 8]]]])

## Manipulating & Comparing Arrays

### Arithmetic Operations

![alt text](arithmetic_operations.png "Arithmetic Operations")

##### Source: "https://www.devopsschool.com/blog/"

In [23]:
# let's remember a1 and its shape
a1

array([1, 2, 3])

In [24]:
print("a1.shape : ", a1.shape)

a1.shape :  (3,)


In [25]:
# Rebind `ones` to a length-3 vector of ones so that it lines up element-wise with a1
ones = np.ones(3)
ones

array([1., 1., 1.])

In [26]:
a1 + ones

array([2., 3., 4.])

In [27]:
a1 - ones

array([0., 1., 2.])

In [28]:
a1 * ones

array([1., 2., 3.])

In [29]:
# let's remember a2 and its shape
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [30]:
print("a2.shape : ", a2.shape)

a2.shape :  (2, 3)


In [31]:
# a1 has shape (3,) and a2 has shape (2, 3): a1 is broadcast across each row of a2,
# so every row of a2 is multiplied element-wise by a1
result = a1 * a2
result

array([[ 1. ,  4. ,  9.9],
       [ 4. , 10. , 19.5]])

In [32]:
result.shape

(2, 3)

In [33]:
# let's remember a3 and its shape
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [34]:
print("a3.shape : ", a3.shape)

a3.shape :  (2, 3, 3)


In [35]:
# a2 * a3
# Error => ValueError: operands could not be broadcast together with shapes (2, 3) (2, 3, 3) 

# Broadcasting: how NumPy handles arrays with different shapes during arithmetic operations.
# Shapes are compared axis by axis, starting from the last axis; two axes are compatible when they are equal or one of them is 1.
# When all axes are compatible, the smaller array is "broadcast" (repeated) across the larger one so that the shapes match.
# Here the last axes match (3 and 3), but the next pair (2 and 3) does not, hence the error.

# The problem is "How can you reshape a2 to be compatible with a3?"

In [36]:
a1 / ones

array([1., 2., 3.])

In [37]:
a2 / ones

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [38]:
# Floor division removes the decimals (rounds down)
a2 // ones

array([[1., 2., 3.],
       [4., 5., 6.]])

In [39]:
# Square #1
a2 ** 2

array([[ 1.  ,  4.  , 10.89],
       [16.  , 25.  , 42.25]])

In [40]:
# Square #2
np.square(a2)

array([[ 1.  ,  4.  , 10.89],
       [16.  , 25.  , 42.25]])

In [41]:
# Add
np.add(a1, a2)

array([[2. , 4. , 6.3],
       [5. , 7. , 9.5]])

In [42]:
# Mod #1
a1 % 2

array([1, 0, 1], dtype=int32)

In [43]:
# Mod #2
np.mod(a1, 2)

array([1, 0, 1], dtype=int32)

In [44]:
# Logarithm
np.log(a1)

array([0.        , 0.69314718, 1.09861229])

In [45]:
# Exponential
np.exp(a2)

array([[  2.71828183,   7.3890561 ,  27.11263892],
       [ 54.59815003, 148.4131591 , 665.14163304]])

### Aggregation

Aggregation means combining many values into a single summary value by applying one operation to all of them (for example sum, mean, min or max).

In [46]:
a1

array([1, 2, 3])

In [47]:
# a1 is numpy array
type(a1)

numpy.ndarray

In [48]:
# Python's sum()
sum(a1)

6

In [49]:
# Numpy's sum()
np.sum(a1)

6

In [50]:
# What is the difference between sum and np.sum?
# Use Python's methods (sum()) on Python datatypes and use Numpy's methods (np.sum()) on Numpy arrays

In [51]:
# Create a massive NumPy array of 100,000 random floats
massive_array = np.random.random(size=100000)
massive_array.size

100000

In [52]:
massive_array[:10]

array([0.28847644, 0.43328806, 0.75610669, 0.39609828, 0.89603839,
       0.63892108, 0.89155444, 0.68005557, 0.44919774, 0.97857093])

In [53]:
# %timeit is Jupyter's magic function, it measures execution time of code

%timeit sum(massive_array) # Python's sum()
%timeit np.sum(massive_array) # Numpy's sum()

5.78 ms ± 109 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
36.8 µs ± 896 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [54]:
# Let's compare: how many times slower is Python's sum() than np.sum() on a NumPy array?
# NOTE(review): the literals 5.63 (ms) and 35 (µs) look like they come from an earlier timing run;
# the %timeit output recorded in the previous cell reports 5.78 ms and 36.8 µs. TODO: refresh.
5.63 * 1000 / 35 # convert milliseconds to microseconds, then divide by np.sum()'s time

# => with these numbers, Python's sum() is about 160 times slower than np.sum() on a NumPy array

160.85714285714286

In [55]:
np.max(a2)

6.5

In [56]:
np.min(a2)

1.0

## Variance & Standard Deviation

![alt text](std_var_formula.png "Standard Deviation & Variance Formulas")

##### Source: "https://byjus.com/"

In [57]:
# Standard deviation => A measure of how spread out a group of numbers is from the mean
# Standard deviation #1
np.std(a2)

1.8226964152656422

In [58]:
# Variance => Measure of the average degree to which each number is different to the mean
# Higher variance = wider range of numbers
# lower variance = lower range of numbers

np.var(a2)

3.3222222222222224

In [59]:
# Standard deviation #2
np.sqrt(np.var(a2))

1.8226964152656422

#### Let's look at the std and var demo:

![alt text](std_var_demo1.gif "Demo1")

##### Source: "https://www.mathsisfun.com/"

In [60]:
size_of_animals = np.array([600, 470, 170, 430, 300])

In [61]:
# calculation of mean
mean_of_sizes = np.mean(size_of_animals)
mean_of_sizes

394.0

#### The green line shows the average:

![alt text](std_var_demo2.gif "Demo2")

##### Source: "https://www.mathsisfun.com/"

In [62]:
# calculation of variance
# We can use Numpy function (np.var()), but let's apply the formula for now

differences = size_of_animals - mean_of_sizes

#### Difference from the mean of sizes:

![alt text](std_var_demo3.gif "Demo3")

##### Source: "https://www.mathsisfun.com/"

In [63]:
# Variance = mean of the squared differences from the mean:
# ((600-394)^2 + (470-394)^2 + (170-394)^2 + (430-394)^2 + (300-394)^2) / 5
var_of_sizes = np.sum(np.square(differences)) / len(size_of_animals)
var_of_sizes

21704.0

In [64]:
# calculation of standard deviation
std_of_sizes = np.sqrt(var_of_sizes)
std_of_sizes

147.32277488562318

#### Standard Deviation:

![alt text](std_var_demo4.gif "Demo4")

##### Source: "https://www.mathsisfun.com/"

## Reshaping & Transposing

In [65]:
# Let's remember a2 again
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [66]:
a2.shape

(2, 3)

In [67]:
a3.shape

(2, 3, 3)

In [68]:
# Remember => multiplying a2 by a3 raises a ValueError because their shapes, (2, 3) and (2, 3, 3), cannot be broadcast together
# a2 * a3

In [69]:
# Reshaping a2 to (2, 3, 1) is the solution: an axis of length 1 can be broadcast,
# so the shapes (2, 3, 1) and (2, 3, 3) are compatible
a2_reshape = a2.reshape(2, 3, 1)
a2_reshape

array([[[1. ],
        [2. ],
        [3.3]],

       [[4. ],
        [5. ],
        [6.5]]])

In [70]:
# This operation doesn't raise an error: each value of a2 is multiplied with the matching row of a3
a2_reshape * a3

array([[[  1. ,   2. ,   3. ],
        [  8. ,  10. ,  12. ],
        [ 23.1,  26.4,  29.7]],

       [[ 40. ,  44. ,  48. ],
        [ 65. ,  70. ,  75. ],
        [104. , 110.5, 117. ]]])

In [71]:
# Transpose => reverses the order of the axes (for a 2-D array, rows become columns)
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [72]:
a2.T

array([[1. , 4. ],
       [2. , 5. ],
       [3.3, 6.5]])

In [73]:
a2.T.shape

(3, 2)

In [74]:
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [75]:
a3.T

array([[[ 1, 10],
        [ 4, 13],
        [ 7, 16]],

       [[ 2, 11],
        [ 5, 14],
        [ 8, 17]],

       [[ 3, 12],
        [ 6, 15],
        [ 9, 18]]])

In [76]:
a3.T.shape

(3, 3, 2)

## Element-Wise Multiplication & Dot Product

In [77]:
# Re-seed the global generator so both matrices come out the same on every run,
# then draw two 5x3 integer matrices one after the other (mat1 first, then mat2).
np.random.seed(0)
mat1, mat2 = (np.random.randint(low=0, high=10, size=(5, 3)) for _ in range(2))

In [78]:
mat1

array([[5, 0, 3],
       [3, 7, 9],
       [3, 5, 2],
       [4, 7, 6],
       [8, 8, 1]])

In [79]:
mat2

array([[6, 7, 7],
       [8, 1, 5],
       [9, 8, 9],
       [4, 3, 0],
       [3, 5, 0]])

#### Element-wise multiplication (Hadamard Product)

![alt text](element-wise.png "Product")
##### Source: "https://www.tutorialexample.com/"

NOTE => Element-wise multiplication multiplies the entries that sit at the same position, so both matrices must have the same shape, (m, n) and (m, n), or shapes that NumPy can broadcast together. The result also has shape (m, n).

In [80]:
mat1 * mat2

array([[30,  0, 21],
       [24,  7, 45],
       [27, 40, 18],
       [16, 21,  0],
       [24, 40,  0]])

#### Dot Product

In [81]:
# np.dot(mat1, mat2)
# It gives error like "ValueError: shapes (5,3) and (5,3) not aligned: 3 (dim 1) != 5 (dim 0)"

![alt text](dot_product.jpg "Product") 
##### Source: "https://algebra1course.wordpress.com/"

NOTE => For the dot product (matrix multiplication) of two matrices, the number of columns of the first must equal the number of rows of the second, i.e. the shapes must be (m, n) and (n, t). The result has shape (m, t).

In [82]:
# Let's check shapes of them
mat1.shape, mat2.shape

((5, 3), (5, 3))

In [83]:
mat1.shape, mat2.T.shape # (m, n) and (n, m) => (m, m)

((5, 3), (3, 5))

In [84]:
dot_product = np.dot(mat1, mat2.T)

In [85]:
dot_product.shape

(5, 5)

## Comparison Operators

In [86]:
a1 

array([1, 2, 3])

In [87]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [88]:
a1 < a2

array([[False, False,  True],
       [ True,  True,  True]])

In [89]:
a1 == a2

array([[ True,  True, False],
       [False, False, False]])

In [90]:
bool_array = a1 >= a2
bool_array

array([[ True,  True, False],
       [False, False, False]])

In [91]:
type(bool_array), bool_array.dtype

(numpy.ndarray, dtype('bool'))

## Sorting Arrays

In [92]:
# 3x5 array of random integers drawn from [0, 15)
random_array = np.random.randint(low=0, high=15, size=(3, 5))
random_array

array([[ 2,  3,  8,  1,  3],
       [13,  3,  3, 14,  7],
       [ 0,  1,  9,  9,  0]])

In [93]:
np.sort(random_array)

array([[ 1,  2,  3,  3,  8],
       [ 3,  3,  7, 13, 14],
       [ 0,  0,  1,  9,  9]])

In [94]:
np.argsort(random_array) # For each row, returns the indices that would put that row in sorted order

array([[3, 0, 1, 4, 2],
       [1, 2, 4, 0, 3],
       [0, 4, 1, 2, 3]], dtype=int64)

In [95]:
# Another example for argsort(), argmax() and argmin()
# NOTE: this rebinds a1, which was defined earlier as [1, 2, 3]; earlier cells that use a1
# will see this new value if they are re-run without restarting the kernel.
a1 = np.array([3, 1, 2])

In [96]:
np.sort(a1)

array([1, 2, 3])

In [97]:
np.argsort(a1)

array([1, 2, 0], dtype=int64)

In [98]:
np.argmax(a1) # Returns the index of the maximum value

0

In [99]:
np.argmin(a1) # Returns the index of the minimum value

1

In [100]:
# Let's give another example
random_array

array([[ 2,  3,  8,  1,  3],
       [13,  3,  3, 14,  7],
       [ 0,  1,  9,  9,  0]])

In [101]:
np.argmax(random_array, axis=0) # axis=0: row index of the maximum in each column (one result per column)

array([1, 0, 2, 1, 1], dtype=int64)

In [102]:
np.argmax(random_array, axis=1) # axis=1: column index of the maximum in each row (one result per row)

array([2, 3, 2], dtype=int64)