In [1]:
import numpy as np

# Datatypes and attributes

In [2]:
# numpy's main data type is ndarray(n-dimensional array)
a1 = np.array([1, 2, 3])
a1

array([1, 2, 3])

In [3]:
a2 = np.array([[1, 2.0, 3.3],
               [4, 5, 6.5]])
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [7]:
a3 = np.array([[[1, 2, 3], [4, 5, 6]],
               [[7, 8, 9], [10, 11, 12]],
               [[13, 14, 15], [16, 17, 18]]])
print(type(a3), "\n", a3)

<class 'numpy.ndarray'> 
 [[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [10 11 12]]

 [[13 14 15]
  [16 17 18]]]


In [17]:
# Summarise shape, number of dimensions, dtype and element count of each array.
# The third field of every entry is the tab padding placed before "dimension".
for label, arr, pad in (("a1", a1, "\t\t"), ("a2", a2, "\t\t"), ("a3", a3, "\t")):
    print(f"{label} = shape => ", arr.shape, f"{pad}dimension => ", arr.ndim,
          "\tdtype => ", arr.dtype, "\tsize => ", arr.size)

a1 = shape =>  (3,) 		dimension =>  1 	dtype =>  int32 	size =>  3
a2 = shape =>  (2, 3) 		dimension =>  2 	dtype =>  float64 	size =>  6
a3 = shape =>  (3, 2, 3) 	dimension =>  3 	dtype =>  int32 	size =>  18


# Creating numpy arrays

In [25]:
ones0 = np.ones(2)
ones0

array([1., 1.])

In [26]:
ones1 = np.ones((2, 3))
ones1

array([[1., 1., 1.],
       [1., 1., 1.]])

In [28]:
zeros0 = np.zeros(3)
zeros0

array([0., 0., 0.])

In [29]:
zeros1 = np.zeros((3, 2))
zeros1

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

In [32]:
range_array = np.arange(0, 10, 2) # (start, stop, step) - stop is exclusive, so 10 itself is not included
range_array

array([0, 2, 4, 6, 8])

In [33]:
random_array = np.random.randint(0, 10, size=(3, 5)) # (low, high, size) - high is exclusive, so values are integers from 0 to 9
random_array

array([[2, 5, 7, 1, 9],
       [0, 3, 5, 3, 1],
       [3, 9, 1, 4, 3]])

In [38]:
# random() takes the output shape as a single tuple; values are floats in [0, 1)
random_array2 = np.random.random((5, 3))
random_array2

array([[0.48152679, 0.16466367, 0.9832976 ],
       [0.54839369, 0.58201276, 0.25773217],
       [0.38631037, 0.99366358, 0.51262434],
       [0.8007107 , 0.97551644, 0.83071318],
       [0.88397096, 0.90291267, 0.64741972]])

In [37]:
# rand() takes each dimension as a separate argument rather than a tuple; values are floats in [0, 1)
random_array3 = np.random.rand(5, 3)
random_array3

array([[0.58229646, 0.43445329, 0.68795718],
       [0.00445884, 0.00795038, 0.58476525],
       [0.7334672 , 0.21119415, 0.4843974 ],
       [0.90294342, 0.61359352, 0.57057476],
       [0.72480634, 0.52177748, 0.02086773]])

## numpy random seed

In [4]:
# NumPy generates pseudo-random numbers: seeding the generator fixes the
# sequence, so the "random" values below are the same on every run.
np.random.seed(seed=0)
# With a single positional argument, randint treats it as the exclusive upper bound (low defaults to 0)
random_array_4 = np.random.randint(10, size=(4, 3))
random_array_4

array([[5, 0, 3],
       [3, 7, 9],
       [3, 5, 2],
       [4, 7, 6]])

In [43]:
np.random.seed(42)
random_array_5 = np.random.random((4, 5))
random_array_5

array([[0.37454012, 0.95071431, 0.73199394, 0.59865848, 0.15601864],
       [0.15599452, 0.05808361, 0.86617615, 0.60111501, 0.70807258],
       [0.02058449, 0.96990985, 0.83244264, 0.21233911, 0.18182497],
       [0.18340451, 0.30424224, 0.52475643, 0.43194502, 0.29122914]])

# Viewing array and matrices

In [51]:
arr1 = np.unique(random_array_4)
arr1

array([0, 2, 3, 4, 5, 6, 7, 9])

In [52]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [53]:
a2[0]

array([1. , 2. , 3.3])

In [54]:
a3

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]],

       [[13, 14, 15],
        [16, 17, 18]]])

In [55]:
a3[0]

array([[1, 2, 3],
       [4, 5, 6]])

In [56]:
a3[0][0]

array([1, 2, 3])

#### 4D array/matrices

To read the shape of an n-dimensional array, start from the right-most side. Suppose we defined size=(2,3,4,5)
- We'll have 5 columns and 4 rows in each matrix
- So each matrix will have the shape (4, 5) = (R, C)
- Then (2,<span style="color: red">3</span>,4,5) means we'll have 3 matrices of shape (4, 5) in each group
- Then (<span style="color: red">2</span>,3,4,5) means we'll have 2 such groups, each holding 3 matrices of shape (4, 5)
---

<span style="margin-top: 5vh">This information will be helpful for slicing the data</span>
```python
    a4[:, :, :, :3]
```
Here, we keep everything along the first three axes and slice only the last axis, taking the first 3 columns of every matrix
<div style="border: 1px solid tan; width: 50%; text-align: center">
    
</div>

In [58]:
a4 = np.random.randint(10, size=(2, 3, 4, 5))
a4

array([[[[9, 2, 6, 3, 8],
         [2, 4, 2, 6, 4],
         [8, 6, 1, 3, 8],
         [1, 9, 8, 9, 4]],

        [[1, 3, 6, 7, 2],
         [0, 3, 1, 7, 3],
         [1, 5, 5, 9, 3],
         [5, 1, 9, 1, 9]],

        [[3, 7, 6, 8, 7],
         [4, 1, 4, 7, 9],
         [8, 8, 0, 8, 6],
         [8, 7, 0, 7, 7]]],


       [[[2, 0, 7, 2, 2],
         [0, 4, 9, 6, 9],
         [8, 6, 8, 7, 1],
         [0, 6, 6, 7, 4]],

        [[2, 7, 5, 2, 0],
         [2, 4, 2, 0, 4],
         [9, 6, 6, 8, 9],
         [9, 2, 6, 0, 3]],

        [[3, 4, 6, 6, 3],
         [6, 2, 5, 1, 9],
         [8, 4, 5, 3, 9],
         [6, 8, 6, 0, 0]]]])

# Manipulating arrays

In [59]:
a1

array([1, 2, 3])

In [60]:
ones = np.ones(3)
ones

array([1., 1., 1.])

In [61]:
a1 + ones

array([2., 3., 4.])

In [62]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [63]:
a1 * a2
# a1 (shape (3,)) is broadcast across each row of a2 (shape (2, 3)); equivalent to np.multiply(a1, a2)

array([[ 1. ,  4. ,  9.9],
       [ 4. , 10. , 19.5]])

In [64]:
print(a1.shape, a2.shape)

(3,) (2, 3)


In [65]:
print(a2.shape, a3.shape)

(2, 3) (3, 2, 3)


In [66]:
a2 * a3

array([[[  1. ,   4. ,   9.9],
        [ 16. ,  25. ,  39. ]],

       [[  7. ,  16. ,  29.7],
        [ 40. ,  55. ,  78. ]],

       [[ 13. ,  28. ,  49.5],
        [ 64. ,  85. , 117. ]]])

Some other operations that we can do

```python
np.add(x, y)
np.sin(x)
np.log(x)
np.exp(x)
```

## Aggregation

In [74]:
# A plain Python list, summed with Python's built-in sum().
my_list = [1, 2, 3]
print(type(my_list), sum(my_list), sep="\n")

<class 'list'>
6


In [73]:
print(a1)
print(type(a1))
print(np.sum(a1))

[1 2 3]
<class 'numpy.ndarray'>
6


- Use Python's built-in functions (`sum()`) on Python data types.

- Use NumPy's functions (`np.sum()`) on NumPy arrays.

#### NumPy's functions are faster than Python's built-ins on NumPy arrays

In [76]:
massive_array = np.random.random(100000)
massive_array[:10]

array([0.64218669, 0.52039374, 0.20171054, 0.41694017, 0.62176811,
       0.88172427, 0.79785515, 0.60734465, 0.22847096, 0.21669936])

In [77]:
%timeit sum(massive_array)
%timeit np.sum(massive_array)

20.9 ms ± 1.29 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
109 µs ± 1.34 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


##### So, always prefer the NumPy version of an operation when working with NumPy arrays

In [78]:
a2

array([[1. , 2. , 3.3],
       [4. , 5. , 6.5]])

In [90]:
a2_mean = np.mean(a2)
a2_same_mean = np.sum(a2)/a2.size
print(a2_mean, a2_same_mean)

3.6333333333333333 3.6333333333333333


In [91]:
print("minimum value = ", np.min(a2), "\tmaximum value = ", np.max(a2))

minimum value =  1.0 	maximum value =  6.5


In [92]:
np.std(a2)

1.8226964152656422

> What is standard deviation?

![standard deviation](std.JPG)

In [93]:
np.var(a2)

3.3222222222222224

> What is variance?

![variance](variance.JPG)

In [94]:
print(np.std(a2), np.var(a2))
print(np.sqrt(np.var(a2)))

1.8226964152656422 3.3222222222222224
1.8226964152656422


# Reshape

In [67]:
a4 = np.array([[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
               [[10, 11, 12], [13, 14, 15], [16, 17, 18]]])
a4

array([[[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9]],

       [[10, 11, 12],
        [13, 14, 15],
        [16, 17, 18]]])

In [68]:
print(a3.shape, a4.shape)

(3, 2, 3) (2, 3, 3)


In [69]:
# a2 has shape (2, 3) and a4 has shape (2, 3, 3). Shapes are compared from the
# right: the last axes match (3 and 3), but the next pair (2 and 3) does not
# and neither is 1, so the arrays cannot be broadcast together.
# The error is caught and printed so the demonstration stays visible without
# stopping a "Restart & Run All".
try:
    a2 * a4
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: operands could not be broadcast together with shapes (2,3) (2,3,3) 

#### Broadcasting error

There are some rules for broadcasting numpy arrays:
![broadcast](broadcast.JPG)

See the [documentation](https://numpy.org/doc/stable/user/basics.broadcasting.html) for details

> So how can we multiply a2 and a4?

We need to reshape one of those arrays

In [95]:
print(a2.shape, a4.shape)

(2, 3) (2, 3, 3)


In [96]:
a2_reshape = a2.reshape(2, 3, 1)
a2_reshape

array([[[1. ],
        [2. ],
        [3.3]],

       [[4. ],
        [5. ],
        [6.5]]])

In [97]:
a2_reshape * a4

array([[[  1. ,   2. ,   3. ],
        [  8. ,  10. ,  12. ],
        [ 23.1,  26.4,  29.7]],

       [[ 40. ,  44. ,  48. ],
        [ 65. ,  70. ,  75. ],
        [104. , 110.5, 117. ]]])

# Transpose

In [100]:
print(a2)
print("shape = ",a2.shape)

[[1.  2.  3.3]
 [4.  5.  6.5]]
shape =  (2, 3)


In [101]:
print(a2.T)
print("shape = ",(a2.T).shape)

[[1.  4. ]
 [2.  5. ]
 [3.3 6.5]]
shape =  (3, 2)


## Dot product and element-wise (Hadamard) product

[matrix multiplication](https://www.mathsisfun.com/algebra/matrix-multiplying.html)

[dot product](https://www.mathsisfun.com/algebra/vectors-dot-product.html)

#### Dot product

[playground](http://matrixmultiplication.xyz/ "play with this tool")

In [104]:
np.random.seed(0)

mat1 = np.random.randint(10, size=(5, 3))
mat2 = np.random.randint(10, size=(5, 3))
mat1

array([[5, 0, 3],
       [3, 7, 9],
       [3, 5, 2],
       [4, 7, 6],
       [8, 8, 1]])

In [105]:
mat2

array([[6, 7, 7],
       [8, 1, 5],
       [9, 8, 9],
       [4, 3, 0],
       [3, 5, 0]])

In [106]:
# mat1 and mat2 are both (5, 3): the number of columns of the first operand (3)
# does not equal the number of rows of the second (5), so the dot product is
# undefined. The error is caught and printed so the demonstration stays
# visible without stopping a "Restart & Run All".
try:
    np.dot(mat1, mat2)
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: shapes (5,3) and (5,3) not aligned: 3 (dim 1) != 5 (dim 0)

The dot product is only defined when the inner dimensions match: <span style="color: red">(m\*n).(n\*p)</span>. The result is an <span style="color: red">(m\*p)</span> shaped matrix

In [108]:
# Re-seeding with 0 restarts the same pseudo-random stream that produced mat1,
# so mat3 holds mat1's 15 values laid out as (3, 5) instead of (5, 3).
np.random.seed(0)

mat3 = np.random.randint(10, size=(3, 5))
# Inner dimensions now match: (5, 3) . (3, 5)
print(mat1.shape, mat3.shape)

(5, 3) (3, 5)


In [111]:
dot_ans = np.dot(mat1, mat3)
print(dot_ans)
print("shape = ",dot_ans.shape)

[[ 46  18  39  39  38]
 [141  75 116  95  58]
 [ 74  27  50  35  43]
 [125  57  95  74  62]
 [119  30  72  48  89]]
shape =  (5, 5)


In [115]:
np.dot(mat3, mat1)

array([[102,  92,  46],
       [109,  92,  80],
       [117, 146, 140]])

For the dot product, the order of the operands matters: `np.dot(mat1, mat3)` gives a (5, 5) matrix, while `np.dot(mat3, mat1)` gives a (3, 3) matrix with different values

#### element-wise product

In [114]:
mat1 * mat2

array([[30,  0, 21],
       [24,  7, 45],
       [27, 40, 18],
       [16, 21,  0],
       [24, 40,  0]])

In [116]:
mat2 * mat1

array([[30,  0, 21],
       [24,  7, 45],
       [27, 40, 18],
       [16, 21,  0],
       [24, 40,  0]])

Element-wise multiplication is commutative: `mat1 * mat2` and `mat2 * mat1` give the same result

# Practical Example

Turn an image into a NumPy array

![panda](images/panda.png)

In [117]:
from matplotlib.image import imread

panda = imread("images/panda.png")
type(panda)

numpy.ndarray

In [118]:
panda.size, panda.shape, panda.ndim

(24465000, (2330, 3500, 3), 3)

![car](images/car-photo.png)

In [119]:
car = imread("images/car-photo.png")
car.size, car.shape, car.ndim

(991300, (431, 575, 4), 3)