In [1]:
## Data Analysis libraries

import pandas as pd
import numpy as np

## Data Visualization libraries

import matplotlib.pyplot as plt
import seaborn as sns

### What is NumPy

- It is a scientific library used for numerical operations. (Numerical - Num, Python - Py, i.e. NumPy)
- The main data structure of NumPy is the array.
- It is the foundational library for numerical computing in Python, especially for machine learning.
- Pandas and SciPy are higher-level libraries that are built on top of NumPy.

### Creating a numpy array

In [2]:
li = [10, 20, 30, 40, 50]
li

[10, 20, 30, 40, 50]

In [3]:
## Create a numpy array from the list above

arr = np.array(li)
arr

array([10, 20, 30, 40, 50])

In [4]:
## Check whether arr is a numpy array

type(arr)

numpy.ndarray

- ndarray means n-dimensional array

In [5]:
l1 = [10, 20, 30, 45.5, 60]
l1

[10, 20, 30, 45.5, 60]

In [6]:
arr1 = np.array(l1)
arr1

array([10. , 20. , 30. , 45.5, 60. ])

In [7]:
l2 = [100, 250, 350.75, 'Anil']
l2

[100, 250, 350.75, 'Anil']

In [8]:
arr2 = np.array(l2)
arr2

array(['100', '250', '350.75', 'Anil'], dtype='<U32')

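- Note: An array is a homogeneous data structure: all the elements inside an array must be of the same data type. When the input list mixes types, NumPy converts every element to one common type (ints and floats become floats; numbers mixed with a string all become strings, as seen above).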

### Addition and Multiplication operations on lists and numpy arrays

In [9]:
li

[10, 20, 30, 40, 50]

In [10]:
l1

[10, 20, 30, 45.5, 60]

In [11]:
li + l1

[10, 20, 30, 40, 50, 10, 20, 30, 45.5, 60]

- plus operation on a list is concatenation

In [12]:
arr

array([10, 20, 30, 40, 50])

In [13]:
arr1

array([10. , 20. , 30. , 45.5, 60. ])

In [14]:
arr + arr1

array([ 20. ,  40. ,  60. ,  85.5, 110. ])

- plus operation will add the data inside the array element wise.

### Note: The number of elements must be the same in both arrays (or their shapes must be broadcastable); otherwise you get a ValueError, as in the example below

In [15]:
## Two arrays of different lengths: 3 elements vs 4 elements
arr3 = np.array([100, 200, 300])
arr4 = np.array([600, 700, 800, 900])

## Adding arrays whose shapes cannot be broadcast together raises a ValueError.
## The error is caught and printed so that the demonstration is still visible
## but "Restart Kernel -> Run All" does not stop at this cell.
try:
    arr3 + arr4
except ValueError as exc:
    print(f"{type(exc).__name__}: {exc}")

ValueError: operands could not be broadcast together with shapes (3,) (4,) 

### Multiplication operation on lists and arrays

In [16]:
li

[10, 20, 30, 40, 50]

In [17]:
li * 3

[10, 20, 30, 40, 50, 10, 20, 30, 40, 50, 10, 20, 30, 40, 50]

- multiplication operation on lists is a repetition operation

In [19]:
arr

array([10, 20, 30, 40, 50])

In [20]:
arr * 3

array([ 30,  60,  90, 120, 150])

- the multiplication operation on an array multiplies every element by the given number (element-wise)

### Division and subtraction operation on lists and arrays

- Division and subtraction cannot be performed on lists

In [21]:
li

[10, 20, 30, 40, 50]

In [22]:
l1

[10, 20, 30, 45.5, 60]

In [25]:
## li / l1    ## Division is not possible on lists (raises a TypeError as well)

## Subtraction is not possible on lists either. The TypeError is caught and
## printed so the demonstration stays visible without halting a full re-run
## of the notebook.
try:
    li - l1
except TypeError as exc:
    print(f"{type(exc).__name__}: {exc}")

TypeError: unsupported operand type(s) for -: 'list' and 'list'

In [27]:
arr4

array([600, 700, 800, 900])

In [28]:
arr4 / 2

array([300., 350., 400., 450.])

In [31]:
### To get integer values as the output of a division we use // (floor division)

arr4 // 2

array([300, 350, 400, 450], dtype=int32)

In [32]:
## Two integer arrays of equal length (6 elements each),
## used in the following cells for element-wise subtraction.
arr6 = np.array([100, 200, 300, 400, 500, 600])
arr5 = np.array([150, 300, 450, 600, 750, 900])

arr5

array([150, 300, 450, 600, 750, 900])

In [33]:
arr6

array([100, 200, 300, 400, 500, 600])

In [34]:
arr5 - arr6

array([ 50, 100, 150, 200, 250, 300])

## Array Indexing

In [35]:
arr = np.array([23, 457, 49, 75, 278, 891])
arr

array([ 23, 457,  49,  75, 278, 891])

In [36]:
arr[2]

49

In [37]:
arr[5]

891

In [40]:
arr[-3]

75

In [41]:
arr[-6]

23

### Array Slicing

In [38]:
## Display [49, 75, 278] in the output

arr[2 : 5]

array([ 49,  75, 278])

In [39]:
## Display [49, 75, 278, 891] in the output

arr[2 : 6]

array([ 49,  75, 278, 891])

In [42]:
arr

array([ 23, 457,  49,  75, 278, 891])

In [43]:
arr[2 : 4 : 2]

array([49])

In [44]:
arr[1 : 6 : 3]

array([457, 278])

#### Array Slicing syntax
- array_name[start_index : end_index : step_index]

Note: start_index is included, end_index is not included, and step_index is the increment added to start_index to reach each next element

## Special arrays creation functions

In [46]:
zeros = np.zeros(3)
zeros

array([0., 0., 0.])

In [48]:
z1 = np.zeros(10)
z1

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [50]:
## A tuple as the shape creates a 2-D array: 3 rows by 4 columns, all zeros
z2 = np.zeros(shape=(3, 4))
z2

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [51]:
o1 = np.ones(7)
o1

array([1., 1., 1., 1., 1., 1., 1.])

In [53]:
o2 = np.ones((4,3))
o2

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [55]:
o3 = np.ones((3, 3, 2))
o3

array([[[1., 1.],
        [1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.],
        [1., 1.]]])

In [56]:
arr = np.arange(15)
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [57]:
np.arange(200, 800, 50)

array([200, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750])

In [58]:
np.arange(20, 50, 3)

array([20, 23, 26, 29, 32, 35, 38, 41, 44, 47])

In [60]:
arr1 = np.linspace(10, 80, 5)   ## 5 is not the gap; it is the number of elements to be present in the output (both 10 and 80 are included)
arr1

array([10. , 27.5, 45. , 62.5, 80. ])

- linspace is used to create an array with equal spacing between every pair of consecutive values of the array

- 27.5 - 10 = 17.5
- 45 - 27.5 = 17.5
- 62.5 - 45 = 17.5
- 80 - 62.5 = 17.5

In [61]:
arr2 = np.linspace(25, 75, 4)
arr2

array([25.        , 41.66666667, 58.33333333, 75.        ])

In [86]:
np.full(5, "Anant")

array(['Anant', 'Anant', 'Anant', 'Anant', 'Anant'], dtype='<U5')

In [87]:
np.full((3,2), "Hello")

array([['Hello', 'Hello'],
       ['Hello', 'Hello'],
       ['Hello', 'Hello']], dtype='<U5')

In [101]:
np.full(20, 10)

array([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
       10, 10, 10])

In [102]:
np.full(5, 'Hello')

array(['Hello', 'Hello', 'Hello', 'Hello', 'Hello'], dtype='<U5')

### Arithmetic Operations on an array

- Addition (+)
- Multiplication (*)
- Float Division (/)
- Int Division (//)
- Subtraction (-)

- For examples, refer to the cells above where we discussed the difference between lists and arrays

### Statistical operations on an array

In [64]:
## mean operation on an array

arr = np.arange(10, 25)
arr

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24])

In [65]:
np.mean(arr)

17.0

In [67]:
### calculate median of the array

np.median(arr)

17.0

In [68]:
np.std(arr)    ## std is for standard deviation (population form, ddof=0, by default)

4.320493798938574

In [70]:
np.var(arr)     ## var stands for variance (population form, ddof=0, by default)

18.666666666666668

In [75]:
arr.shape   ## shape gives the length of the array along each dimension; (15,) means one dimension holding 15 values (arr.size gives the total count)

(15,)

In [78]:
z = np.zeros(8)
z

array([0., 0., 0., 0., 0., 0., 0., 0.])

In [79]:
z.ndim

1

In [82]:
z1 = np.zeros((4,3))
z1

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [83]:
z1.ndim

2

In [84]:
o3 = np.ones((3,3,2))
o3

array([[[1., 1.],
        [1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.],
        [1., 1.]],

       [[1., 1.],
        [1., 1.],
        [1., 1.]]])

In [85]:
o3.ndim

3

## Boolean Indexing (Conditional Indexing)

In [88]:
arr

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24])

In [90]:
## Goal: display only [18, 19, 20, 21, 22, 23, 24]
## Step 1: the comparison alone returns a boolean mask (True where the value is > 17),
## not the values themselves

arr > 17

array([False, False, False, False, False, False, False, False,  True,
        True,  True,  True,  True,  True,  True])

In [91]:
arr[arr > 17]

array([18, 19, 20, 21, 22, 23, 24])

In [92]:
arr = np.array([123, 5, 37, 59, 175, 84, 3, 38])
arr

array([123,   5,  37,  59, 175,  84,   3,  38])

In [93]:
## Display the values greater than 50: [123, 59, 175, 84]
## (boolean indexing keeps the original order; the result is not sorted)

arr[arr > 50]

array([123,  59, 175,  84])

In [94]:
### Display values greater than 30 and less than 100
### Each comparison needs its own parentheses because & binds tighter than > and <

arr[(arr > 30) & (arr < 100)]

array([37, 59, 84, 38])

In [96]:
arr1 = np.arange(25, 50)
arr1

array([25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
       42, 43, 44, 45, 46, 47, 48, 49])

In [99]:
### Display all the values divisible by both 3 and 5

arr1[(arr1 % 3 == 0) & (arr1 % 5 == 0)]

array([30, 45])

In [100]:
### Display all the values divisible by 3 or 5

arr1[(arr1 % 3 == 0) | (arr1 % 5 == 0)]

array([25, 27, 30, 33, 35, 36, 39, 40, 42, 45, 48])