In [1]:
import numpy as np
import pandas as pd

## Array Creation using NumPy library

In [2]:
ls = [10, 20, 30, 40, 50]
ls

[10, 20, 30, 40, 50]

In [4]:
type(ls)

list

In [5]:
arr = np.array(ls)
arr

array([10, 20, 30, 40, 50])

In [6]:
type(arr)

numpy.ndarray

- `ndarray` stands for N-dimensional array.
- N-dimensional arrays are the core building blocks of NumPy.

In [7]:
## 2nd array creation function
## First the plain-Python way: the integers 0 through 9 as a list
l = list(range(10))
l

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [8]:
np.arange(0, 10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [10]:
## 3rd array creation function
## Create an array of 10 random integers from 5 (inclusive) up to 50 (exclusive)
np.random.randint(low=5, high=50, size=10)

array([35, 19, 33, 46, 37, 21, 44, 27, 24, 40])

## Indexing

In [13]:
## Ten consecutive integers, 30 through 39 (the stop value 40 is excluded)
arr = np.arange(30, 40, 1)
arr

array([30, 31, 32, 33, 34, 35, 36, 37, 38, 39])

In [14]:
arr[2]

32

In [15]:
arr[5]

35

In [16]:
arr[0]

30

- Indexing in arrays starts at position 0.

In [18]:
arr

array([30, 31, 32, 33, 34, 35, 36, 37, 38, 39])

In [19]:
arr[-2]

38

In [20]:
arr[-1]

39

- Negative indexing counts from the end of the array: -1 is the last element, -2 is the second-to-last, and so on.

## Slicing : 
- Get a sub-array from the original array: `arr[start : stop]` includes the `start` index and excludes the `stop` index.

In [21]:
arr

array([30, 31, 32, 33, 34, 35, 36, 37, 38, 39])

In [22]:
arr[3 : 8]

array([33, 34, 35, 36, 37])

## Statistical functions on an array

In [24]:
## Twelve random integers drawn from 5 (inclusive) to 50 (exclusive);
## no seed is set in this cell, so the values change on every run
arr1 = np.random.randint(low=5, high=50, size=12)
arr1

array([42, 11, 33, 44, 30,  7, 15, 48, 21, 45, 16, 13])

In [25]:
## Display the smallest value from the array

np.min(arr1)

7

In [26]:
## Display the largest value from the array
np.max(arr1)

48

In [27]:
arr2 = np.array([3, 11, 4, 6, 8, 9, 6])
arr2

array([ 3, 11,  4,  6,  8,  9,  6])

In [28]:
np.mean(arr2)

6.714285714285714

In [29]:
arr3 = np.array([5, 9, 11, 9, 7])
arr3

array([ 5,  9, 11,  9,  7])

In [30]:
np.median(arr3)

9.0

In [31]:
arr4 = np.array([2, 5, 1, 4, 2, 7])
arr4

array([2, 5, 1, 4, 2, 7])

In [34]:
int(np.median(arr4))

3

In [35]:
arr5 = np.array([2, 3, 6, 3, 7, 5, 1, 2, 3, 9])
arr5

array([2, 3, 6, 3, 7, 5, 1, 2, 3, 9])

In [36]:
## NumPy has no mode() function, so looking up np.mode raises AttributeError.
## Catch the error and print it so the demonstration stays visible while the
## notebook can still run from top to bottom.
try:
    np.mode(arr5)
except AttributeError as err:
    print(f"AttributeError: {err}")

AttributeError: module 'numpy' has no attribute 'mode'

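- mode() is not present in the NumPy library, hence `np.mode` raises an AttributeError. The mode can be computed with `np.unique(arr5, return_counts=True)` or with `scipy.stats.mode(arr5)`.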

## NumPy Array attributes

In [38]:
arr1

array([42, 11, 33, 44, 30,  7, 15, 48, 21, 45, 16, 13])

In [39]:
## Count the total number of values present in the array

len(arr1)

12

In [44]:
arr1.shape

(12,)

In [40]:
## NumPy has no top-level count() function, so looking up np.count raises
## AttributeError. Catch the error and print it so the demonstration stays
## visible while the notebook can still run from top to bottom.
try:
    np.count(arr1)
except AttributeError as err:
    print(f"AttributeError: {err}")

AttributeError: module 'numpy' has no attribute 'count'

- count() is not present in the NumPy library, hence `np.count` raises an AttributeError. Use `len(arr1)` or `arr1.size` to count the elements.

In [41]:
arr1

array([42, 11, 33, 44, 30,  7, 15, 48, 21, 45, 16, 13])

In [42]:
## calc the total of all the values

sum(arr1)

325

In [43]:
np.sum(arr1)

325

In [45]:
## Checking the dimensions of an array

arr1.ndim

1

## Array dtypes methods

In [46]:
a = np.array([10, 20.5, 30, 40, 50])
a

array([10. , 20.5, 30. , 40. , 50. ])

- array() converts the int values to float internally, because one value (20.5) is a float and an array holds a single dtype.
- This is called implicit conversion (upcasting).

In [47]:
a1 = np.array([10, 20, '30', 40.57, 50, 70])
a1

array(['10', '20', '30', '40.57', '50', '70'], dtype='<U32')

- A NumPy array holds a single dtype, so mixed values are upcast to the most general type present (here every value becomes a string).
- Upcasting order: str > float > int

## Arithmetic Operations on the arrays

In [48]:
arr1 = np.array([10, 20, 30, 40, 50])
arr1

array([10, 20, 30, 40, 50])

In [49]:
arr2 = np.array([60, 70, 80, 90, 100])
arr2

array([ 60,  70,  80,  90, 100])

In [50]:
## Add the 2 arrays

arr1 + arr2 

array([ 70,  90, 110, 130, 150])

- Addition is done by adding the values at the respective index locations.
- For example, the value at index 0 of arr1 is added to the value at index 0 of arr2, and so on.
- This is element-wise addition.

In [51]:
arr3 = np.array([100, 200, 300, 400])
arr3

array([100, 200, 300, 400])

In [52]:
## Element-wise addition needs compatible shapes; arr1 has 5 values and arr3
## has 4, so NumPy raises ValueError. Catch the error and print it so the
## demonstration stays visible while the notebook can still run top to bottom.
try:
    arr1 + arr3
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: operands could not be broadcast together with shapes (5,) (4,) 

In [53]:
arr1

array([10, 20, 30, 40, 50])

In [54]:
## Multiply the array with 5

arr1 * 5

array([ 50, 100, 150, 200, 250])

In [55]:
arr1 / 2

array([ 5., 10., 15., 20., 25.])

In [56]:
arr1 // 2

array([ 5, 10, 15, 20, 25], dtype=int32)

In [57]:
arr1

array([10, 20, 30, 40, 50])

In [58]:
arr2

array([ 60,  70,  80,  90, 100])

In [59]:
arr2 - arr1 

array([50, 50, 50, 50, 50])

## Conditional Indexing or Boolean Indexing

In [61]:
## Integers 5 through 19, used as the input for the boolean-condition filters
arr1 = np.arange(5, 20, 1)
arr1

array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

In [62]:
## Find out all the values from the array that are greater than 10

arr1 > 10

array([False, False, False, False, False, False,  True,  True,  True,
        True,  True,  True,  True,  True,  True])

In [63]:
arr1[arr1 > 10]

array([11, 12, 13, 14, 15, 16, 17, 18, 19])

In [64]:
## Find out all the values from the array that are greater than 10 and less than 17
## Condition 1 : values greater than 10
## Condition 2 : values less than 17

In [65]:
arr1

array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

In [66]:
arr1 > 10

array([False, False, False, False, False, False,  True,  True,  True,
        True,  True,  True,  True,  True,  True])

In [67]:
arr1 < 17

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True, False, False, False])

In [68]:
arr1[(arr1 > 10) & (arr1 < 17)]

array([11, 12, 13, 14, 15, 16])

In [69]:
## Python's `and` needs a single True/False from each operand, which a
## multi-element boolean array cannot provide, so NumPy raises ValueError.
## Catch the error and print it so the demonstration stays visible while the
## notebook can still run from top to bottom.
try:
    arr1[(arr1 > 10) and (arr1 < 17)]
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

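- "&" is the element-wise (bitwise) AND operator and works on whole arrays; "and" is the logical AND operator, which expects a single True/False value and therefore fails on arrays.
- Always use "&" (or "|" for OR) to combine array conditions, and wrap each condition in parentheses.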

In [70]:
## Display all the values from the array that are divisible by 5

arr1

array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

In [None]:
arr1 % 5 == 0

In [71]:
arr1[arr1 % 5 == 0]

array([ 5, 10, 15])

In [72]:
## Display all the values from the array that are divisible by 3 and 5 both
## Condition I : values should be divisible by 3
## Condition II : values should be divisible by 5

In [73]:
arr1[(arr1 % 5 == 0) & (arr1 % 3 == 0)]

array([15])

In [74]:
## Display all the values from the array that are divisible by 3 or 5

In [75]:
arr1[(arr1 % 5 == 0) | (arr1 % 3 == 0)]

array([ 5,  6,  9, 10, 12, 15, 18])

## 2D array creation

In [76]:
## Nested list: each inner list supplies one row of three values
l1 = [
    [10, 20, 30],
    [40, 50, 60],
    [70, 80, 90],
]
l1

[[10, 20, 30], [40, 50, 60], [70, 80, 90]]

In [78]:
a1 = np.array(l1)
a1

array([[10, 20, 30],
       [40, 50, 60],
       [70, 80, 90]])

In [79]:
a1.ndim

2

In [80]:
a1.shape

(3, 3)

In [81]:
len(a1)

3