# Numpy

1. Numpy stands for numerical python
2. Numpy is used to perform advanced numerical computations on large datasets
3. Numpy can perform complex operations with fewer lines of code
4. It provides a very useful data structure called the array

syntax
import numpy as np

check version of numpy
np.__version__

Documentation available at numpy.org

In [2]:
import numpy as np

np.__version__

'1.24.2'

# Array
- An array is a collection of homogeneous (same type) elements.

# np.array
To create an array, use np.array

In [5]:
lst = [10,20,30,40,50]
arr = np.array(lst)
print(arr, type(arr))

[10 20 30 40 50] <class 'numpy.ndarray'>


# List vs array
- With a list we have to write a loop to perform element-by-element operations.
    With an array we can apply the operation to the whole array at once.
- A list has to store a lot of information for each element separately because each element can behave differently in a list (heterogeneous elements). An array is a collection of homogeneous elements, so there is no need to manage elements individually.
- Arrays are faster than list

In [7]:
#Multiply each element of following list by 5

#List approach
lst=[10,20,30,40,50,60]
print(lst, type(lst))
for i in range(len(lst)):
    lst[i]*=5
print(lst)

[10, 20, 30, 40, 50, 60] <class 'list'>
[50, 100, 150, 200, 250, 300]


In [8]:
#Array approach
lst=[10,20,30,40,50,60]
arr = np.array(lst)
arr*5

array([ 50, 100, 150, 200, 250, 300])

# Why are arrays faster than lists
- On a list we have to use loops to perform an operation. Loops make the process slow
- Arrays use vectorised operations (together with array broadcasting), and this process is much faster than loops.
- A list is a collection of heterogeneous elements, whereas an array is a collection of homogeneous elements.

# Array upcasting
When we pass heterogeneous elements to an array, it converts them to one common type. This process is known as array upcasting

Array upcasting uses the hierarchy below:
int -> float -> complex -> string -> object

In [10]:
arr = np.array([10,20,30,40,50])
arr.dtype

dtype('int64')

In [11]:
arr = np.array([10,20,30,40,50.0])
arr.dtype

dtype('float64')

In [15]:
arr = np.array([10,20+5j,30,40.7,50])
print(arr.dtype)
print(arr)

complex128
[10. +0.j 20. +5.j 30. +0.j 40.7+0.j 50. +0.j]


In [18]:
# In numpy, strings are stored as fixed-width unicode and shown with the U representation (e.g. <U64). U = unicode
arr = np.array([10.5,20+5j,30,40.7,'50'])
print(arr.dtype)
print(arr)

<U64
['10.5' '(20+5j)' '30' '40.7' '50']


In [21]:
#When an element is itself a sequence (e.g. a nested list), numpy raises an error; to suppress this, define dtype=object
arr = np.array([10.5,20+5j,30,[40.7,60.1],'50'])
print(arr.dtype)
print(arr)

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (5,) + inhomogeneous part.

In [22]:
arr = np.array([10.5,20+5j,30,[40.7,60.1],'50'], dtype=object)
print(arr.dtype)
print(arr)

object
[10.5 (20+5j) 30 list([40.7, 60.1]) '50']


In [24]:
arr = np.array([10.5,20+5j,30,40.7,60,'50',None, True])
print(arr.dtype)
print(arr)

object
[10.5 (20+5j) 30 40.7 60 '50' None True]


# Attributes on arrays

- dtype: Returns the type of the elements an array contains
- size: Returns the number of elements an array contains
- shape: Returns a tuple. If the tuple has a single element, it represents the number of elements
        If the tuple has two elements, then the first element is the number of rows and the second is the number of columns.
- ndim : Returns the number of dimensions of the array. A 1D array is called a vector

In [31]:
arr = np.array([10,20,30,40,50])
print(arr.dtype)
print(arr.size)
print(arr.shape)
print(arr.ndim)

int64
5
(5,)
1


In [33]:
arr = np.array([[1,2,3],[4,5,6]])
print(arr.shape)
print(arr.ndim)

(2, 3)
2


# Creating different types of arrays

1. np.arange()
syntax: np.arange(start, stop, step)

2. np.linspace()
Used to create an array of equally spaced values within a range of values
The difference between any two consecutive values is the same across the elements of the array
syntax: np.linspace(start, stop, number of values)

3. np.zeros()
Returns an array of given shape where all elements of the array are zero. The dimensions of the array need to be passed as a tuple

4. np.ones()
Returns an array of given shape where all elements of array is one.

5. np.eye()
Returns identity matrix

6. np.zeros_like()
It returns an array of zeros of the shape of given matrix

7. np.ones_like()
It returns an array of ones of the shape of given matrix

8. np.full_like()
It returns an array of the shape of given array and given value

9. np.diagonal()
It returns the principal diagonal from the array

10. np.diag()
It returns a diagonal array.
In a diagonal array, the diagonal elements can be non-zero whereas the rest of the elements are zeros

11. np.trace()
It returns the sum of the diagonal of an array


In [37]:
#Create an array that has the numbers 1 to 10 (note: range(1,10) stops at 9; range(1,11) is needed to include 10)
lst=[i for i in range(1,10)]
print(np.array(lst))

print(np.arange(1,11))

#Create an array of even numbers from 2 to 20
print(np.arange(2,21,2))

[1 2 3 4 5 6 7 8 9]
[ 1  2  3  4  5  6  7  8  9 10]
[ 2  4  6  8 10 12 14 16 18 20]


In [41]:
# np.linspace()
#Create  an array of 5 equally spaced values between 1 and 3

print(np.linspace(1,3,5))

print(np.linspace(1,10,20))

[1.  1.5 2.  2.5 3. ]
[ 1.          1.47368421  1.94736842  2.42105263  2.89473684  3.36842105
  3.84210526  4.31578947  4.78947368  5.26315789  5.73684211  6.21052632
  6.68421053  7.15789474  7.63157895  8.10526316  8.57894737  9.05263158
  9.52631579 10.        ]


In [43]:
#Create a 3X3 matrix where all elements are 0
np.zeros((3,3))

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [44]:
np.zeros((3,3), dtype=int)

array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]])

In [47]:
x= np.ones((3,3), dtype=int)
print(x.dtype)
print(x)

int64
[[1 1 1]
 [1 1 1]
 [1 1 1]]


Class record April 27


In [4]:
import numpy as np
np.eye(4,4, dtype=int)

array([[1, 0, 0, 0],
       [0, 1, 0, 0],
       [0, 0, 1, 0],
       [0, 0, 0, 1]])

In [6]:
array=([[2,4,6],[6,8,0]])
print(np.zeros_like(array))

print(np.ones_like(array))

[[0 0 0]
 [0 0 0]]
[[1 1 1]
 [1 1 1]]


In [8]:
print(np.full_like(array, 50))

print(np.full_like(array, 50.5, dtype=float))

[[50 50 50]
 [50 50 50]]
[[50.5 50.5 50.5]
 [50.5 50.5 50.5]]


In [10]:
arr = np.random.randint(10,100,25).reshape(5,5)
print(arr)

[[74 72 14 20 44]
 [10 37 90 39 58]
 [75 67 37 31 38]
 [93 71 98 86 51]
 [28 23 24 59 97]]


In [11]:
np.diagonal(arr)

array([74, 37, 37, 86, 97])

In [12]:
# To get the diagonal one above the main diagonal use offset=1, and so on
np.diagonal(arr, offset=1)

array([72, 90, 31, 51])

In [13]:
#Only pass the diagonal elements
np.diag([2,4,6,8])

array([[2, 0, 0, 0],
       [0, 4, 0, 0],
       [0, 0, 6, 0],
       [0, 0, 0, 8]])

In [17]:
#Convert an array in to a diagonal matrix
print(arr)
print(np.diag(arr.diagonal()))

[[74 72 14 20 44]
 [10 37 90 39 58]
 [75 67 37 31 38]
 [93 71 98 86 51]
 [28 23 24 59 97]]
[[74  0  0  0  0]
 [ 0 37  0  0  0]
 [ 0  0 37  0  0]
 [ 0  0  0 86  0]
 [ 0  0  0  0 97]]


In [22]:
print(np.diagonal(arr).sum())

print(np.trace(arr))

print(np.trace(arr, offset=1))

331
331
244


In [23]:
help(np.trace)

Help on function trace in module numpy:

trace(a, offset=0, axis1=0, axis2=1, dtype=None, out=None)
    Return the sum along diagonals of the array.
    
    If `a` is 2-D, the sum along its diagonal with the given offset
    is returned, i.e., the sum of elements ``a[i,i+offset]`` for all i.
    
    If `a` has more than two dimensions, then the axes specified by axis1 and
    axis2 are used to determine the 2-D sub-arrays whose traces are returned.
    The shape of the resulting array is the same as that of `a` with `axis1`
    and `axis2` removed.
    
    Parameters
    ----------
    a : array_like
        Input array, from which the diagonals are taken.
    offset : int, optional
        Offset of the diagonal from the main diagonal. Can be both positive
        and negative. Defaults to 0.
    axis1, axis2 : int, optional
        Axes to be used as the first and second axis of the 2-D sub-arrays
        from which the diagonals should be taken. Defaults are the first two
       

# Reshaping of array

1. reshape()
It reshapes the given array as per the given dimensions
It returns a new array of the given shape

2. shape
It converts the array into an array of the given shape
It makes the changes in the existing array
syntax: arr.shape = (rows, cols)

3. T
Transpose of a matrix

In [28]:
# Create a new array and reshape it to a 3X3 array
x=np.arange(1,10)
print(x)
print(x.reshape(3,3))

[1 2 3 4 5 6 7 8 9]
[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [29]:
# Reshaping is possible only if rows*cols is equal to the number of elements in the array
x=np.arange(1,11)
print(x)
print(x.reshape(3,3))

[ 1  2  3  4  5  6  7  8  9 10]


ValueError: cannot reshape array of size 10 into shape (3,3)

In [37]:
x = np.arange(1,10)
x.shape=(3,3)
print(x)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


# Transpose of an array
Converting rows into columns
syntax: x.T

In [41]:
x

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [42]:
x.shape

(4, 6)

In [43]:
#It returns a new array which is the transpose of the given array
x.T

array([[ 0,  6, 12, 18],
       [ 1,  7, 13, 19],
       [ 2,  8, 14, 20],
       [ 3,  9, 15, 21],
       [ 4, 10, 16, 22],
       [ 5, 11, 17, 23]])

# Creating of 2D array

1. By using a 2D list
[[10,8,6], [6,8,0], [1,5,0]]

2. By reshaping the array

In [46]:
x=np.array([[10,8,6], [6,8,0], [1,5,0]])
print(x.shape)

print(x.ndim)

(3, 3)
2


In [50]:
x=np.array([1,2,3,4,5,6]).reshape(2,3)
x

array([[1, 2, 3],
       [4, 5, 6]])

# Creating a 3D array

When we repeat (stack) a 2D array then it becomes a 3D array

1. By using a 3D list

2. By reshaping the array
reshape(number of arrays, rows, cols)
shape = (number of arrays, rows, cols)

In [56]:
array3d = [ [[10,8,6], [6,8,0], [1,5,0]] , [[10,8,6], [6,8,0], [1,5,0]] , [[10,8,6], [6,8,0], [1,5,0]] ]

x=np.array(array3d)
print(x.ndim)
print(x.shape)
print(x)

3
(3, 3, 3)
[[[10  8  6]
  [ 6  8  0]
  [ 1  5  0]]

 [[10  8  6]
  [ 6  8  0]
  [ 1  5  0]]

 [[10  8  6]
  [ 6  8  0]
  [ 1  5  0]]]


In [59]:
#Create a 3D array with shape 2X4X6
# 2 arrays, each of shape 4X6

arr = np.arange(48).reshape(2,4,6)
print(arr.ndim)
print(arr.shape)
print(arr)

3
(2, 4, 6)
[[[ 0  1  2  3  4  5]
  [ 6  7  8  9 10 11]
  [12 13 14 15 16 17]
  [18 19 20 21 22 23]]

 [[24 25 26 27 28 29]
  [30 31 32 33 34 35]
  [36 37 38 39 40 41]
  [42 43 44 45 46 47]]]


# Slicing and Indexing of arrays

# 1D Array
The slicing and indexing of a 1D array is exactly the same as the slicing and indexing of a list
Arrays support positive and negative indexing

# Slicing
arr[start : stop : step]

In [69]:
arr = np.array([10,20,30,40,50])
print(arr.shape)

#Fetch first and last item from array
print(arr[0])
print(arr[len(arr)-1])

(5,)
10
50


In [71]:
print(arr[0:4:2])

[10 30]


In [73]:
#Every second element in reverse order
arr[::-2]

array([50, 30, 10])

# Slicing and Indexing of 2D arrays

In [76]:
arr = np.array([[10,20,30],[40,50,60],[70,80,90]])
arr

array([[10, 20, 30],
       [40, 50, 60],
       [70, 80, 90]])

In [77]:
arr[0]

array([10, 20, 30])

In [78]:
arr[len(arr)-1]

array([70, 80, 90])

In [83]:
#Fetch 20 from array
print(arr[0,1])

#Fetch 90
print(arr[2,2])
print(arr[-1,-1])
print(arr[2,-1])
print(arr[-1,2])

20
90
90
90
90


Class record April 28, 2023
# Slicing and indexing 2D array



# Slicing and indexing 3D array

Syntax
arr[array_index]
arr[array_index, row_index]
arr[array_index, row_index, col_index]

In [7]:
import numpy as np
#arr= np.arange(10,100,60).reshape(3,3,3)
arr = np.random.randint(10,100,60).reshape(3,4,5)
arr

array([[[82, 43, 17, 44, 32],
        [88, 64, 35, 36, 41],
        [59, 59, 26, 16, 77],
        [27, 30, 17, 73, 33]],

       [[88, 36, 90, 56, 18],
        [82, 98, 33, 25, 47],
        [92, 49, 25, 85, 94],
        [84, 17, 59, 77, 16]],

       [[67, 17, 29, 16, 27],
        [51, 33, 87, 65, 88],
        [87, 60, 59, 33, 45],
        [81, 77, 45, 85, 35]]])

# Slicing 3D array

arr[array_slicing]
arr[start: stop: step]

arr[array_slicing, row_slicing]
arr[start: stop: step, start: stop: step]

arr[array_slicing, row_slicing, col_slicing]
arr[start: stop: step, start: stop: step, start: stop: step ]

In [10]:
#Fetch first 2 rows from first array
arr[0, 0:2]

array([[82, 43, 17, 44, 32],
       [88, 64, 35, 36, 41]])

In [15]:
#First row from all the arrays
arr[::,0]

array([[82, 43, 17, 44, 32],
       [88, 36, 90, 56, 18],
       [67, 17, 29, 16, 27]])

In [16]:
#First column from all the arrays

arr[::, ::, 0]

array([[82, 88, 59, 27],
       [88, 82, 92, 84],
       [67, 51, 87, 81]])

In [22]:
#Get the last 2 columns from first two arrays
arr[0:2,::, 3:]

array([[[44, 32],
        [36, 41],
        [16, 77],
        [73, 33]],

       [[56, 18],
        [25, 47],
        [85, 94],
        [77, 16]]])

# Array of random values

random.random()
-  It generates the specified number of random values
- the random value is always between 0 and 1 (0 included, 1 excluded)

random.rand()
- It returns array of random values of given shape
- the random value is always between 0 and 1 (0 included, 1 excluded)

random.randint()
- It returns an array of random int values within a range (lower bound included, upper bound excluded)

random.seed() - For fixing random state
- It is used to set the random state
- If we fix the random state, then random will return the same set of values every time
- Range 0 to 2**32-1 (2 power 32 minus 1)

In [23]:
np.random.random(20)

array([0.49623274, 0.65734901, 0.18493905, 0.84173756, 0.27650669,
       0.3907566 , 0.12928333, 0.43065145, 0.16333071, 0.59203537,
       0.6741164 , 0.93160071, 0.54678787, 0.35847236, 0.27493592,
       0.36637895, 0.07102667, 0.15222031, 0.21681641, 0.22476558])

In [25]:
#Create a 3X3 matrix of random values
np.random.random(9).reshape(3,3)

array([[0.18014571, 0.96258245, 0.12276912],
       [0.12335971, 0.15563854, 0.19328482],
       [0.16312674, 0.91704652, 0.45474198]])

In [26]:
#Create a 3X3 matrix of random values
np.random.rand(3,3)

array([[0.62560465, 0.71928699, 0.55431019],
       [0.28393006, 0.16615092, 0.3170114 ],
       [0.28790169, 0.46581386, 0.88314331]])

In [27]:
#Both produce an array of the same shape (the random values themselves differ)
np.random.random(30).reshape(5,6)

np.random.rand(5,6)

array([[0.06879125, 0.72109949, 0.04893655, 0.54236191, 0.61896883,
        0.3709638 ],
       [0.06090265, 0.02264882, 0.31810274, 0.55108834, 0.47204431,
        0.37796314],
       [0.16764944, 0.83824661, 0.79218482, 0.33542319, 0.69867272,
        0.13710592],
       [0.70927255, 0.6202892 , 0.5814108 , 0.06213762, 0.72991383,
        0.22090194],
       [0.54870639, 0.07361161, 0.33844932, 0.42087744, 0.02969923,
        0.12843234]])

In [35]:
#Generate 10 random integer numbers between 10 and 50
np.random.randint(10,50, 10)

array([44, 29, 26, 41, 37, 20, 35, 12, 22, 15])

In [36]:
#Create a 5X4 matrix of random int numbers between 10 and 100
#For a 5X4 matrix, we need 20 elements

np.random.randint(10,100,20).reshape(5,4)

array([[73, 13, 30, 31],
       [46, 77, 37, 22],
       [64, 73, 39, 80],
       [36, 92, 89, 57],
       [46, 97, 89, 19]])

In [63]:
np.random.seed(46)
np.random.randint(10,50,6)

array([15, 29, 28, 21, 39, 37])

Class record May 2

random.normal()
It returns an array of values that are normally distributed

In [1]:
import numpy as np

np.random.normal(100,5,150)


array([100.29194216,  95.30214088,  93.99171864, 106.59532594,
        88.23638138,  99.49738957,  93.75240204, 101.7536317 ,
        89.74258843, 103.32773203,  97.28285419, 101.05550871,
        90.56251911,  95.6097397 , 100.31577219,  97.21887722,
        98.35219628, 101.23731522, 101.96110496, 101.24216592,
        99.64278339,  97.93031401,  98.21059988, 112.58307167,
       103.30717891, 102.00978429, 100.67316322,  97.91358257,
       104.51688369, 104.84663181,  82.95906222, 102.76175869,
       102.25475135,  91.04351779, 100.72596492, 100.83840414,
        96.70434368,  96.83786857,  97.85917736, 104.91679254,
        93.12473967,  98.83830798, 103.12811445, 109.73513545,
       103.69259342, 102.68087097,  96.12289791, 105.46007523,
       110.47177707, 103.16286073,  96.08787878, 101.3918999 ,
       102.85965749, 105.33301056, 107.91379066, 103.27152168,
       104.6734338 , 100.98363051,  91.56274428,  92.51808361,
       101.47908208, 111.29980222,  98.4690419 ,  98.92

# Functions on arrays
1. np.sum() It returns the sum of all the elements of the array

when 
axis=0, (the operation runs top to bottom, down the rows, giving one result per column)
axis=1, (the operation runs left to right, across the columns, giving one result per row)
The axis parameter can be passed to all of these functions

2. np.mean()
It returns the average of the array (mean in stats is average in maths)

3. np.median()
It returns the median of the array
np.median(array)

4. np.std()
It returns the standard deviation

5. np.var()
It returns the variance, which is the square of the standard deviation

6. np.max()
It returns the maximum of the array

7. np.min()
It returns the minimum of the array

8. np.percentile()
It returns the specified percentile value from the array.
Percentile is the cutoff value
np.percentile(array, percentile_value)

In [5]:
np.random.seed(42)
x=np.random.randint(10,100,12).reshape(3,4)
x

array([[61, 24, 81, 70],
       [30, 92, 96, 84],
       [84, 97, 33, 12]])

In [16]:
print(np.sum(x))

print(x[:1].sum())

764
236


In [7]:
np.sum(x, axis=1) # 61+24+81+70, ...

array([236, 302, 226])

In [8]:
np.sum(x, axis=0) #61+30+84, ...

array([175, 213, 210, 166])

In [9]:
x.mean()

63.666666666666664

In [17]:
#An array has no median() method; median should be called via the np module
np.median(x)

75.5

In [18]:
x.std()

29.51082663852182

In [19]:
x.std(axis=1)

array([21.41261311, 26.62235902, 35.10341864])

In [20]:
x.max()

97

In [21]:
x.max(axis=0)

array([84, 97, 96, 84])

In [22]:
np.percentile(x, 99)

96.89

# astype()

It is used to change the datatype of the values the array contains. It returns a new array; the original array is left unchanged

In [28]:
arr = np.array([2.5, 7, 45.345, 56, 23])
print(arr.dtype)
print(arr.astype(int))
print(arr.dtype)


float64
[ 2  7 45 56 23]
float64


# Null values in Numpy

python - None
numpy -  np.nan

In numpy the null values are represented by np.nan. 
The type of np.nan is float 

In [31]:
arr = np.array([2.5, 7, np.nan, 56, 23, 45, np.nan, 67, np.nan, np.nan, 60, 70]).reshape(3,4)
arr

array([[ 2.5,  7. ,  nan, 56. ],
       [23. , 45. ,  nan, 67. ],
       [ nan,  nan, 60. , 70. ]])

In [32]:
arr.dtype

dtype('float64')

# Operations on null values

1. Count of null values
2. Count of all non-null values
3. Accessing the null values
4. Accessing all the non-null values
5. Replacing the null values

Count of null values

In [34]:
np.isnan(arr)

array([[False, False,  True, False],
       [False, False,  True, False],
       [ True,  True, False, False]])

In [35]:
np.isnan(arr).sum()

4

Accessing the indexes of all the null values

In [36]:
np.where(np.isnan(arr))
# Returns row index and column index, intersection of row and column gives the position of null element

(array([0, 1, 2, 2]), array([2, 2, 0, 1]))

Accessing non-null values

In [37]:
arr[np.isnan(arr)==False]

array([ 2.5,  7. , 56. , 23. , 45. , 67. , 60. , 70. ])

In [38]:
~(np.isnan(arr))

array([[ True,  True, False,  True],
       [ True,  True, False,  True],
       [False, False,  True,  True]])

Replacing the null values

In [41]:
#Replacing the null values by 0
arr[np.isnan(arr)]=0
arr

array([[ 2.5,  7. ,  0. , 56. ],
       [23. , 45. ,  0. , 67. ],
       [ 0. ,  0. , 60. , 70. ]])

In [42]:
# replace all the null values by mean value
# mean() returns nan if any element in the array is null, so exclude all the null values first
arr = np.array([2.5, 7, np.nan, 56, 23, 45, np.nan, 67, np.nan, np.nan, 60, 70]).reshape(3,4)
arr.mean()

nan

In [43]:
m=arr[~(np.isnan(arr))].mean()
m

41.3125

# np.argwhere()
It returns the indexes of all the non-zero elements from the array (nan counts as non-zero)

In [45]:
np.argwhere(arr)

array([[0, 0],
       [0, 1],
       [0, 2],
       [0, 3],
       [1, 0],
       [1, 1],
       [1, 2],
       [1, 3],
       [2, 0],
       [2, 1],
       [2, 2],
       [2, 3]])

In [46]:
np.argwhere(arr>=0)

array([[0, 0],
       [0, 1],
       [0, 3],
       [1, 0],
       [1, 1],
       [1, 3],
       [2, 2],
       [2, 3]])

# np.argmax()
It returns the index of the maximum number.
It reads the nD array as a 1-D (flattened) array. If the array contains nan, the index of the first nan is returned

In [48]:
#To convert nD array to 1D array 
np.ravel(arr)

array([ 2.5,  7. ,  nan, 56. , 23. , 45. ,  nan, 67. ,  nan,  nan, 60. ,
       70. ])

In [47]:
np.argmax(arr) # Argmax converts nD array to 1D array

2

# np.argmin()
It returns the index of the minimum number in the array
It always returns the index of the first occurrence if the minimum value appears multiple times

# np.where()