# NumPy

In [1]:
# Build a one-dimensional NumPy array from a plain Python list.

import numpy as np
values = [1, 3, 4, 5, 6]
arr = np.array(values)
arr

array([1, 3, 4, 5, 6])

In [3]:
# shape attribute of the array object will tell us 
# about the dimensions of the array.
arr.shape

(5,)

In [4]:
arr.dtype

dtype('int32')

In [5]:
arr = np.array([1,'st','er',3])
arr.dtype

dtype('<U11')

In [6]:
np.sum(arr) # check the error, when performing functions on non-numeric array

TypeError: cannot perform reduce with flexible type

In [20]:
arr = np.array([[1,2,3],[2,4,6],[8,8,8]])
arr.shape

(3, 3)

In [21]:
arr

array([[1, 2, 3],
       [2, 4, 6],
       [8, 8, 8]])

In [22]:
arr = np.zeros((2,4))
arr

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [23]:
arr = np.ones((2,4))
arr

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [24]:
arr = np.identity(3)
arr

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [25]:
arr = np.random.randn(3,4)
arr

array([[ 0.52384132,  0.82212868, -1.9259853 ,  1.77399331],
       [ 1.83541595, -0.98894596,  1.23020945,  1.65146132],
       [ 2.07872988, -0.36904524, -0.66039494,  2.12354199]])

In [7]:
from io import BytesIO

# Three comma-separated rows held in an in-memory byte stream.
b = BytesIO(b"2,23,33\n32,42,63.4\n35,77,12")
# The delimiter must match the separator actually used in the data;
# otherwise the rows are not split into three numeric columns.
arr = np.genfromtxt(b, delimiter=",")
arr

# try this to understand the importance of delimiter
#b = BytesIO(b"2*23*33\n32*42*63.4\n35*77*12")
#arr = np.genfromtxt(b, delimiter="*")
#arr

array([[ 2. , 23. , 33. ],
       [32. , 42. , 63.4],
       [35. , 77. , 12. ]])

# Basic Indexing and Slicing

In [27]:
#### Accessing array elements
#### Simple indexing

arr[1]

array([32. , 42. , 63.4])

In [28]:
arr = np.arange(12).reshape(2,2,3)
arr

# arange(12) produces the 12 elements 0 to 11
# reshape() -> arranges those elements into the given dimensions
# In the above case 2,2,3 -> 2 arrays, each of 2 rows and 3 columns

array([[[ 0,  1,  2],
        [ 3,  4,  5]],

       [[ 6,  7,  8],
        [ 9, 10, 11]]])

In [29]:
arr[0]  # prints the 0th array

array([[0, 1, 2],
       [3, 4, 5]])

In [30]:
# concept of slicing arrays 
arr = np.arange(10)
arr[5:]  # this fetches all elements from index 5 to last

array([5, 6, 7, 8, 9])

In [31]:
arr[5:8]  # slice elements from 5 to (8-1)

array([5, 6, 7])

In [32]:
arr[:-5] # slices from 0 to (last-5)
# remember: wherever you write a negative index (say -n),
# it means last-n (counting back from the end of the array)
# for example : arr[-4:]  -> means start from (last-4) to the end
# another example : arr[1:-4]  -> means start from 1 to (last-4)

array([0, 1, 2, 3, 4])

In [23]:
arr = np.arange(12).reshape(2,2,3)
arr

array([[[ 0,  1,  2],
        [ 3,  4,  5]],

       [[ 6,  7,  8],
        [ 9, 10, 11]]])

In [14]:
arr[1:2]  # accessing 1st array, skipping 0th.
          # the stop index 2 is exclusive, so only index 1 is selected;
          # slicing (unlike arr[1]) keeps all three dimensions: shape (1, 2, 3).

array([[[ 6,  7,  8],
        [ 9, 10, 11]]])

In [35]:
arr[1:2:] # prints the elements of the 1st array, both the rows.

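# you can also experiment with arr[1:1:]
# as well as with arr[1:1]
# why do we get the following?
# array([], shape=(0, 2, 3), dtype=int32)
# (hint: the stop index is exclusive, so a slice whose start equals its stop selects nothing)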

# do Read this 
# https://stackoverflow.com/questions/509211/understanding-slice-notation

array([[[ 6,  7,  8],
        [ 9, 10, 11]]])

In [36]:
arr[:,:,2]   # accessing the 2nd column from all the arrays.

array([[ 2,  5],
       [ 8, 11]])

In [37]:
arr[...,2] # the Ellipsis (...) stands for all the preceding axes; it is followed by the column index

array([[ 2,  5],
       [ 8, 11]])

In [27]:
#arr[...,1] # here 1 means values of 1st column

# can you figure out how you would print only a single row? try this:
# arr[1][1]
# (the output shown below is the result of running arr[1][1])

array([ 9, 10, 11])

In [28]:
arr = np.arange(9).reshape(3,3)
arr

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

# Advanced Indexing

In [40]:
arr[[0,1,2],[1,0,0]]

array([1, 3, 6])

# Boolean Indexing

In [41]:
cities = np.array(["delhi","banglaore","mumbai","chennai","bhopal"])
city_data = np.random.randn(5,3)
city_data

array([[ 0.22997462,  0.86645137,  0.52166027],
       [-0.96586636, -0.64828548, -0.37461901],
       [ 0.48995021, -1.08439644, -1.44562051],
       [ 1.09462689,  0.10329788,  1.42239492],
       [ 0.70546274, -0.83368291,  0.26051589]])

In [42]:
city_data[cities =="mumbai"] 
# depending on which index is True, it prints that row

array([[ 0.48995021, -1.08439644, -1.44562051]])

In [43]:
print(cities =="mumbai")

[False False  True False False]


In [44]:
# if the match does not happen , then all values are false.
city_data[cities =="mumbra"] 
# the o/p in such a case is indicating shape as(0 rows, 3 features)

array([], shape=(0, 3), dtype=float64)

In [45]:
city_data[city_data >0]

array([0.22997462, 0.86645137, 0.52166027, 0.48995021, 1.09462689,
       0.10329788, 1.42239492, 0.70546274, 0.26051589])

In [46]:
city_data[city_data >0] = 0  # all positive values would be made 0
city_data

array([[ 0.        ,  0.        ,  0.        ],
       [-0.96586636, -0.64828548, -0.37461901],
       [ 0.        , -1.08439644, -1.44562051],
       [ 0.        ,  0.        ,  0.        ],
       [ 0.        , -0.83368291,  0.        ]])

# Operations on Arrays

In [47]:
arr = np.arange(15).reshape(3,5)
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [48]:
# we are adding a constant to every element of the array
arr + 5  # most ufuncs return an array

array([[ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [49]:
arr * 2

array([[ 0,  2,  4,  6,  8],
       [10, 12, 14, 16, 18],
       [20, 22, 24, 26, 28]])

In [50]:
arr1 = np.arange(15).reshape((5, 3))    # 5 rows x 3 columns
arr2 = np.arange(5)[:, np.newaxis]      # column of shape (5, 1)
# Broadcasting stretches arr2 across the columns, so arr2[i] is added
# to every element of row i of arr1.
arr2 + arr1

array([[ 0,  1,  2],
       [ 4,  5,  6],
       [ 8,  9, 10],
       [12, 13, 14],
       [16, 17, 18]])

# Linear Algebra Using numpy

In [51]:
# #### Linear algebra using numpy

A = np.array([[1,2,3],[4,5,6],[7,8,9]])
B = np.array([[9,8,7],[6,5,4],[1,2,3]])
A.dot(B)  # matrix product (rows of A times columns of B), not the element-wise A * B

array([[ 24,  24,  24],
       [ 72,  69,  66],
       [120, 114, 108]])

In [52]:
# taking Transpose
A = np.arange(15).reshape(3,5)  # 3 rows and 5 columns each
A.T    # after transpose , we get 5 rows and 3 columns each

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [53]:
np.linalg.svd(A)  # linalg is the Linear Algebra package

(array([[-0.15425367,  0.89974393,  0.40824829],
        [-0.50248417,  0.28432901, -0.81649658],
        [-0.85071468, -0.3310859 ,  0.40824829]]),
 array([3.17420265e+01, 2.72832424e+00, 8.10792259e-16]),
 array([[-0.34716018, -0.39465093, -0.44214167, -0.48963242, -0.53712316],
        [-0.69244481, -0.37980343, -0.06716206,  0.24547932,  0.55812069],
        [ 0.49916309, -0.8355069 ,  0.19887686,  0.1121146 ,  0.02535234],
        [-0.30036899, -0.03396104,  0.33716014,  0.62903881, -0.63186892],
        [-0.24620048, -0.02783651,  0.80422076, -0.54013007,  0.0099463 ]]))

In [54]:
# Solve the linear system a . x = b, i.e.
#    7x + 5y - 3z = 16
#    3x - 5y + 2z = -8
#    5x + 3y - 7z =  0
a = np.array([
    [7,  5, -3],
    [3, -5,  2],
    [5,  3, -7],
])
b = np.array([16, -8, 0])
x = np.linalg.solve(a, b)
x  # the solution vector: the values of x, y and z

array([1., 3., 2.])

In [55]:
# Finding the Inverse of a Matrix
# The NumPy library contains the inv function in the linalg module.

# let's find the inverse of a 2x2 matrix.
Y = np.array(([1,2], [3,4]))  
Z = np.linalg.inv(Y)  
print(Z)  

# How to find the inverse or determinant of a matrix ?
# https://www.mathsisfun.com/algebra/matrix-inverse.html

[[-2.   1. ]
 [ 1.5 -0.5]]


In [56]:
# Finding the Determinant of a Matrix
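# The determinant of a matrix can be calculated using the det function
# of the linalg module. The matrix used here is singular, so its exact
# determinant is 0; the tiny value printed is floating-point round-off.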

X = np.array(([1,2,3], [4,5,6], [7,8,9]))
Z = np.linalg.det(X)
print(Z)  

6.66133814775094e-16


In [57]:
# Finding the Trace of a Matrix
# The trace of a matrix is the sum of all the elements in the diagonal 
# of a matrix. The NumPy library contains trace function that can be 
# used to find the trace of a matrix.

X = np.array(([1,2,3], [4,5,6], [7,8,9]))
Z = np.trace(X)
print(Z) 

15


# Working of NumPy’s broadcasting functionality

In [58]:
import numpy as np

a = np.array([0, 1, 2])
b = np.array([5, 5, 5])

a + b

array([5, 6, 7])

In [59]:
a + 5

array([5, 6, 7])

In [60]:
M = np.ones((3, 3))
M

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [61]:
M + a

array([[1., 2., 3.],
       [1., 2., 3.],
       [1., 2., 3.]])

In [62]:
a = np.arange(3)
b = np.arange(3)[:, np.newaxis]

print(a)
print(b)

[0 1 2]
[[0]
 [1]
 [2]]


In [63]:
a + b

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

![image for visualising Broadcasting](images/broadcasting_visual.png "Broadcasting Images")

# Structured Data: NumPy’s Structured Arrays

In [29]:
name = ['Alice', 'Bob', 'Cathy', 'Douglas']
age = [25, 45, 37, 19]
weight = [55.0, 85.5, 68.0, 61.5]

# But this is a bit clumsy. 
# There’s nothing here that tells us that the three arrays are related; 
# it would be more natural if we could use a single structure to 
# store all of this data. 
# NumPy can handle this through structured arrays, 
# which are arrays with compound data types.

In [30]:
# Describe each record as (field name, format) pairs:
#   'U10' -> Unicode string of at most 10 characters
#   'i4'  -> 4-byte integer
#   'f8'  -> 8-byte float
record_dtype = np.dtype([('name', 'U10'), ('age', 'i4'), ('weight', 'f8')])
data = np.zeros(4, dtype=record_dtype)
print(data.dtype)

# Please note: the 4 passed to np.zeros is the number of rows (records) we want.

[('name', '<U10'), ('age', '<i4'), ('weight', '<f8')]


In [31]:
# Now that we’ve created an empty container array, 
# we can fill the array with our lists of values:

data['name'] = name
data['age'] = age
data['weight'] = weight
print(data)

[('Alice', 25, 55. ) ('Bob', 45, 85.5) ('Cathy', 37, 68. )
 ('Douglas', 19, 61.5)]


In [32]:
# The handy thing with structured arrays is that you can now refer 
# to values either by index or by name:

# Get all names
data['name']

array(['Alice', 'Bob', 'Cathy', 'Douglas'], dtype='<U10')

In [68]:
# Get first row of data
data[0]

('Alice', 25, 55.)

In [33]:
# Get the name from the last row
data[-1]['name']

'Douglas'

In [34]:
# Using Boolean masking, this even allows you to do some more 
# sophisticated operations, such as filtering on age:

# Get names where age is under 30
data[data['age'] < 30]['name']

array(['Alice', 'Douglas'], dtype='<U10')