## Machine Learning - Session 1 - NumPy

In [171]:
#Import the numpy library
import numpy as np

#### A. Defining a 1D numpy array

In [4]:
x = np.array([12,13,15,16,18,7,10])

In [5]:
x

array([12, 13, 15, 16, 18,  7, 10])

#### B. Creating Regular Sequence

In [7]:
#Create a regular sequence of integers from 1 to 10, i.e. [1, 2, 3, ..., 10]
#(np.arange excludes the stop value, hence stop=11)

x1 = np.arange(1,11)
x1

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [9]:
#Create a regular sequence of integers starting from 10 to 1.

x2 = np.arange(10,0,-1)
x2

array([10,  9,  8,  7,  6,  5,  4,  3,  2,  1])

In [10]:
#Create a regular sequence of even integers from 10 up to (but not including) 20
#np.arange excludes the stop value, so this ends at 18; pass stop=21 to include 20

even = np.arange(10,20,2)
even


array([10, 12, 14, 16, 18])

In [13]:
#Create a regular sequence of the following type: [10.0,10.5,11.0,11.5, ... , 19.5, 20.0]
#np.arange excludes the stop value and is unreliable with fractional steps,
#so use np.linspace: 21 evenly spaced points from 10 to 20 inclusive (step 0.5)

x3 = np.linspace(10, 20, 21)
x3

array([10. , 10.5, 11. , 11.5, 12. , 12.5, 13. , 13.5, 14. , 14.5, 15. ,
       15.5, 16. , 16.5, 17. , 17.5, 18. , 18.5, 19. , 19.5, 20. ])

In [28]:
#Create a regular sequence of length 20 ranging from 0 to 5

x4 = np.round(np.linspace(0,5,20,endpoint=True),2)
x4

array([0.  , 0.26, 0.53, 0.79, 1.05, 1.32, 1.58, 1.84, 2.11, 2.37, 2.63,
       2.89, 3.16, 3.42, 3.68, 3.95, 4.21, 4.47, 4.74, 5.  ])

In [18]:
#Help for np.linspace

help(np.linspace)

Help on function linspace in module numpy:

linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None, axis=0)
    Return evenly spaced numbers over a specified interval.
    
    Returns `num` evenly spaced samples, calculated over the
    interval [`start`, `stop`].
    
    The endpoint of the interval can optionally be excluded.
    
    .. versionchanged:: 1.16.0
        Non-scalar `start` and `stop` are now supported.
    
    Parameters
    ----------
    start : array_like
        The starting value of the sequence.
    stop : array_like
        The end value of the sequence, unless `endpoint` is set to False.
        In that case, the sequence consists of all but the last of ``num + 1``
        evenly spaced samples, so that `stop` is excluded.  Note that the step
        size changes when `endpoint` is False.
    num : int, optional
        Number of samples to generate. Default is 50. Must be non-negative.
    endpoint : bool, optional
        If True, `stop` is

In [16]:
#Studying the arange function

help(np.arange)

Help on built-in function arange in module numpy:

arange(...)
    arange([start,] stop[, step,], dtype=None)
    
    Return evenly spaced values within a given interval.
    
    Values are generated within the half-open interval ``[start, stop)``
    (in other words, the interval including `start` but excluding `stop`).
    For integer arguments the function is equivalent to the Python built-in
    `range` function, but returns an ndarray rather than a list.
    
    When using a non-integer step, such as 0.1, the results will often not
    be consistent.  It is better to use `numpy.linspace` for these cases.
    
    Parameters
    ----------
    start : number, optional
        Start of interval.  The interval includes this value.  The default
        start value is 0.
    stop : number
        End of interval.  The interval does not include this value, except
        in some cases where `step` is not an integer and floating point
        round-off affects the length of `out`.
   

#### C. Creating a Random Sequence

In [39]:
#Generate 20 random integers (with replacement) from 1 to 99
#(np.random.randint excludes the upper bound, so 100 itself is never drawn)
np.random.seed(123) # fix the seed so the same numbers are generated on every run
x5 = np.random.randint(1,100,20)
x5


array([67, 93, 99, 18, 84, 58, 87, 98, 97, 48, 74, 33, 47, 97, 26, 84, 79,
       37, 97, 81])

In [40]:
# Generate random no between 0 and 1

np.random.seed(0)
x6 = np.random.random(20)
x6

array([0.5488135 , 0.71518937, 0.60276338, 0.54488318, 0.4236548 ,
       0.64589411, 0.43758721, 0.891773  , 0.96366276, 0.38344152,
       0.79172504, 0.52889492, 0.56804456, 0.92559664, 0.07103606,
       0.0871293 , 0.0202184 , 0.83261985, 0.77815675, 0.87001215])

In [60]:
#Shuffle the vector x7 in place (the array itself is reordered)

x7 = np.array([10,2,3,55,6,7,18,19])
np.random.shuffle(x7)
x7


array([ 3,  7, 18, 19, 55,  6,  2, 10])

In [65]:
#Draw a random sample of 5 elements from x7

np.random.choice(x7,5,replace=False)  # without replacement: no element is picked twice

array([10,  7,  6,  2, 19])

In [67]:
np.random.choice(x7,5,replace=True)  # with replacement

array([ 6,  6,  2,  3, 55])

To learn more about NumPy random sampling, see the official documentation:
https://numpy.org/doc/stable/reference/random/index.html

#### D. Some basic numpy methods

In [68]:
x = np.array([12,34,24,45,7,18])

In [70]:
#Size of the array
x.size

6

In [72]:
#data type
x.dtype

dtype('int32')

In [74]:
#Type of the object
type(x)

numpy.ndarray

In [77]:
#Type conversion
x = x.astype(float)

In [78]:
x.dtype


dtype('float64')

#### E. Some basic (1D) numpy operations

In [95]:
#Consider that we have two arrays

x = np.array([12,34,24,45,7,18])
y = np.array([3,6,8,5,1,10])

In [96]:
#Addition
x+y

array([15, 40, 32, 50,  8, 28])

In [97]:
#Subtraction
x-y

array([ 9, 28, 16, 40,  6,  8])

In [98]:
#Multiplication by scalar

10*y

array([ 30,  60,  80,  50,  10, 100])

In [99]:
#Element-wise multiplication (this is NOT the dot product; see the dot method below)
x*y

array([ 36, 204, 192, 225,   7, 180])

In [100]:
#Using dot method
x.dot(y)

844

In [101]:
np.sum(x*y)

844

In [103]:
y.dot(x)

844

In [104]:
print(x)

[12 34 24 45  7 18]


In [107]:
#Squaring each element in an array
#(note: ** binds tighter than /, so x**1/2 would compute x/2, not a power)
x**2

array([ 144, 1156,  576, 2025,   49,  324])

In [108]:
np.sqrt(x)

array([3.46410162, 5.83095189, 4.89897949, 6.70820393, 2.64575131,
       4.24264069])

In [109]:
np.exp(x)

array([1.62754791e+05, 5.83461743e+14, 2.64891221e+10, 3.49342711e+19,
       1.09663316e+03, 6.56599691e+07])

In [110]:
np.log(x)

array([2.48490665, 3.52636052, 3.17805383, 3.80666249, 1.94591015,
       2.89037176])

#### F. Some Useful Numpy Functions

In [111]:
y = np.array([3,6,8,5,1,10])

In [112]:
#Sum
np.sum(y)

33

In [113]:
#Mean

np.mean(y)


5.5

In [114]:
#Standard Deviation
np.std(y)

2.9860788111948193

In [115]:
#Minimum
np.min(y)

1

In [116]:
#Maximum
np.max(y)

10

In [118]:
#Percentiles

np.percentile(y,25)

3.5

In [119]:
np.percentile(y,[25,60,75])

array([3.5, 6. , 7.5])

In [121]:
#Sorting

np.sort(y)

array([ 1,  3,  5,  6,  8, 10])

In [122]:
-np.sort(-y)

array([10,  8,  6,  5,  3,  1])

#### G. Subsetting a 1D numpy array

In [124]:
x = np.array([12,34,24,45,7,18])
y = np.array([3,6,8,5,1,10])

In [125]:
#Subsetting and slicing is same as list
 
x[0]

12

In [129]:
x[3:5]

array([45,  7])

In [130]:
x[[3,5]]

array([45, 18])

#### H. Conditional Subsetting

**The Comparison Operators**

      > is greater than
      < is less than
      == is equal to
      <= is less than or equal to
      >= is greater than or equal to

In [131]:
x = np.array([12,34,24,45,7,18])
y = np.array([3,6,8,5,1,10])

In [132]:
x > 20

array([False,  True,  True,  True, False, False])

In [133]:
np.sum(x>20)  # number of elements for which the condition is True

3

In [140]:
np.sum(x[x>20])   # sum of the elements that satisfy the condition

103

**The Logical Operators**

    AND: &
    OR : |

In [139]:
(x>20) | (y>10)

array([False,  True,  True,  True, False, False])

In [138]:
(x>20) & (y<10)

array([False,  True,  True,  True, False, False])

**Problems**

In [53]:
#Consider the following two vectors

x = np.array([33,45,23,67,54,48])
y = np.array([108,151,164,119,135,122])

In [54]:
#a. Values of x that are less than 35

x[x < 35]

array([33, 23])

In [55]:
#b. Count the observations in y that exceed 150
#(summing the boolean mask counts the True entries)

np.sum(y > 150)


2

In [56]:
#OR


In [59]:
#c. The number of observations in y that are between 120 and 165 (inclusive)
#(both bounds must match the question; summing the mask counts the True entries)

np.sum((y >= 120) & (y <= 165))

4

In [60]:
#d. The values in x that are less than 30 or greater than 50
x[((x < 30) | (x>50))]

array([23, 67, 54])

In [61]:
#e. The values in x for which the corresponding value in y is less than or equal to 120
#(<= so that a y value of exactly 120 is included, as the question asks)
x[y<=120]

array([33, 67])

In [62]:
#f. The values in y for which the values in x is equal to 45
y[x==45]

array([151])

#### I. Defining a 2D numpy array

In [40]:
#Converting a 1D array into 2D array
x = np.array([33,45,23,67,54,48])
x

array([33, 45, 23, 67, 54, 48])

In [65]:
y = np.reshape(x,(3,2))

In [66]:
y

array([[33, 45],
       [23, 67],
       [54, 48]])

In [68]:
mat = x.reshape(2,3)
mat

array([[33, 45, 23],
       [67, 54, 48]])

In [70]:
#Create a 3x3 matrix containing elements from 1 to 9

mat1 = np.arange(1,10).reshape(3,3)
mat1

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [88]:
#Joining two 1D arrays to form a matrix
x = np.array([33,45,23,67,54,48])
y = np.array([108,151,164,119,135,122])

mat2 = np.array([x,y])  # x and y become the two rows (shape 2x6)
# reshape refills the 12 values row by row, so x and y do NOT become the two
# columns of mat3; use the transpose of mat2 to get x and y as columns
mat3 = np.array([x,y]).reshape(6,2)
mat2

array([[ 33,  45,  23,  67,  54,  48],
       [108, 151, 164, 119, 135, 122]])

In [90]:
mat3

array([[ 33,  45],
       [ 23,  67],
       [ 54,  48],
       [108, 151],
       [164, 119],
       [135, 122]])

In [84]:
np.transpose(mat2)

array([[ 33, 108],
       [ 45, 151],
       [ 23, 164],
       [ 67, 119],
       [ 54, 135],
       [ 48, 122]])

#### J. Some basic methods for 2D numpy array

In [76]:
matrix = np.array([[38,33,47],
                    [29,48,35],
                    [21,34,47],
                    [38,44,21],
                    [28,26,43]])
matrix

array([[38, 33, 47],
       [29, 48, 35],
       [21, 34, 47],
       [38, 44, 21],
       [28, 26, 43]])

In [79]:
#Checking the shape of the matrix
matrix.shape

(5, 3)

In [91]:
#Dimension of the array
matrix.ndim

2

In [92]:
#Data type of the elements in the array
matrix.dtype

dtype('int32')

In [95]:
#Number of elements present in the matrix
#(np.count_nonzero would miss any entries equal to 0, so use the size attribute)
matrix.size

15

In [98]:
#Total memory used by the matrix elements, in bytes (element count x bytes per element)
matrix.nbytes

60

#### K. Subsetting 2D numpy array

In [99]:
#matrix
matrix


array([[38, 33, 47],
       [29, 48, 35],
       [21, 34, 47],
       [38, 44, 21],
       [28, 26, 43]])

In [101]:
#a) Print the first element of the matrix.
#(indices are zero-based: row 0, column 0)

print(matrix[0,0])

38

In [103]:
#b) Print the last element of the matrix.
print(matrix[4,2])

43


In [None]:
#Or


In [104]:
#c) Print the values of the first row.
print(matrix[0,])

[38 33 47]


In [105]:
#d) Print the values of the fourth row.
print(matrix[3,])

[38 44 21]


In [109]:
#e) Print the values first column.
print(matrix[:,0])

[38 29 21 38 28]


In [112]:
#f) Calculate the total of the third row.
sum(matrix[2,])

102

In [113]:
#g) Calculate the total of the fifth row.
sum(matrix[4,])

97

In [114]:
#h) Calculate the total of the second column.
sum(matrix[:,1])

185

In [130]:
#i) Calculate the average of the values of the first row.

np.mean(matrix[0,])

39.333333333333336

In [118]:
#j) Replace the first value of the matrix by 50.

matrix[0,0] = 50
matrix


array([[50, 33, 47],
       [29, 48, 35],
       [21, 34, 47],
       [38, 44, 21],
       [28, 26, 43]])

In [121]:
#k) Replace the last value of the matrix by 30.

matrix[4,2] = 30
matrix

array([[50, 33, 47],
       [29, 48, 35],
       [21, 34, 47],
       [38, 44, 21],
       [28, 26, 30]])

In [128]:
#l) Replace the second column of the matrix by the vector (20,30,20,30,20)


mat7 = np.array([[20,30,20,30,20]])
matrix[:,1] = mat7
matrix

array([[50, 20, 47],
       [29, 30, 35],
       [21, 20, 47],
       [38, 30, 21],
       [28, 20, 30]])

In [129]:
#m) Replace every value in the first row of the matrix by the average of that row.
#   (the float mean is truncated to an integer because the matrix has an integer dtype)

matrix[0,] = np.mean(matrix[0,])
matrix

array([[39, 39, 39],
       [29, 30, 35],
       [21, 20, 47],
       [38, 30, 21],
       [28, 20, 30]])

In [131]:
#q) Calculate the sum of all the elements of the matrix

np.sum(matrix)

466

In [132]:
#r) Calculate the sum of each row of the matrix (axis=1 sums across the columns)

np.sum(matrix,axis=1)

array([117,  94,  88,  89,  78])

In [133]:
#s) Calculate the sum of each column of the matrix (axis=0 sums down the rows)

np.sum(matrix,axis=0)

array([155, 139, 172])

#### L. Matrix Operations

In [134]:
#Defining a matrix 'm'
m = np.array([[2,3],[4,5]])
m

array([[2, 3],
       [4, 5]])

In [135]:
#Defining a matrix 'n'
n = np.array([[1,0],[3,6]])
n

array([[1, 0],
       [3, 6]])

In [136]:
#Multiplication by scalar
2*m

array([[ 4,  6],
       [ 8, 10]])

In [137]:
#Matrix addition

m + n

array([[ 3,  3],
       [ 7, 11]])

In [138]:
#Matrix multiplication
np.dot(m,n)

array([[11, 18],
       [19, 30]])

In [139]:
#Preferred - for matrix multiplication (the @ operator)
m @ n


array([[11, 18],
       [19, 30]])

In [140]:
#Preferred - for matrix multiplication (function form of the @ operator)
np.matmul(m,n)

array([[11, 18],
       [19, 30]])

In [141]:
#Element-wise array multiplication
np.multiply(m,n)

array([[ 2,  0],
       [12, 30]])

#### Copying data

In [161]:
p = np.array([2,5,8,6])

In [162]:
p

array([2, 5, 8, 6])

In [163]:
# plain assignment does not copy: q is a second name for the same array as p
q =  p

In [164]:
q

array([2, 5, 8, 6])

In [165]:
# .copy() creates an independent array, so later changes to r leave p untouched
r = p.copy()

In [166]:
r

array([2, 5, 8, 6])

In [167]:
q[1] = 0

In [168]:
p

array([2, 0, 8, 6])

In [169]:
r[1]=1

In [170]:

p

array([2, 0, 8, 6])

## SVD

In [173]:
a = np.array([[7,3],[3,-1]])

In [174]:
a

array([[ 7,  3],
       [ 3, -1]])

In [176]:
u,s,vt = np.linalg.svd(a)

In [177]:

u

array([[-0.9486833 , -0.31622777],
       [-0.31622777,  0.9486833 ]])

In [178]:
s

array([8., 2.])

In [179]:
vt

array([[-0.9486833 , -0.31622777],
       [ 0.31622777, -0.9486833 ]])

In [180]:
w = u@s

In [181]:
w@vt

array([7.6, 3.2])

In [184]:
b = np.array([[5,5], [-1,7]])
b

array([[ 5,  5],
       [-1,  7]])

In [185]:
u,s,vt = np.linalg.svd(b)

In [186]:
u

array([[ 0.70710678,  0.70710678],
       [ 0.70710678, -0.70710678]])

In [187]:
s

array([8.94427191, 4.47213595])

In [188]:
vt

array([[ 0.31622777,  0.9486833 ],
       [ 0.9486833 , -0.31622777]])

In [190]:
# Rebuild b from its factors: b = U @ diag(s) @ Vt
# (s is a 1D array of singular values, so it must be expanded with np.diag)
q = u @ np.diag(s) @ vt
q

array([[ 5.,  5.],
       [-1.,  7.]])

In [192]:
np.linalg.eig(b)

(array([6.+2.j, 6.-2.j]),
 array([[0.91287093+0.j        , 0.91287093-0.j        ],
        [0.18257419+0.36514837j, 0.18257419-0.36514837j]]))