# Matrix Computation and Data Analysis Using NumPy and pandas

![image.png](attachment:image.png)

![image-2.png](attachment:image-2.png)


![image.png](attachment:image.png)

In [6]:
import numpy as np  # import Numpy Library

# Build a 1-D array from a Python list and inspect its basic metadata.
x = np.array([23, 45, 78, 2])
print(x)
# size     -> number of items
# ndim     -> number of dimensions
# dtype    -> element type
# itemsize -> size of each item in bytes
# shape    -> length along every dimension (a 1-tuple for a 1-D array)
for attribute in ("size", "ndim", "dtype", "itemsize", "shape"):
    print(getattr(x, attribute))



[23 45 78  2]
4
1
int32
4
(4,)


In [7]:
# Arrays are mutable: overwrite the item at index 2 in place.
x[2] = 24
print(x)
# Multiplying two arrays works element by element (a plain list cannot do this).
selector = np.array([0, 1, 0, 1])
y = x * selector
print(y)

[23 45 24  2]
[ 0 45  0  2]


### array vs lists

In [10]:
# The same values stored as a Python list and as a NumPy array react differently to `+`.
x = [1, 2, 3]            # plain Python list
y = np.array([1, 2, 3])  # NumPy array
print(x)
print(y)
print(x + [4])  # list + list concatenates -> [1, 2, 3, 4]
# Fixed: use the `np` alias imported at the top of the notebook; `ny` is not
# defined at this point, so the original line raised NameError on a fresh kernel.
print(y + np.array([4]))  # array + array broadcasts: 4 is added to every element


[1, 2, 3]
[1 2 3]
[1, 2, 3, 4]
[5 6 7]


In [11]:
# `x` (list) and `y` (array) come from the previous cell.
print(x * 2)  # list * int repeats the list -> [1, 2, 3, 1, 2, 3]
print(y * 2)  # array * int doubles every element (y ** 2 would square them)
# Fixed: use the `np` alias imported at the top of the notebook; `ny` is not defined yet here.
print(np.sqrt(y))  # square root of each element


[1, 2, 3, 1, 2, 3]
[2 4 6]
[1.         1.41421356 1.73205081]


### vector operations like inner and outer products in Numpy

In [12]:
l1 = [0.1, 0.2, 0.3]
l2 = [0.3, 0.2, 0.1]
x, y = np.array(l1), np.array(l2)

# Approach 1: the matrix-multiplication operator
dpr1 = x @ y
print(dpr1)

# Approach 2: np.dot
dpr2 = np.dot(x, y)
print(dpr2)

# Approach 3: element-wise product followed by a sum
muli = x * y
print(muli)
dpr3 = muli.sum()
print(dpr3)

# Approach 4: plain Python, no NumPy involved
dpr4 = 0
for left, right in zip(l1, l2):
    dpr4 = left * right + dpr4
print(dpr4)


0.1
0.1
[0.03 0.04 0.03]
0.1
0.1


In [16]:
l1 = [0.1, 0.2, 0.3]
l2 = [0.3, 0.2, 0.1]
# np.dot, np.inner and np.outer accept plain lists as well as arrays
for product in (np.dot, np.inner, np.outer):
    print(product(l1, l2))

0.1
0.1
[[0.03 0.02 0.01]
 [0.06 0.04 0.02]
 [0.09 0.06 0.03]]


### matrices or multi-dimensional arrays

In [18]:
x = np.array([[2, 5], [7, 8]])
print(x)
print(x.shape)

# Indexing goes row first, then column.
first_row = x[0]  # row with index 0
print(first_row)

# Two equivalent spellings for the 1st element of the 1st row
print(first_row[0])
print(x[0, 0])

# Slicing: ":" selects everything along that axis
first_col = x[:, 0]  # all rows of column 0 (careful: it comes back in row format, not as a column)
print(first_col)
print(x[0, :])       # all columns of row 0


[[2 5]
 [7 8]]
(2, 2)
[2 5]
2
2
[2 7]
[2 5]


In [27]:
# `x` is the 2x2 matrix defined in the previous cell.
# transpose
print(x.T)

# matrix multiplication
y = np.array([[3, 4], [5, 6]])
z = x.dot(y)
print(z)
d = x * y  # element-wise multiplication
print(d)


# be careful with matrix multiplication: the inner dimensions must match
# (2x2).dot(2x3) works, whereas y.dot(b.T) would be (2x2).dot(3x2) and raise ValueError
# Fixed: use the `np` alias imported at the top of the notebook; `ny` is not defined yet here.
b = np.array([[10, 2, 3], [4, 5, 6]])
print(y.dot(b))

[[2 7]
 [5 8]]
[[31 38]
 [61 76]]
[[ 6 20]
 [35 48]]
[[46 26 33]
 [74 40 51]]


In [28]:
# Matrices filled with zeros or ones are built from a shape tuple.
a, b = np.zeros((2, 2)), np.ones((2, 2))
print(a)
print(b)

[[0. 0.]
 [0. 0.]]
[[1. 1.]
 [1. 1.]]


### linalg module

In [29]:
# `x` is the matrix left in scope by the earlier cells.
c1 = np.linalg.det(x)  # determinant
c2 = np.linalg.inv(x)  # inverse
c3 = np.diag(x)        # matrix argument  -> returns its diagonal elements
c4 = np.diag([1, 4])   # vector argument  -> returns a diagonal matrix (overloaded function)
for result in (c1, c2, c3, c4):
    print(result)


-18.999999999999996
[[-0.42105263  0.26315789]
 [ 0.36842105 -0.10526316]]
[2 8]
[[1 0]
 [0 4]]


### indexing multidimensional arrays

In [32]:
# A multidimensional array takes one index (or slice) per dimension.
x = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
print(x)

b = x[0, 1]       # element in row 0, column 1
row0 = x[0, :]    # whole row 0
col0 = x[:, 0]    # whole column 0
last = x[-1, -1]  # negative indices count from the end: last row, last column
for picked in (b, row0, col0, last):
    print(picked)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
2
[1 2 3 4]
[1 5 9]
12


In [70]:
# A part of a matrix can be taken by giving one slice per axis.
# `x` is defined at the top of this cell so the cell does not rely on state left
# behind by earlier cells.
# Fixed: use the `np` alias imported at the top of the notebook; `ny` is not defined yet here.
x = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])

slice_x = x[0:2, 1:3]  # elements of x in rows (0, 1) and columns (1, 2); stop index is exclusive
print(slice_x)

slice_x1 = x[::2, ::2]  # elements of alternate rows and alternate columns
print(slice_x1)






[[2 3]
 [6 7]]
[[ 1  3]
 [ 9 11]]


### Boolean and Fancy Indexing (important in real life applications)

In [40]:
a = np.array([[1, 2], [3, 4], [5, 6]])
print(a)

# The comparison yields an array of the same shape holding True/False
bool_idx = a > 2
print(bool_idx)
# Indexing with the mask returns only the True positions, as a rank-1 array
print(a[bool_idx])

# Same selection written as one concise expression (still 1-D)
print(a[a > 2])

# np.where keeps the original shape: entries failing the condition are replaced
# by the fill value (0 first, then -2)
for fill in (0, -2):
    b = np.where(a > 2, a, fill)
    print(b)

[[1 2]
 [3 4]
 [5 6]]
[[False False]
 [ True  True]
 [ True  True]]
[3 4 5 6]
[3 4 5 6]
[[0 0]
 [3 4]
 [5 6]]
[[-2 -2]
 [ 3  4]
 [ 5  6]]


In [60]:
# Fancy indexing: pass a list of indices to read several positions at once.
a = np.array([10, 19, 30, 41, 50, 61])
print(a)
b = a[[1, 3, 5]]
print(b)

is_even = a % 2 == 0
even = np.argwhere(is_even)  # indices where the condition is True, one row per hit
print(even)
even = even.flatten()        # collapse to a flat index array
print(even)
a_even = a[even]
print(a_even)

# The same tools on a 2-D array
a1 = np.array([[10, 19, 30], [41, 50, 61]])
print(a1)
b1 = a1[[1, 1], [1, 0]]  # picks the elements at (1, 1) and (1, 0)
print(b1)
even1 = np.argwhere(a1 % 2 == 0)  # each row is a (row, column) pair
print(even1)


[10 19 30 41 50 61]
[19 41 61]
[[0]
 [2]
 [4]]
[0 2 4]
[10 30 50]
[[10 19 30]
 [41 50 61]]
[50 41]
[[0 0]
 [0 2]
 [1 1]]


In [71]:
# Reshaping is a useful tool in ML.
x = np.arange(1, 7)
print(x)
# reshape raises an error when the requested shape does not fit the number of items
y, c = (x.reshape(shape) for shape in ((2, 3), (3, 2)))
print(y)  # 2 rows, 3 columns
print(c)  # 3 rows, 2 columns


[1 2 3 4 5 6]
[[1 2 3]
 [4 5 6]]
[[1 2]
 [3 4]
 [5 6]]


In [73]:
# np.newaxis inserts a new axis into the data; this is needed when a model
# requires its input in a particular shape.
x = np.arange(1, 7)
print(x)
print(x.shape)

d = x[np.newaxis, :]  # new leading axis: the same data as a single row
e = x[:, np.newaxis]  # new trailing axis: the same data as a single column
for expanded in (d, e):
    print(expanded)
    print(expanded.shape)



[1 2 3 4 5 6]
(6,)
[[1 2 3 4 5 6]]
(1, 6)
[[1]
 [2]
 [3]
 [4]
 [5]
 [6]]
(6, 1)


In [98]:
a = np.array([[7, 8, 9, 10, 11, 12, 13], [17, 18, 19, 20, 21, 22, 23]])
print(a)

# axis=None and the bare call both give the overall sum
print(a.sum(axis=None))
print(a.sum())

# axis=0: along the rows    -> 1 sum entry for each column
# axis=1: along the columns -> 1 sum entry for each row
for axis in (0, 1):
    print(a.sum(axis=axis))


[[ 7  8  9 10 11 12 13]
 [17 18 19 20 21 22 23]]
210
210
[24 26 28 30 32 34 36]
[ 70 140]


### array concatenation

In [78]:
# Concatenation of a 2x2 matrix with a 1x2 matrix.
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6]])
c = np.concatenate((a, b), axis=None)  # axis=None: flatten both inputs and join them into a 1-D array
d = np.concatenate((a, b), axis=0)     # axis=0: b is appended as an extra row
e = np.concatenate((a, b.T), axis=1)   # axis=1: b must be transposed so the row counts match
for arr in (a, b, c, d, e):
    print(arr)

[[1 2]
 [3 4]]
[[5 6]]
[1 2 3 4 5 6]
[[1 2]
 [3 4]
 [5 6]]
[[1 2 5]
 [3 4 6]]


In [81]:
# Stacking one array next to another (horizontally) or on top of another (vertically).
# The statements are ordered so that each stack call is followed directly by its
# np.concatenate equivalent, which is also the order of the recorded output.

# 1-D inputs: hstack joins them end to end
a = np.array([1, 2, 3, 4])
b = np.array([5, 6, 7, 8])
c = np.hstack((a, b))
print(c)

# 2-D inputs
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])
c = np.hstack((a, b))  # horizontal stack: b is placed to the right of a
print(c)

c1 = np.concatenate((a, b), axis=1)  # the same thing can be done with concatenate
print(c1)

c2 = np.vstack((a, b))  # vertical stack: b is placed below a
# (np.concatenate((a, b), axis=0) is the concatenate equivalent of vstack)
print(c2)

[1 2 3 4 5 6 7 8]
[[1 2 5 6]
 [3 4 7 8]]
[[1 2 5 6]
 [3 4 7 8]]
[[1 2]
 [3 4]
 [5 6]
 [7 8]]


### Broadcasting
![image.png](attachment:image.png)

In [86]:
# Broadcasting lets NumPy combine arrays of different shapes in arithmetic
# operations.

# A (4, 3) matrix plus a length-3 vector: v is added to each row of x
x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
v = np.array([1, 0, 1])
y = x + v
for arr in (x, y):
    print(arr)


# A (4, 1) column plus a length-3 vector: the result is a full (4, 3) matrix
x1 = np.array([[0], [10], [20], [30]])
v1 = np.array([0, 1, 2])
y1 = x1 + v1
for arr in (x1, y1):
    print(arr)

[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]
[[ 2  2  4]
 [ 5  5  7]
 [ 8  8 10]
 [11 11 13]]
[[ 0]
 [10]
 [20]
 [30]]
[[ 0  1  2]
 [10 11 12]
 [20 21 22]
 [30 31 32]]


In [88]:
# Assignment versus copy

# Plain assignment only copies the reference: b1 and a1 are the same array,
# so writing through b1 is visible in a1.
a1 = np.array([1, 2, 3])
b1 = a1
b1[0] = 42

# .copy() makes an actual copy (use this more often): a2 stays untouched.
a2 = np.array([1, 2, 3])
b2 = a2.copy()
b2[0] = 42

print(a1, a2, sep="\n")


[42  2  3]
[1 2 3]


In [91]:
# Generating data

# zeros / ones take the size as a tuple
a = np.zeros((2, 3))
b = np.ones((2, 3))
for arr in (a, b):
    print(arr)

# A matrix filled with a specific value, built in two ways
c = 5 * np.ones((3, 3))
print(c)
c = np.full((3, 3), 5.0)
print(c)

d = np.eye(3)               # identity, 3x3
e = np.arange(10, 20)       # arange
f = np.linspace(0, 10, 5)   # linspace
for arr in (d, e, f):
    print(arr)


[[0. 0. 0.]
 [0. 0. 0.]]
[[1. 1. 1.]
 [1. 1. 1.]]
[[5. 5. 5.]
 [5. 5. 5.]
 [5. 5. 5.]]
[[5. 5. 5.]
 [5. 5. 5.]
 [5. 5. 5.]]
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]
[10 11 12 13 14 15 16 17 18 19]
[ 0.   2.5  5.   7.5 10. ]


### random arrays 

In [95]:
# random numbers
# NOTE(review): no seed is set in this cell, so the printed values presumably
# differ on every run — confirm whether a seed is set elsewhere.
a = np.random.random((3,2)) # uniform 0-1 distribution; the shape is passed as a tuple
print(a)
b = np.random.randn(3,2) # normal/Gaussian distribution, mean 0 and unit variance
# no tuple as shape here! each dimension is passed as its own argument
print(b)

# With 10000 samples the printed mean, variance and standard deviation can be
# compared against the mean 0 / unit variance stated above.
R = np.random.randn(10000)
print(R.shape)
print(R.mean(), R.var(), R.std())
# R is rebound to a fresh 10x3 sample
R = np.random.randn(10, 3)
print(R.shape)
print(R.mean()) # mean of whole matrix of data


[[0.39125316 0.48002864]
 [0.11522943 0.68875263]
 [0.65629539 0.13053562]]
[[ 0.5285839  -0.10650102]
 [-1.94934871 -0.14680926]
 [-0.6061811   2.42972748]]
(10000,)
0.005338868702816718 0.9777620646767243 0.9888185195862406
(10, 3)
0.12280343915641916


In [97]:
# random integer: arguments are low, high, size; high is exclusive
# NOTE(review): no seed is set in this cell, so the printed values presumably
# differ on every run — confirm whether a seed is set elsewhere.
R = np.random.randint(3,10,size=(3,3)) # if we only pass one parameter, then from 0-x
print(R)

# given an integer, choice draws from 0 up to that integer (exclusive)
c = np.random.choice(7, size=10)
print(c)
# given an array (here a list), choice draws random values from it
d = np.random.choice([1,2,3,4], size=8)
print(d)


[[6 9 9]
 [9 9 4]
 [7 6 5]]
[1 5 2 5 6 3 3 4 6 4]
[4 4 2 1 2 4 2 2]


### eigen decomposition (useful to solve modelling problems and analyze differential equations)

In [101]:
a = np.array([[1, 2], [3, 4]])
eigenvalues, eigenvectors = np.linalg.eig(a)
# Note: use eigh if your matrix is symmetric (faster)
print(eigenvalues)
print(eigenvectors)        # the eigenvectors are stored as columns
print(eigenvectors[:, 0])  # column 0 corresponds to eigenvalues[0]

# verify: e-val * e-vec == A @ e-vec
d = eigenvectors[:, 1] * eigenvalues[1]  # 2nd eigenvalue times its eigenvector
e = a @ eigenvectors[:, 1]
print(d, e)
print(d == e)  # exact float comparison: can report False because of rounding

# correct way to compare floating-point arrays
# Fixed: use the `np` alias imported at the top of the notebook; `ny` is not
# imported until a later cell, so the original line raised NameError on a fresh kernel.
print(np.allclose(d, e))


[-0.37228132  5.37228132]
[[-0.82456484 -0.41597356]
 [ 0.56576746 -0.90937671]]
[-0.82456484  0.56576746]
[-2.23472698 -4.88542751] [-2.23472698 -4.88542751]
[ True  True]
True


In [102]:
# solve linear system
# x1 + x2 = 2200
# 1.5 x1 + 4 x2 = 5050
# 2 equations and 2 unknowns
import numpy as ny  # second alias for the same module, kept so existing uses of `ny` still resolve
# The code below uses `np`, the alias imported at the top of the notebook,
# so the whole notebook refers to NumPy by one name.
A = np.array([[1, 1], [1.5, 4]])
b = np.array([2200, 5050])
# Ax = b  <=>  x = A^-1 b
# But: forming the inverse explicitly is slow and less accurate
x = np.linalg.inv(A).dot(b)  # not recommended
print(x)
x = np.linalg.solve(A, b)  # good
print(x)


[1500.  700.]
[1500.  700.]


![image.png](attachment:image.png)
![image-2.png](attachment:image-2.png)