# Numpy Basics - List vs Arrays

Numpy arrays are specifically used for doing maths operations whereas List are just general data structures

In [None]:
import numpy as np

In [29]:
# List
L=[1,2,3]

In [30]:
# Array
A = np.array([1,2,3])

In [31]:
for e in L:
    print(e)

1
2
3


In [32]:
for e in A:
    print(e)

1
2
3


In [33]:
# to append an element
L.append(4)

In [34]:
L

[1, 2, 3, 4]

In [35]:
# cant use append in arrays
A.append(4)

AttributeError: 'numpy.ndarray' object has no attribute 'append'

In [36]:
# can add element to a list by simple addition
L + [5]

[1, 2, 3, 4, 5]

In [37]:
# if we use + operator in array, it simply adds up that number to each element
A + np.array([4])

array([5, 6, 7])

In [39]:
# but if we give 3 values then it does element wise addition
A + np.array([4,5,6])

array([5, 7, 9])

In [40]:
 # cant add vectors of different sizes
    A + np.array([4,5])

ValueError: operands could not be broadcast together with shapes (3,) (2,) 

In [41]:
# multiplication in numpy array
A*2

array([2, 4, 6])

In [42]:
# in list it does repition
2*L

[1, 2, 3, 4, 1, 2, 3, 4]

In [43]:
L +L

[1, 2, 3, 4, 1, 2, 3, 4]

In [45]:
L2=[]
for e in L:
    L2.append(e+2)
L2

[3, 4, 5, 6]

In [46]:
# can use List comprehensions
L2=[ e+2 for e in L ]

In [47]:
L2

[3, 4, 5, 6]

In [48]:
# mathematical operators not supported directly
L**2

TypeError: unsupported operand type(s) for ** or pow(): 'list' and 'int'

In [50]:
# can do any operation element wise
L2 = [e**2 for e in L]
L2

[1, 4, 9, 16]

In [51]:
# can directly do in numpy - can apply any function element wise directly
A**2

array([1, 4, 9])

In [57]:
# can use any mathematical functions in numpy array
np.sqrt(A)

array([1.        , 1.41421356, 1.73205081])

In [53]:
np.log(A)

array([0.        , 0.69314718, 1.09861229])

In [54]:
np.exp(A)

array([ 2.71828183,  7.3890561 , 20.08553692])

In [55]:
np.tanh(A)

array([0.76159416, 0.96402758, 0.99505475])

In [56]:
np.cos(A)

array([ 0.54030231, -0.41614684, -0.9899925 ])

# Dot Product

In [3]:
a=np.array([1,2])
b=np.array([3,4])

In [5]:
a+b

array([4, 6])

In [8]:
dot=0
for e, f in zip(a,b):
    dot+=e*f
dot

11

In [10]:
dot=0
for i in range(len(a)):
    dot+=a[i]*b[i]
dot

11

In [11]:
# element wise product
a*b

array([3, 8])

In [12]:
# can use np.sum
np.sum(a*b)

11

In [13]:
(a*b).sum()

11

In [14]:
# dot product
np.dot(a,b)

11

In [15]:
# can also use dot like this
a.dot(b)

11

In [16]:
# latest versions of numpy use @ symbol
a @ b

11

In [17]:
# a^t b = ||a|| ||b|| cos theta

In [18]:
# to find magnitude of a vector

In [20]:
amag = np.sqrt((a*a).sum())

In [21]:
amag

2.23606797749979

In [22]:
# to find the norm of a vector - can also use the linear algebra norm function
np.linalg.norm(a)

2.23606797749979

In [24]:
cosangle = a.dot(b) / (np.linalg.norm(a) * np.linalg.norm(b))

In [25]:
cosangle

0.9838699100999074

In [27]:
# to print the angle
angle = np.arccos(cosangle)

In [28]:
angle

0.17985349979247847

# Matrices

Matrices are dimensional arrays. Not recommended to use numpy.matrix 

Should convert a matrix to an array first and then use it before any data processing

In [58]:
# print a 2X2 matrix using List 
L =[[1,2], [3,4]]

In [59]:
L

[[1, 2], [3, 4]]

In [60]:
# how to access the elements
# first row of L
L[0]

[1, 2]

In [62]:
# second row of L
L[1]

[3, 4]

In [64]:
# to get the element at column 1 for first row
L[0][0]

1

In [65]:
L[0][1]

2

In [68]:
# Matrix with Numpy - prints the matrox formatted
A=np.array([[1,2],[3,4]])
A

array([[1, 2],
       [3, 4]])

In [69]:
# access elements in numpy array
A[0][1]

2

In [70]:
# can also use this notation
A[0,1]

2

In [71]:
# to return only the column - returns the column at index 0
A[:,0]

array([1, 3])

In [72]:
# transpose of matrix A
A.T

array([[1, 3],
       [2, 4]])

In [73]:
np.exp(A)

array([[ 2.71828183,  7.3890561 ],
       [20.08553692, 54.59815003]])

In [74]:
# can also apply it to the list- numpy already knows what list is and treats it as numpy array
np.exp(L)

array([[ 2.71828183,  7.3890561 ],
       [20.08553692, 54.59815003]])

In [75]:
# if data is given in list, its ok to pass it as list to np stack, no need to convert it to array first

In [77]:
# new matrix of 2X3
B= np.array([[1,2,3],[4,5,6]])
B

array([[1, 2, 3],
       [4, 5, 6]])

In [78]:
# dot product
A.dot(B)

array([[ 9, 12, 15],
       [19, 26, 33]])

In [79]:
# * does element-wise multiplication, dot does the dot-product. The inner dimensions must match
A.dot(B.T)

ValueError: shapes (2,2) and (3,2) not aligned: 2 (dim 1) != 3 (dim 0)

In [80]:
# determinant
np.linalg.det(A)

-2.0000000000000004

In [81]:
#inverse of matrix
np.linalg.inv(A)

array([[-2. ,  1. ],
       [ 1.5, -0.5]])

In [83]:
np.linalg.inv(A).dot(A)

array([[1.0000000e+00, 4.4408921e-16],
       [0.0000000e+00, 1.0000000e+00]])

In [84]:
# matrix trace
np.trace(A)

5

In [85]:
# diagnol of matrix - gives the diagnol vector
np.diag(A)

array([1, 4])

In [86]:
# if find the diagnol of vector - get a matrix - overloaded function
np.diag([1,4])

array([[1, 0],
       [0, 4]])

In [90]:
# eigen values of A
np.linalg.eig(A)

(array([-0.37228132,  5.37228132]), array([[-0.82456484, -0.41597356],
        [ 0.56576746, -0.90937671]]))

In [91]:
Lam, V =np.linalg.eig(A)

In [93]:
# check the eigen values
V[:,0]* Lam[0]== A @ V[:,0]

array([ True, False])

In [94]:
V[:,0]* Lam[0], A @ V[:,0]

(array([ 0.30697009, -0.21062466]), array([ 0.30697009, -0.21062466]))

In [95]:
# one of the value is false due to precision, so use allclose
np.allclose(V[:,0]* Lam[0], A @ V[:,0])

True

In [96]:
# can check all the eigen values
np.allclose(V * Lam, A @ V)

True

In [100]:
# eigenh- does the transpose and returns the complex conjugate of elements
np.linalg.eigh(A)
# for application in signal systems etc - numpy handles the complex numbers as well

(array([-0.85410197,  5.85410197]), array([[-0.85065081,  0.52573111],
        [ 0.52573111,  0.85065081]]))

# Solving Linear Systems

In [101]:
# use solve for linear systems
#given two linear equations , represent them as matrices
A =np.array([[1,1], [1.5,4]])
b=np.array([2200,5050])
x=np.linalg.solve(A,b)

In [102]:
x

array([1500.,  700.])

In [103]:
# should not use inv function for big datasets and high dimensions
np.linalg.inv(A).dot(b)

array([1500.,  700.])

# Generating Data

In [105]:
# to generate synthetic data
# lets start with array of zeros
np.zeros((2,3))

array([[0., 0., 0.],
       [0., 0., 0.]])

In [106]:
np.ones((2,3))

array([[1., 1., 1.],
       [1., 1., 1.]])

In [107]:
10* np.ones((2,3))

array([[10., 10., 10.],
       [10., 10., 10.]])

In [108]:
# create an identity matrix
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [109]:
# generate arrays with random numbers
np.random.random()

0.6696344375135158

In [110]:
np.random.random((2,3))

array([[0.76067815, 0.71879536, 0.12255562],
       [0.387028  , 0.44740582, 0.12077799]])

In [112]:
# what distribution the random numbers come from
# random numbers come from 0 to 1
# random numbers from gaussian distribution
np.random.randn(2,3)

array([[-0.19382158,  0.4417349 ,  2.50180149],
       [-2.99603375, -2.11305742, -1.13001823]])

In [113]:
# get some positive and negative numbers
# function doesnt accept the tuple as input but the scalar

In [114]:
# random numbers
R = np.random.randn(10000)

In [115]:
# find mean
R.mean()

-4.955391435026755e-05

In [116]:
np.mean(R)

-4.955391435026755e-05

In [117]:
#variance
R.var()

1.0192673105968422

In [118]:
np.var(R)

1.0192673105968422

In [119]:
# standard deviation
R.std()

1.0095876933663772

In [120]:
np.std(R)

1.0095876933663772

In [121]:
# say a natrix of size 10000 X 3 where each element is drawn by random normal
R= np.random.randn(10000,3)

In [122]:
# to find mean of each column
R.mean(axis=0)

array([ 0.00318417, -0.0133326 ,  0.00434398])

In [123]:
# to find the mean of each row
R.mean(axis=1)


array([-0.43076211,  0.4011133 , -0.48066567, ...,  0.56399413,
        0.44259573,  0.15992946])

In [124]:
# to see the shape
R.mean(axis=1).shape

(10000,)

In [125]:
# Analog of variance is the covariance. 
np.cov(R)

array([[ 0.43779616,  0.11980358,  0.14820141, ..., -0.02768239,
         0.46836652, -0.51150305],
       [ 0.11980358,  0.04145362,  0.02926182, ...,  0.02737974,
         0.03229779, -0.13801219],
       [ 0.14820141,  0.02926182,  0.06488148, ..., -0.05490839,
         0.2834457 , -0.17570763],
       ...,
       [-0.02768239,  0.02737974, -0.05490839, ...,  0.14269282,
        -0.41617878,  0.04025151],
       [ 0.46836652,  0.03229779,  0.2834457 , ..., -0.41617878,
         1.56130057, -0.568911  ],
       [-0.51150305, -0.13801219, -0.17570763, ...,  0.04025151,
        -0.568911  ,  0.59806291]])

In [126]:
np.cov(R).shape

(10000, 10000)

In [127]:
# cov function treats its column as vector observation but it should consider each row as sample observation
# so we can fix this by trasnposing first
np.cov(R.T)

array([[1.00031005, 0.0022272 , 0.01555621],
       [0.0022272 , 1.0002896 , 0.00389364],
       [0.01555621, 0.00389364, 1.00362905]])

In [128]:
np.cov(R.T).shape

(3, 3)

In [129]:
# or another option is to use rowvar and set it to False. The default here is True
np.cov(R, rowvar=False)

array([[1.00031005, 0.0022272 , 0.01555621],
       [0.0022272 , 1.0002896 , 0.00389364],
       [0.01555621, 0.00389364, 1.00362905]])

In [131]:
# randomly generate integers - low range is inclusive, high is exclusive
np.random.randint(0,10, size=(3,3))

array([[6, 9, 0],
       [0, 3, 7],
       [1, 1, 2]])

In [133]:
# choice function - random numbers between 0 to n , can mention if the sample is with or without replacement
#replace= True /False , p - probability with which a number should be selected
np.random.choice(10, size=(3,3), replace =False)

array([[4, 9, 0],
       [1, 3, 6],
       [2, 5, 7]])

## Speed test for Matrix multiplication - Array vs Lists

In [134]:
# speed comparision #
from datetime import datetime

In [171]:
# define two matrix 
a = np.random.randn(10,10)
b = np.random.rand(10,10)

In [172]:
T =10000

In [173]:
# matrix multiplication using lists
def matrix_mul_list(x,y):
    res=0
    for i in range(len(x)):
        for j in range(len(y)):
            res+= x[i]*y[j]
    return res

In [174]:
# time elapsed using list
t0=datetime.now()
for t in range(T):
    matrix_mul_list(a,b)
dt1=datetime.now()-t0
dt1.total_seconds()

1.681002

In [175]:
# time elapsed using numpy array
t0=datetime.now()
for t in range(T):
    a.dot(b)
dt2=datetime.now()-t0
dt2.total_seconds()

0.015281

In [176]:
print("dt1/dt2 :",dt1.total_seconds()/ dt2.total_seconds())

dt1/dt2 : 110.00602054839344
