# Arrays vs. Lists

In [1]:
import numpy as np

In [2]:
L = [1,2,3]

In [3]:
A = np.array([1,2,3])

In [5]:
for e in L:
    print(e)

1
2
3


In [6]:
for e in A:
    print(e)

1
2
3


In [7]:
L.append(4)

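The size of a list can change, but the size of a NumPy array is fixed when it is created, so L.append works but A.append does not.

The fixed size lets the array's elements be stored together in one block of memory, which is more memory-efficient.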

In [8]:
A.append(4)

AttributeError: 'numpy.ndarray' object has no attribute 'append'

In [9]:
L + [5]

[1, 2, 3, 4, 5]

For lists, "+" means concatenation. For arrays, "+" means element-wise addition; when the shapes differ, NumPy uses "broadcasting" to stretch the smaller array (here a single element) across the larger one:

In [10]:
A + np.array([4])

array([5, 6, 7])

We can also add two arrays element-wise when they have the same shape.

In [11]:
A + np.array([3,4,5])

array([4, 6, 8])

If the shapes differ and cannot be broadcast together (for example, lengths 3 and 2), there will be an error.

In [12]:
A + np.array([3,4])

ValueError: operands could not be broadcast together with shapes (3,) (2,) 

The multiply operator does multiplication for arrays, but does repetition for lists

In [13]:
2 * A

array([2, 4, 6])

In [14]:
2 * L

[1, 2, 3, 4, 1, 2, 3, 4]

We can do the same element-wise operations on lists with a loop (here, a list comprehension).

In [15]:
L2 = [e + 3 for e in L]
L2

[4, 5, 6, 7]

In [16]:
L3 = [e**2 for e in L]
L3

[1, 4, 9, 16]

element-wise operation is much easier in numpy arrays

In [17]:
A**2

array([1, 4, 9], dtype=int32)

In [18]:
np.sqrt(A)

array([1.        , 1.41421356, 1.73205081])

In [19]:
np.log(A)

array([0.        , 0.69314718, 1.09861229])

In [20]:
np.exp(A)

array([ 2.71828183,  7.3890561 , 20.08553692])

In [21]:
np.tanh(A)

array([0.76159416, 0.96402758, 0.99505475])

# The Dot Product 

In [22]:
a = np.array([1,2])
b = np.array([3,4])

We want to use zip() to do element-wise operation

In [23]:
dot = 0
for e,f in zip(a,b):
    dot += e * f
dot

11

We could also do it manually, indexing into both arrays element by element.

In [24]:
dot = 0
for i in range(len(a)):
    dot += a[i] * b[i]
dot

11

If we use * on two arrays, it multiplies them element-wise.

In [25]:
a * b

array([3, 8])

In [26]:
np.sum(a * b)

11

In [27]:
np.dot(a, b)

11

In [28]:
a.dot(b)

11

We could get the magnitude of an array

In [31]:
a_mag = np.sqrt((a * a).sum())
a_mag

2.23606797749979

In numpy we have built-in function for magnitude

In [32]:
np.linalg.norm(a)

2.23606797749979

We could calculate the cosine of the angle between a and b

In [34]:
cosangle = a.dot(b) / (np.linalg.norm(a) * np.linalg.norm(b))
cosangle

0.9838699100999074

We could then recover the angle itself (in radians) by taking the arccosine

In [35]:
angle = np.arccos(cosangle)

# Speed Test for Numpy Arrays vs. Lists

In [52]:
## speed comparison ##
from datetime import datetime

a = np.random.randn(100)
b = np.random.randn(100)
T = 100000

def slow_dot_product(a, b):
    """Return the dot product of ``a`` and ``b`` using a plain Python loop.

    The two iterables are paired up with ``zip``, so pairing stops at the end
    of the shorter one. Empty input gives ``0``.
    """
    total = 0
    # Produce each element-wise product lazily, then fold it into the total.
    for term in (x * y for x, y in zip(a, b)):
        total += term
    return total

# Imported here so this cell is self-contained. perf_counter() is the
# high-resolution clock intended for measuring short durations, whereas
# datetime.now() follows the wall clock, which can be adjusted mid-run.
from time import perf_counter

# dt1: seconds taken by T calls of the pure-Python loop version.
# (a and b are NumPy arrays; the slow part is the Python-level loop.)
t0 = perf_counter()
for t in range(T):
    slow_dot_product(a, b)
dt1 = perf_counter() - t0

# dt2: seconds taken by T calls of NumPy's built-in dot product.
t0 = perf_counter()
for t in range(T):
    a.dot(b)
dt2 = perf_counter() - t0

# perf_counter() differences are already floats in seconds.
print("dt1 = ", dt1)
print("dt2 = ", dt2)
print("dt1/dt2 = ", dt1 / dt2)

dt1 =  4.887927
dt2 =  0.074773
dt1/dt2 =  65.37021384724433


# Matrices

Matrix could be represented by a list of lists

In [53]:
L = [[1,2],[3,4]]
L

[[1, 2], [3, 4]]

In [54]:
L[0]

[1, 2]

In [55]:
L[0][1]

2

In numpy we could first import list as array

In [56]:
A = np.array(L)
A

array([[1, 2],
       [3, 4]])

In [57]:
A[0][1]

2

We could use matrix notation in numpy which is easier

In [58]:
A[0,1]

2

We could also use column notation

In [59]:
A[:,0]

array([1, 3])

We could do transposition of matrix

In [60]:
A.T

array([[1, 3],
       [2, 4]])

Element-wise operation still works for matrix

In [61]:
np.exp(A)

array([[ 2.71828183,  7.3890561 ],
       [20.08553692, 54.59815003]])

In [62]:
np.exp(L)

array([[ 2.71828183,  7.3890561 ],
       [20.08553692, 54.59815003]])

In [63]:
B = np.array([[1,2,3],[4,5,6]])
B

array([[1, 2, 3],
       [4, 5, 6]])

" * " does element-wise multiplication, not matrix multiplication. 
We'll use the .dot() function to do matrix multiplication

In [64]:
A.dot(B)

array([[ 9, 12, 15],
       [19, 26, 33]])

Matrix multiplication needs an m by n matrix .dot() an n by p matrix; otherwise it will raise an error

In [65]:
A.dot(B.T)

ValueError: shapes (2,2) and (3,2) not aligned: 2 (dim 1) != 3 (dim 0)

Matrix determinant using .det()

In [66]:
np.linalg.det(A)

-2.0000000000000004

Matrix inverse using .inv()

In [67]:
np.linalg.inv(A)

array([[-2. ,  1. ],
       [ 1.5, -0.5]])

So if we multiply inverse with original we get identity matrix

In [68]:
np.linalg.inv(A).dot(A)

array([[1.0000000e+00, 4.4408921e-16],
       [0.0000000e+00, 1.0000000e+00]])

Trace of matrix

In [69]:
np.trace(A)

5

Diagonal of matrix

In [71]:
A

array([[1, 2],
       [3, 4]])

In [72]:
np.diag(A)

array([1, 4])

In [73]:
np.diag([1,4])

array([[1, 0],
       [0, 4]])

eigen decomposition of matrix

In [76]:
eig_A = np.linalg.eig(A)
eig_A

(array([-0.37228132,  5.37228132]),
 array([[-0.82456484, -0.41597356],
        [ 0.56576746, -0.90937671]]))

In [77]:
Lam, V = eig_A

In [79]:
Lam

array([-0.37228132,  5.37228132])

In [80]:
V

array([[-0.82456484, -0.41597356],
       [ 0.56576746, -0.90937671]])

In [81]:
V[:,0] * Lam[0], A @ V[:, 0]

(array([ 0.30697009, -0.21062466]), array([ 0.30697009, -0.21062466]))

The two look equal, but == does not return True for every element because floating-point results carry tiny rounding errors

In [82]:
V[:,0] * Lam[0] == A @ V[:, 0]

array([ True, False])

We could use the allclose() function for array comparison

In [83]:
np.allclose(V[:,0] * Lam[0], A @ V[:, 0])

True

In [85]:
np.allclose(V @ np.diag(Lam), A @ V)

True

# Linear Systems Solving

For example, a ticket is \\$1.50 for children and \\$4.00 for adults <br>
2,200 people attended and \\$5,050 fees collected <br>
How many children and how many adults?

We have:
$$
x_1 + x_2 = 2200\\
1.5x_1 + 4x_2 = 5050\\
$$

We would try to solve using matrix like:
$$
x = \begin{bmatrix} x_1 \\ x_2 \end{bmatrix}, \\
A = \begin{bmatrix} 1 & 1 \\ 1.5 & 4 \end{bmatrix}, \\
b = \begin{bmatrix} 2200 \\ 5050 \end{bmatrix} \\
$$

We need to solve for:
$$
Ax = b \longleftrightarrow x = A^{-1}b
$$

In [88]:
A = np.array([[1,1],[1.5,4]])
b = np.array([2200,5050])

In [89]:
np.linalg.inv(A).dot(b)

array([1500.,  700.])

But using "inv(A)" is slower and less accurate, so we should use solve() for linear systems instead

In [90]:
np.linalg.solve(A,b)

array([1500.,  700.])

# Generating Data

Generating zeros or ones. <br>
This is very similar to MATLAB

In [91]:
np.zeros((2,3))

array([[0., 0., 0.],
       [0., 0., 0.]])

In [92]:
np.ones((2,3))

array([[1., 1., 1.],
       [1., 1., 1.]])

In [93]:
np.ones((2,3)) * 10

array([[10., 10., 10.],
       [10., 10., 10.]])

Generating Identity matrix

In [94]:
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

Generating np random numbers

In [95]:
np.random.random()

0.6492665092590005

Generating arrays with random numbers

In [96]:
np.random.random((2,3))

array([[0.93187263, 0.51947112, 0.28452559],
       [0.48118074, 0.83576439, 0.17288781]])

Generating random numbers from Normal Distribution <br>
The distribution by default is:
$$
X \sim N(0, 1)
$$

In [98]:
np.random.randn(2, 3)

array([[ 0.26568948,  0.1714343 , -1.32691621],
       [ 0.19110132,  0.83680374,  0.45380267]])

In [100]:
R = np.random.randn(10000)
R

array([-0.274194  ,  0.48223659, -0.53925512, ..., -1.38302745,
        2.24328703, -1.95831083])

In [101]:
R.mean() 
# should be close to 0

-0.006093142958448891

In [102]:
np.mean(R)

-0.006093142958448891

In [103]:
R.var()
# should be close to 1

0.9993834883422409

In [104]:
R.std()

0.9996916966456413

Matrix of random data drawn from Normal Distribution

In [108]:
R = np.random.randn(10000, 5)
R # it's a 10000 by 5 matrix

array([[ 0.24377119, -1.64398017, -0.2358431 ,  0.93182715, -0.13094507],
       [-1.02788216,  0.19054064,  0.05442015,  0.74315103,  1.8945907 ],
       [-1.05480489, -1.28422267, -1.43505391,  0.32547177, -0.80010685],
       ...,
       [ 1.42034382,  0.30938989,  0.00651991, -0.7881205 ,  0.80655273],
       [ 0.04863272, -0.43444003, -0.07179467,  0.65272073, -1.3858372 ],
       [-0.03888459,  0.53846075,  0.130318  ,  0.31625924, -0.66769939]])

Calculate mean of each row or column

In [109]:
R.mean(axis = 0) # mean of each column

array([-0.00629687, -0.00674088,  0.01486362,  0.00169917, -0.01596907])

In [110]:
R.mean(axis = 1) # mean of each row

array([-0.167034  ,  0.37096407, -0.84974331, ...,  0.35093717,
       -0.23814369,  0.0556908 ])

In [111]:
R.mean(axis = 1).shape # check the dimension

(10000,)

In [112]:
R.shape

(10000, 5)

We could get the covariance matrix of a matrix. <br>
In R, each row is a sample, and each column is an attribute. <br>
By default, cov() treats each row of its input as a variable and each column as an observation. <br>
So for an m by n matrix, cov(R) returns m by m and cov(R.T) returns n by n; the covariance between attributes is cov(R.T)

In [113]:
np.cov(R)

array([[ 0.89091596,  0.04439284,  0.47273162, ..., -0.1777109 ,
         0.33344983, -0.12419754],
       [ 0.04439284,  1.13737117,  0.26588589, ..., -0.27729265,
        -0.45887117, -0.24590644],
       [ 0.47273162,  0.26588589,  0.48925133, ..., -0.32891858,
         0.22977597,  0.00906938],
       ...,
       [-0.1777109 , -0.27729265, -0.32891858, ...,  0.69225448,
        -0.32002757, -0.19332263],
       [ 0.33344983, -0.45887117,  0.22977597, ..., -0.32002757,
         0.56482119,  0.23822189],
       [-0.12419754, -0.24590644,  0.00906938, ..., -0.19332263,
         0.23822189,  0.20969246]])

In [114]:
np.cov(R).shape

(10000, 10000)

In [115]:
np.cov(R.T)

array([[ 0.99603741, -0.01358569,  0.00289136, -0.01051736,  0.00881838],
       [-0.01358569,  1.00966292,  0.00976643, -0.00454176, -0.02499608],
       [ 0.00289136,  0.00976643,  1.00079288,  0.0118524 , -0.01504226],
       [-0.01051736, -0.00454176,  0.0118524 ,  0.98347094, -0.01464959],
       [ 0.00881838, -0.02499608, -0.01504226, -0.01464959,  1.00249332]])

In [116]:
np.cov(R.T).shape

(5, 5)

Generating random integers

In [121]:
np.random.randint(0, 10, size = (3, 3))
# generates 3 by 3 matrix, ranging from 0 to 10 (10 is excluded)

array([[9, 9, 6],
       [0, 5, 1],
       [5, 0, 0]])

We could use np.random.choice to get similar results

In [122]:
np.random.choice(10, size = (3,3))

array([[7, 8, 2],
       [5, 8, 5],
       [3, 2, 2]])