# Lists vs Arrays

In [1]:
import numpy as np

In [14]:
L = [1,2,3]

In [15]:
A = np.array([1,2,3])

In [16]:
for i in L:
    print(i)

1
2
3


In [17]:
for i in A:
    print(i)

1
2
3


A and L print the same thing, so how are they different?

<font color = red>So with a list you can APPEND:</font>

In [18]:
L.append(4)
print(L)

[1, 2, 3, 4]


In [19]:
A.append(4)

AttributeError: 'numpy.ndarray' object has no attribute 'append'

Different way to add an elemet to a list

In [21]:
L = L + [5]
print(L)

[1, 2, 3, 4, 5, 5]


Lets do this in the numpy array:

In [22]:
A = A + [4,5]

ValueError: operands could not be broadcast together with shapes (3,) (2,) 

In [23]:
L2 = []

for e in L :
    L2.append(e + e)
L2

[2, 4, 6, 8, 10, 10]

In [24]:
A + A

array([2, 4, 6])

### '+' sign with list does CONCATENATION, '+' sign with numpy array does vector addition.

In [25]:
#Scalar multiplication works with numpy too
2 * A

array([2, 4, 6])

In [28]:
# What about in the case for the list?
2 * L
print(2*L)
print("Repeated the list twice!")

[1, 2, 3, 4, 5, 5, 1, 2, 3, 4, 5, 5]
Repeated the list twice!


In [37]:
# How about when you want to square list/array?
L**2

TypeError: unsupported operand type(s) for ** or pow(): 'list' and 'int'

In [42]:
# You can square the elements of a list by using a for loop
L2 = []
for i in L:
    L2.append(i*i)

L2

[1, 4, 9, 16, 25, 25]

In [44]:
# Squaring elements of an array
A**2

array([1, 4, 9])

In [45]:
# Square root an array
np.sqrt(A)

array([1.        , 1.41421356, 1.73205081])

In [46]:
# log of an array
np.log(A)

array([0.        , 0.69314718, 1.09861229])

In [47]:
# exponential of an array
np.exp(A)

array([ 2.71828183,  7.3890561 , 20.08553692])

In conclusion, when performing mathematical operations on the elements of a matrix, you want to use the numpy array over for-looping a list since they are faster and much more convenient.

# Dot Product

In [55]:
a = np.array([1,2])

In [56]:
b = np.array([2,1])

In [57]:
dot = 0

In [58]:
# for loop to calculate the dot product of a and b
for e, f in zip(a,b):
    dot += e*f

In [59]:
dot

4

In [60]:
a*b

array([2, 2])

In [61]:
# calculating dot product using np.sum()
# np.sum() adds all the elements in the array
np.sum(a*b)

4

In [65]:
# Alternative way of calculating the dot product
(a*b).sum()

4

In [67]:
# More conveniently via np.dot()
np.dot(a,b)

4

In [68]:
# instance method
a.dot(b)

4

In [69]:
b.dot(a)

4

In [71]:
# Finding the angle between two vectors
amag = np.sqrt((a*a).sum())
amag

2.23606797749979

In [73]:
# Numpy's function for finding the angle
# np.linalg.norm()
amag = np.linalg.norm(a)
amag

2.23606797749979

In [74]:
cosangle = a.dot(b) / (np.linalg.norm(a) * np.linalg.norm(b))
cosangle

0.7999999999999998

In [77]:
angle = np.arccos(cosangle)
angle

0.6435011087932847

# Dot Product II: Speed comparison

In [25]:
import numpy as np
from datetime import datetime
from builtins import range

# randomly generate 100 samples from the Normal distribution
a = np.random.randn(100) 
b = np.random.randn(100)
T = 10000

# define the slow dot product function
def slow_dot_product(a, b):
    result = 0
    for e,f in zip(a,b): # loop over a,b arrays 
        result += e*f    # multiply i,jth element from the arrays and
    return result        # add them into the result variable to 
                         # calculate the dot product

# calculate how long it takes to compute the dot product using for loop
t0 = datetime.now()        # returns current date and time
for i in range(T):         # loop over from 0 to 100000
    slow_dot_product(a, b)
dt1 = datetime.now() - t0  # subtract current time from the time when
                           # calculation started

# calculate how long it takes to compute dot product using x.dot(y)
t0 = datetime.now()
for t in range(T):
    a.dot(b)
dt2 = datetime.now() - t0

print("dt1 / dt2:", dt1.total_seconds() / dt2.total_seconds())

dt1 / dt2: 35.1179187610379


# Vectors and Matrices

In [7]:
import numpy as np
M = np.array([[1,2],[3,4]])

In [8]:
L = [[1,2], [3,4]]

In [9]:
# Python List
L[0]

[1, 2]

In [10]:
L[0][0]

1

In [11]:
M[0][0]

1

In [12]:
# Above is the same as the following:
M[0,0]

1

In [19]:
# Following works some what similar with the numpy array
# Most of the times it is recommended that we use a numpy array
# over the numpy matrix
M2 = np.matrix([[1,2], [3,4]])
M2

matrix([[1, 2],
        [3, 4]])

In [20]:
# Note when you pass in the numpy array on the matrix,
# you get an array
A = np.array([M2])
A

array([[[1, 2],
        [3, 4]]])

In [21]:
# Transpose A by the following:
A.T

array([[[1],
        [3]],

       [[2],
        [4]]])

In [23]:
# A matrix is REALLY JUST a 2-dimensional Numpy Array
# A vector is a 1-dimensional Array

# Generating Matrices

In [24]:
np.array([1,2,3])
# This process may be inconvenient when you want to
# generate 1,2,3,....,n elements

array([1, 2, 3])

In [26]:
# create a function of zero
z = np.zeros(10)
z

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [28]:
# Creating a 10x10 matrix with zeros
z = np.zeros((10,10))
z

# Tuple: collection of Python objects separated by commas

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [31]:
# creating a 10x10 matrix with 1's
ones = np.ones((10, 10))
ones

array([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])

In [35]:
# Creating a 10x10 matrix random numbers
# return numbers in the half open interval [0.0, 1.0)
# Pick numbers on a uniformly distributed intveral
random = np.random.random((10,10))
random

# When they say 'random numbers' => prob dist

array([[0.97708014, 0.62295533, 0.1812515 , 0.99810328, 0.00411326,
        0.8000151 , 0.94792016, 0.14637436, 0.27856838, 0.86102291],
       [0.81582548, 0.54731947, 0.65199378, 0.69630737, 0.22955551,
        0.96207121, 0.62556624, 0.86276973, 0.36876843, 0.50009213],
       [0.043136  , 0.60258042, 0.47987552, 0.37658998, 0.87367912,
        0.85206563, 0.20495818, 0.31389755, 0.62763565, 0.67124503],
       [0.46737045, 0.54750865, 0.85271051, 0.32353867, 0.39179791,
        0.13776644, 0.05240216, 0.61818074, 0.49775145, 0.86792618],
       [0.2294305 , 0.95430957, 0.52126714, 0.40970431, 0.11715155,
        0.14256709, 0.50255045, 0.25402169, 0.66208061, 0.98090465],
       [0.55346032, 0.85373   , 0.26969536, 0.46689698, 0.03453096,
        0.20015104, 0.93643681, 0.42534316, 0.35987246, 0.51491307],
       [0.19535393, 0.25276281, 0.92314142, 0.85946387, 0.15408279,
        0.55387584, 0.53039773, 0.66170991, 0.51758168, 0.07655521],
       [0.49921831, 0.89663639, 0.1257175

In [36]:
# How about for the normal distribution?
gauss = np.random.randn((10,10))
# Error occurs because what we passed in was wrong
# This function takes in each of the dimensions as individual args
# while all the others take in tuples

TypeError: 'tuple' object cannot be interpreted as an integer

In [37]:
gauss = np.random.randn(10,10)
gauss

array([[ 1.88461610e+00, -1.14540642e+00, -3.93038216e-01,
        -2.66628602e-01, -7.44121142e-04,  4.27834083e-01,
         1.50317593e+00,  5.00513779e-01,  3.67827067e+00,
         5.55040567e-01],
       [ 1.03786992e-01, -1.82225221e+00, -1.94044977e-02,
        -6.54494879e-01, -8.76750852e-01, -1.51784008e-01,
         1.23973005e-02, -1.05700508e+00,  1.40343107e+00,
         1.08174466e+00],
       [-9.70475892e-01, -1.41757983e-01, -5.46393682e-01,
        -6.62010810e-01,  9.69577931e-01, -7.26584339e-01,
        -9.57449468e-01,  7.28207062e-01,  5.26303773e-01,
         4.92335995e-02],
       [ 1.81637264e+00,  3.44188132e-01,  1.09306940e+00,
        -5.33737696e-01, -1.95315022e+00,  1.89866698e+00,
         8.78679335e-01, -1.63755386e-02,  1.39422854e+00,
         9.54841026e-01],
       [-1.43430892e+00,  3.94323405e-01, -8.30714970e-01,
        -8.38864733e-01, -1.87040230e-01,  6.20856923e-02,
         5.21093032e-01, -1.39988267e+00, -4.22587080e-01,
         1.

In [38]:
# Calculating the mean
gauss.mean()

0.09489809400379073

In [39]:
# Calculating the variance
gauss.var()

1.1039802033576462

# Matrix Products

In [None]:
# Note that multiplication is only possible when two matrices
# have the same inner dimensions, i.e. 3x2 and 2x4
# "*" in Numpy means element by element multiplication!
# "dot" in Numpy means matrix multiplication

## More Matrix Operations

In [9]:
import numpy as np
A = np.array([[1,2],[3,4]])
A

array([[1, 2],
       [3, 4]])

In [10]:
# finding inverse of A
Ainv = np.linalg.inv(A)
Ainv

array([[-2. ,  1. ],
       [ 1.5, -0.5]])

In [11]:
# check if above calculations are right
# above calculation is right if we get an identity matrix
Ainv.dot(A) 

array([[1.0000000e+00, 4.4408921e-16],
       [0.0000000e+00, 1.0000000e+00]])

In [12]:
A.dot(Ainv)

array([[1.00000000e+00, 1.11022302e-16],
       [0.00000000e+00, 1.00000000e+00]])

In [14]:
# finding the determinant of A
np.linalg.det(A)

-2.0000000000000004

In [16]:
# following method returns the diagonal elements
np.diag(A)

array([1, 4])

In [17]:
np.diag([1,2])

array([[1, 0],
       [0, 2]])

In [18]:
# If you pass in a 2d array in the np.diag method, you get a 1d array.
# If you pass in a 1d array you get a matrix with diagonal entries and
# all other entries as zero.

In [19]:
# Outerproduct/Innerproduct
a = np.array([1,2])
b = np.array([3,4])

In [20]:
# calculate the outer product
np.outer(a,b)

array([[3, 4],
       [6, 8]])

In [21]:
# inner product
np.inner(a,b)

11

In [23]:
# Check if the calculation is right
a.dot(b)

11

In [24]:
# Calculating the trace
np.diag(A).sum()

5

In [25]:
# Numpy's function for trace
np.trace(A)

5

In [38]:
# Eigenvalues/Eigenvectors
X = np.random.randn(100,3) # 100 samples with 3 columns

In [39]:
# calcuating covariance
cov = np.cov(X)

In [40]:
cov.shape

(100, 100)

In [42]:
# covariance of the transpose of X
# Make sure to transpose the matrix first if you want to
# find the covariance of the matrix!
cov = np.cov(X.T)
cov

array([[ 0.92459129, -0.07530937, -0.03271785],
       [-0.07530937,  1.00848676, -0.07878773],
       [-0.03271785, -0.07878773,  1.08650387]])

#### <font color=red>Eigenvalues, eigenvectors:</font>
**np.linalg.eigh(C)** is for **symmetric and hermitian matrix** only.

**Symmetric** means $A = A^T$

**Hermitian** menas $A = A^H$

$A^H =$ **conjugate transpose** of A

In [44]:
# first tuple contains 3 eigen values
# second tuple contains the eigen vectors
np.linalg.eigh(cov)

(array([0.85867248, 1.02468306, 1.13622637]),
 array([[ 0.77849205, -0.62393746,  0.06820686],
        [ 0.55019053,  0.62607327, -0.55256008],
        [ 0.30206044,  0.4676904 ,  0.83067754]]))

In [45]:
# we get the same answer as above!
# it is possible that the resulting eigenvectors can be in different
# orders
np.linalg.eig(cov)

(array([0.85867248, 1.02468306, 1.13622637]),
 array([[-0.77849205, -0.62393746,  0.06820686],
        [-0.55019053,  0.62607327, -0.55256008],
        [-0.30206044,  0.4676904 ,  0.83067754]]))

## Solving a Linear System

Problem: $Ax = b$

Solution: $A^{-1}Ax = x = A^{-1}b$

If it is invertible, then $x$ has a unique solution

In [46]:
A

array([[1, 2],
       [3, 4]])

In [47]:
b

array([3, 4])

In [49]:
b = np.array([1,2])
b

array([1, 2])

In [51]:
# Calculate A^-1b = x
x = np.linalg.inv(A).dot(b)
x

array([2.22044605e-16, 5.00000000e-01])

In [59]:
# Above computation as a function in numpy
# Learn to get used to using the following method instead of the prior
x = np.linalg.solve(A,b)
x

array([0. , 0.5])

## Word Problem
The admission fee at a small fair is 1.50 and 4.00 for adults. On a certain day, 2200 people enter the fair and 5050 is collected. How many children and how many adults attended?

Let:

$X1 =$ number of children, $X2 =$ number of adults

$X1 + X2 = 2200$

$1.5*X1 + 4*X2 = 5050$

In [65]:
A = np.array([[1, 1], [1.5, 4]])
b = np.array([2200, 5050])
x = np.linalg.solve(A,b)

print("So X1 =", x[0], "and X2 =", x[1])

So X1 = 1500.0 and X2 = 700.0
