Note: This notebook was created by merging several .ipynb files with the following CLI command:
> nbmerge ArraysVSLists.ipynb dotproduct.ipynb speedtest.ipynb matrices.ipynb solvinglinearsystems.ipynb generatingdata.ipynb > numpytutorial.ipynb

## Arrays vs Lists

In [34]:
import numpy as np

In [35]:
L = [1,2,3] # Python List

In [36]:
A = np.array([1,2,3]) # NumPy Array

In [37]:
for e in L: # 'e' for 'element'
    print(e)

1
2
3


In [38]:
for e in A:
    print(e)

1
2
3


In [39]:
L.append(4) # Append item to list

In [40]:
L

[1, 2, 3, 4]

In [41]:
# Cannot do "A.append(4)". Unlike a list, the size of an array is fixed, so ndarray has no append method.
A.append(4)

AttributeError: 'numpy.ndarray' object has no attribute 'append'

In [None]:
L + [5]

[1, 2, 3, 4, 5]

In [None]:
A + np.array([4]) 
# Element 4 is added to every element in A. This is called broadcasting in NumPy nomenclature.
# Technically in math this is illegal, you can't add two vectors of different sizes, but in NumPy this makes sense.
# NumPy is about doing math, which is why the plus operator does an actual mathematical operation
# https://numpy.org/doc/stable/user/basics.broadcasting.html

array([5, 6, 7])

In [None]:
A + np.array([4,5,6])
# Plus sign is intelligent. It broadcasts when you add a scalar to an array, and adding an array to an array does regular vector addition. 

array([5, 7, 9])

In [None]:
A + np.array([4,5]) # Error, can't add vectors of different sizes.

ValueError: operands could not be broadcast together with shapes (3,) (2,) 

In [None]:
2 * A # Scalar and multiplication: broadcasting.

array([2, 4, 6])

In [None]:
2 * L # This just does repetition, which means you can't multiply a list by a non-int number

[1, 2, 3, 4, 1, 2, 3, 4]

In [None]:
L + L

[1, 2, 3, 4, 1, 2, 3, 4]

In [None]:
L2 = []
for e in L:
    L2.append(e + 3)

In [None]:
L2

[4, 5, 6, 7]

In [None]:
# Same as the for-loop above, but with a list comprehension. List comprehensions are the most common kind of comprehension.
# https://towardsdatascience.com/4-types-of-comprehensions-in-python-2fbeafdf2fda
# my_list = [<expression> for <item> in <iterable> if <condition>]
# condition optional
L2 = [e + 3 for e in L]
L2

[4, 5, 6, 7]

In [None]:
L**2 # Error: doesn't work with list, but the below command works

TypeError: unsupported operand type(s) for ** or pow(): 'list' and 'int'

In [None]:
L2 = [e**2 for e in L]
L2

[1, 4, 9, 16]

In [None]:
A**2 # On the contrary, it works with NumPy arrays. 
# In general, applying a function to an array very often applies it element wise. 

array([1, 4, 9])

In [42]:
np.sqrt(A) # Other functions on arrays

array([1.        , 1.41421356, 1.73205081])

In [43]:
np.log(A)

array([0.        , 0.69314718, 1.09861229])

In [44]:
np.exp(A)

array([ 2.71828183,  7.3890561 , 20.08553692])

In [45]:
np.tanh(A) # Hyperbolic tangent, common in deep learning

array([0.76159416, 0.96402758, 0.99505475])

## The Dot Product

In [2]:
import numpy as np

In [3]:
a = np.array([1,2])
b = np.array([3,4])

In [4]:
# Method 1
dot = 0
for e, f, in zip(a,b): # zip is an iterator of tuples 
    dot += e * f
dot


11

In [6]:
# Method 2
dot = 0
for i in range(len(a)):
    dot += a[i] * b[i]
dot

11

In [7]:
a * b # does element wise multiplication

array([3, 8])

In [8]:
# Method 3
np.sum(a*b)

11

In [10]:
# Method 4
(a * b).sum()
# ChatGPT
# In general, Methods 3 and 4 are preferred when using NumPy, as they take advantage of NumPy's optimized and vectorized functions, resulting in better performance, especially for large arrays. However, Methods 1 and 2 can still be useful for smaller arrays or when NumPy is not available.
# Yes, Methods 1 and 2 would work with Python lists as well. Both methods utilize native Python loops and operations, which are compatible with lists. Here are the examples with Python lists:


11

In [12]:
np.dot(a,b) # Top-level function; the equivalent instance-method form is a.dot(b)

11

In [14]:
amag = np.sqrt((a*a).sum()) # Magnitude (Euclidean length) of a: square root of the sum of squared elements
amag

2.23606797749979

In [15]:
np.linalg.norm(a)  # Results in same answer

2.23606797749979

In [17]:
cosangle = a.dot(b) / (np.linalg.norm(a) * np.linalg.norm(b))
cosangle

0.9838699100999074

In [18]:
angle = np.arccos(cosangle)
angle

0.17985349979247847

## Speed Test

In [4]:
## Speed comparison ##
from datetime import datetime
import numpy as np
# Speed test to compare numpy vs regular python lists in terms of dot product
a = np.random.randn(100)  # Array of 100 random numbers, stdev = 1
b = np.random.randn(100)
T = 100000

def slow_dot_product(a,b):
    """Return the dot product of ``a`` and ``b`` using a plain Python loop.

    This is the deliberately unvectorized baseline for the speed test:
    it walks both sequences in lockstep and accumulates the pairwise
    products one at a time. Pairing stops at the end of the shorter
    input, and the result is 0 when either input is empty.
    """
    total = 0
    for pair in zip(a, b):
        left, right = pair
        total += left * right
    return total

t0 = datetime.now()

for t in range(T):
    slow_dot_product(a,b)
dt1 = datetime.now() - t0

t0 = datetime.now()

for t in range(T):
    a.dot(b)
dt2 = datetime.now() - t0

print("dt1/dt2:", dt1.total_seconds() / dt2.total_seconds())
# 38 on first try
# 40 on second try

dt1/dt2: 38.368471862806885


## Matrices

In [2]:
# Can think of 2D arrays as matrices
# numpy array can have any number of dimensions, but matrices are always 2D
L = [[1,2],[3,4]]
L

[[1, 2], [3, 4]]

In [3]:
L[0]  # Returns first row of L

[1, 2]

In [4]:
L[0][1]

2

In [5]:
import numpy as np
A = np.array([[1,2],[3,4]])
A

array([[1, 2],
       [3, 4]])

In [6]:
A[0][1]

2

In [7]:
A[0,1]

2

In [8]:
A[:,0]  # Returns the column at index 0
# A colon means "select everything along this dimension"

array([1, 3])

In [9]:
A.T  # Transpose

array([[1, 3],
       [2, 4]])

In [10]:
np.exp(A)

array([[ 2.71828183,  7.3890561 ],
       [20.08553692, 54.59815003]])

In [11]:
np.exp(L)  # Returns a np array, even with list input

array([[ 2.71828183,  7.3890561 ],
       [20.08553692, 54.59815003]])

In [12]:
B = np.array([[1,2,3],[4,5,6]])
B

array([[1, 2, 3],
       [4, 5, 6]])

In [13]:
A.dot(B)  # Matrix multiplication, which is a generalization of dot product


array([[ 9, 12, 15],
       [19, 26, 33]])

In [14]:
A.dot(B.T)
# Inner dimensions must match for matrix multiplication

ValueError: shapes (2,2) and (3,2) not aligned: 2 (dim 1) != 3 (dim 0)

In [None]:
np.linalg.det(A)

-2.0000000000000004

In [None]:
np.linalg.inv(A)

array([[-2. ,  1. ],
       [ 1.5, -0.5]])

In [None]:
np.linalg.inv(A).dot(A)

array([[1.00000000e+00, 0.00000000e+00],
       [2.22044605e-16, 1.00000000e+00]])

In [None]:
np.trace(A)

5

In [None]:
np.diag(A)

array([1, 4])

In [None]:
np.diag([1,4])

array([[1, 0],
       [0, 4]])

In [None]:
np.linalg.eig(A)  # Eigendecomposition: returns an array of eigenvalues and an array whose columns are the eigenvectors

(array([-0.37228132,  5.37228132]),
 array([[-0.82456484, -0.41597356],
        [ 0.56576746, -0.90937671]]))

In [15]:
Lam, V = np.linalg.eig(A)

In [16]:
V[:,0] * Lam[0] == A @ V[:,0]
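# Mathematically this should be [True, True], but exact == on floats can fail due to rounding error; compare with np.allclose instead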

array([ True, False])

In [18]:
V[:,0] * Lam[0], A @ V[:,0]


(array([ 0.30697009, -0.21062466]), array([ 0.30697009, -0.21062466]))

In [19]:
np.allclose(V[:,0] * Lam[0], A @ V[:,0])
# Returns True if two arrays are element-wise equal within a tolerance.


True

In [20]:
np.allclose(V @ np.diag(Lam), A @ V)

True

## Solving Linear Systems

In [2]:
#  DO NOT solve linear systems by inverting the matrix: Ax = b <-> x = A^-1 b
#  This is slow and inaccurate. There are better algorithms.
#  ALWAYS use np.linalg.solve()
import numpy as np
A = np.array([[1,1],[1.5,4]])
b = np.array([2200,5050])


In [6]:
np.linalg.solve(A,b)  # Yes


array([1500.,  700.])

In [5]:
np.linalg.inv(A).dot(b)  # no
# Only works because it's a very simple problem

array([1500.,  700.])

## Generating Data

In [3]:
# Typing data by hand is not feasible when data has hundreds of dimensions
# Synthetic data can be used to test efficacy of our models.

# Array of all zeros
import numpy as np
np.zeros((2,3))

array([[0., 0., 0.],
       [0., 0., 0.]])

In [4]:
np.ones((2,3))

array([[1., 1., 1.],
       [1., 1., 1.]])

In [5]:
10 * np.ones((2,3))

array([[10., 10., 10.],
       [10., 10., 10.]])

In [6]:
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [9]:
np.random.random()

0.02288612236556209

In [10]:
np.random.random((2,3))

array([[0.1193711 , 0.88481118, 0.82908149],
       [0.37034521, 0.1636773 , 0.75210418]])

In [12]:
np.random.randn(2,3)

array([[-1.81939758,  1.42611423,  1.08501614],
       [-0.02595722,  2.21864627, -0.63946396]])

In [15]:
R = np.random.randn(10000)

In [16]:
R.mean()  # Instance Method

0.005610392321705379

In [17]:
np.mean(R)   # Top-level function

0.005610392321705379

In [18]:
R.var()

1.0092961059268886

In [19]:
R.std()

1.0046373006846245

In [20]:
R = np.random.randn(10000,3)

In [21]:
R.mean(axis=0)  # Mean of each column

array([-0.00819753, -0.00041474, -0.00664035])

In [23]:
R.mean(axis=1).shape

(10000,)

In [25]:
np.cov(R).shape

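# By default (rowvar=True), cov treats every row as a variable and every column as an observation,
# so a (10000, 3) input produces a 10000 x 10000 covariance matrix.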

(10000, 10000)

In [26]:
np.cov(R.T)

array([[1.00011059, 0.00948995, 0.01087786],
       [0.00948995, 0.99135824, 0.02069535],
       [0.01087786, 0.02069535, 0.98413491]])

In [27]:
np.cov(R, rowvar = False)  # Same as above

array([[1.00011059, 0.00948995, 0.01087786],
       [0.00948995, 0.99135824, 0.02069535],
       [0.01087786, 0.02069535, 0.98413491]])

In [31]:
np.random.randint(0,10, size = (3,3))

array([[6, 6, 2],
       [4, 1, 5],
       [4, 6, 6]])

In [32]:
np.random.choice(10,size=(3,3))

array([[3, 5, 6],
       [0, 7, 2],
       [2, 0, 2]])