<a href="https://colab.research.google.com/github/clshu/numpy_stack/blob/main/Numpy_LP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ***Numpy***

# 2. Array vs List


In [3]:
import numpy as np

In [4]:
L = [1, 2, 3]
L

[1, 2, 3]

In [5]:
A = np.array(L)
A

array([1, 2, 3])

In [6]:
for e in L:
  print(e)

1
2
3


In [7]:
for e in A:
  print(e)

1
2
3


In [8]:
L.append(4)
L

[1, 2, 3, 4]

In [9]:
A.append(4)

AttributeError: ignored

In [11]:
L + [5] # concatenation: `+` on lists joins them into a new list

[1, 2, 3, 4, 5]

In [12]:
A + np.array([4]) # broadcast addition: the length-1 array is added to every element of A (not a valid operation in strict vector math)

array([5, 6, 7])

In [13]:
A + np.array([4, 5, 6]) # element-wise addition of two arrays of the same size

array([5, 7, 9])

In [14]:
A + np.array([4, 5])

ValueError: ignored

In [16]:
2 * A

array([2, 4, 6])

In [17]:
2 * L

[1, 2, 3, 4, 1, 2, 3, 4]

In [18]:
L + L

[1, 2, 3, 4, 1, 2, 3, 4]

In [19]:
L2 = []
for e in L:
  L2.append(e + 3)
L2

[4, 5, 6, 7]

In [20]:
L2 = [e + 3 for e in L]
L2

[4, 5, 6, 7]

In [22]:
L ** 2

TypeError: ignored

In [24]:
L2 = [e ** 2 for e in L]
L2

[1, 4, 9, 16]

## NumPy applies mathematical functions element-wise

In [25]:
A ** 2

array([1, 4, 9])

In [26]:
np.sqrt(A)

array([1.        , 1.41421356, 1.73205081])

In [27]:
np.log(A)

array([0.        , 0.69314718, 1.09861229])

In [28]:
np.exp(A)

array([ 2.71828183,  7.3890561 , 20.08553692])

In [29]:
np.tanh(A)

array([0.76159416, 0.96402758, 0.99505475])

### Conclusion: A list is a general-purpose data structure, whereas a NumPy array is designed for doing math on its data.

# 3. The Dot Product


In [30]:
a = np.array([1, 2])
b = np.array([3, 4])

In [31]:
dot = 0
for e, f in zip(a, b):
  dot += e * f
dot

11

In [32]:
dot = 0
for i in range(len(a)):
  dot += a[i] * b[i]
dot

11

In [33]:
a * b

array([3, 8])

In [34]:
np.sum(a * b)

11

In [35]:
(a * b).sum()

11

In [36]:
np.dot(a, b)

11

In [37]:
a.dot(b)

11

In [38]:
a @ b

11

### The dot product of a and b equals the magnitude of a times the magnitude of b times the cosine of the angle between a and b

In [39]:
ameg = np.sqrt((a * a).sum())
ameg

2.23606797749979

In [40]:
np.linalg.norm(a)

2.23606797749979

In [41]:
cosangle = a.dot(b) / (np.linalg.norm(a) * np.linalg.norm(b))
cosangle

0.9838699100999074

In [42]:
angle = np.arccos(cosangle)
angle

0.17985349979247847

# 4. Speed Test


In [43]:
## speed comparison ##
from datetime import datetime
# Note: you can also use %timeit
# Two random vectors of length 100 drawn with np.random.randn; these are the
# inputs for every dot-product implementation timed below.
a = np.random.randn(100)
b = np.random.randn(100)
# Number of times each implementation is called inside its timing loop.
T = 100000

def slow_dot_production(a, b):
  """Return the dot product of `a` and `b` using a plain Python loop.

  The two inputs are walked in parallel with `zip`, so iteration stops at the
  end of the shorter one. The running total starts at the integer 0, which is
  also what is returned when either input is empty.
  """
  total = 0
  for product in (x * y for x, y in zip(a, b)):
    total = total + product
  return total

# Time T calls of the pure-Python loop implementation.
t0 = datetime.now()
for i in range(T):
  slow_dot_production(a, b)
dt1 = datetime.now() - t0

# Time T calls of NumPy's built-in dot product on the same vectors.
t0 = datetime.now()
for i in range(T):
  a.dot(b)
dt2 = datetime.now() - t0

# A ratio greater than 1 means the Python loop took longer than NumPy.
print("dt1/dt2(slow vs np): ", dt1.total_seconds() / dt2.total_seconds())

def list_comp_dot_production(a, b):
  """Return the dot product of `a` and `b` by indexing into `b`.

  Despite the name, this is an index-based loop rather than a list
  comprehension: it iterates over `a` while tracking the position and looks
  up the matching element with `b[position]`. The running total starts at
  the integer 0, which is returned unchanged when `a` is empty.
  """
  total = 0
  position = 0
  for value in a:
    total = total + value * b[position]
    position += 1
  return total

# Time T calls of list_comp_dot_production on the same vectors.
t0 = datetime.now()
for i in range(T):
  list_comp_dot_production(a, b)
dt3 = datetime.now() - t0

# Ratio against the NumPy timing (dt2) measured earlier in this cell.
# NOTE: the "list comprehension" label refers to list_comp_dot_production,
# which is an index-based for loop rather than an actual list comprehension.
print ("dt3/dt2(list comprehension vs np): ", dt3.total_seconds() / dt2.total_seconds())

# Sanity check: print the value each implementation computes for the same a and b
# so they can be compared by eye.
print("slow_dot_production: ", slow_dot_production(a, b))
print("np dot production: ", a.dot(b))
print("list comprehension: ", list_comp_dot_production(a,b))

dt1/dt2(slow vs np)):  36.71749430460244
dt3/dt2(list comprehension vs np):  42.9967155376036
slow_dot_production:  17.86130071491069
np dot production:  17.861300714910698
list comprehension:  17.86130071491069




# 5. Matrices

### Using np.matrix is not recommended. It requires a 2D array.



In [44]:
L = [[1, 2], [3, 4]]



In [45]:
L[0]


[1, 2]

In [46]:
L[0][1]

2

### NumPy functions also work with regular (nested) Python lists


In [47]:
np.exp(L)

array([[ 2.71828183,  7.3890561 ],
       [20.08553692, 54.59815003]])

In [48]:
A = np.array(L)
A

array([[1, 2],
       [3, 4]])

In [49]:
A[0][1]

2

In [50]:
A[0, 1] # better feature

2

In [51]:
A[:, 0] # select the first column (note the comma: `A[:0]` would slice zero rows and return an empty array)

array([1, 3])

In [52]:
A.T

array([[1, 3],
       [2, 4]])

In [53]:
np.exp(A)

array([[ 2.71828183,  7.3890561 ],
       [20.08553692, 54.59815003]])

### NumPy treats a regular (nested) list as an np array and returns the result as an np array

In [54]:
np.exp(L)

array([[ 2.71828183,  7.3890561 ],
       [20.08553692, 54.59815003]])

In [55]:
B = [[1, 2, 3], [4, 5, 6]]
B

[[1, 2, 3], [4, 5, 6]]

In [56]:
A.dot(B)

array([[ 9, 12, 15],
       [19, 26, 33]])

In [57]:
A.dot(B.T)

AttributeError: ignored

In [59]:
np.linalg.det(A)

-2.0000000000000004

In [60]:
np.linalg.inv(A)

array([[-2. ,  1. ],
       [ 1.5, -0.5]])

In [61]:
np.linalg.inv(A).dot(A)

array([[1.00000000e+00, 0.00000000e+00],
       [1.11022302e-16, 1.00000000e+00]])

In [62]:
np.trace(A)

5

In [63]:
np.diag(A)

array([1, 4])

In [64]:
np.diag([1, 4])

array([[1, 0],
       [0, 4]])

In [65]:
np.linalg.eig(A)

(array([-0.37228132,  5.37228132]), array([[-0.82456484, -0.41597356],
        [ 0.56576746, -0.90937671]]))

In [66]:
Lam, V = np.linalg.eig(A)

In [67]:
V[:,0] * Lam[0] == A @ V[:,0]


array([ True, False])

In [68]:
V[:,0] * Lam[0], A @ V[:,0]

(array([ 0.30697009, -0.21062466]), array([ 0.30697009, -0.21062466]))

In [69]:
np.allclose(V[:,0] * Lam[0], A @ V[:,0])

True

In [70]:
np.allclose(V @ np.diag(Lam), A @ V)

True

# 6. Solving Linear System

In [71]:
# A park admits 2200 people in total, children and adults. The total receipts are $5050.
# A child's ticket costs $1.50 and an adult's ticket costs $4. How many children and adults entered the park?
#
# With the unknowns ordered as [children, adults], the system A x = b is:
#   1   * children + 1 * adults = 2200   (head count)
#   1.5 * children + 4 * adults = 5050   (receipts in dollars)


A = [[1, 1], [1.5, 4]]
b = [2200, 5050]



In [72]:
np.linalg.solve(A, b) # Good

array([1500.,  700.])

In [73]:
np.linalg.inv(A).dot(b) # Slow. Don't use it this way

array([1500.,  700.])

# 7. Generating Data

In [74]:
np.zeros((2 ,3))

array([[0., 0., 0.],
       [0., 0., 0.]])

In [75]:
np.ones((2, 3))

array([[1., 1., 1.],
       [1., 1., 1.]])

In [76]:
10 * np.ones((2, 3))

array([[10., 10., 10.],
       [10., 10., 10.]])

In [78]:
np.eye(3) # identity matrix

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [79]:
np.random.random()

0.7483896916153487

### Which distribution do the random numbers come from: normal (Gaussian) or uniform?

In [80]:
np.random.random((2, 3)) # uniform distribution

array([[0.03561882, 0.59720531, 0.8028132 ],
       [0.4573879 , 0.86958161, 0.40785871]])

In [84]:
np.random.randn(2, 3) # normal(gaussian) distribution. Argument signature is different.

array([[ 1.16610604, -0.36528594, -0.4240313 ],
       [-0.16683243, -2.01110271,  0.58942585]])

In [85]:
R = np.random.randn(100000)

In [86]:
R.mean()

-0.0021719416748600586

In [87]:
np.mean(R)

-0.0021719416748600586

In [88]:
R.var()

1.0022766495284354

In [90]:
R.std() # sqrt of variance

1.0011376776090466

In [92]:
S = np.random.randn(10000, 3)

In [97]:
S.mean(axis=0) # mean of each column

array([ 0.00930736,  0.01287201, -0.00045845])

In [98]:
S.mean(axis=1) # mean of each row

array([-0.0142417 ,  0.14552943, -0.04992252, ..., -0.13340752,
        0.1357983 , -0.61266331])

In [99]:
S.mean(axis=1).shape

(10000,)

### In ML, each row is a sample (one observation) and each column is a specific measurement. A shape of (10000, 3) means 10000 observations with 3 measurements per observation.

In [100]:
np.cov(S)

array([[ 2.65365533,  0.06998626, -0.17561356, ..., -1.16273435,
         1.07994622,  0.0988066 ],
       [ 0.06998626,  0.86459838,  0.08326614, ...,  0.69693943,
        -0.50555048,  1.65748512],
       [-0.17561356,  0.08326614,  0.02057682, ...,  0.15107618,
        -0.12587618,  0.16206116],
       ...,
       [-1.16273435,  0.69693943,  0.15107618, ...,  1.1230951 ,
        -0.92357058,  1.35235366],
       [ 1.07994622, -0.50555048, -0.12587618, ..., -0.92357058,
         0.77005993, -0.98413754],
       [ 0.0988066 ,  1.65748512,  0.16206116, ...,  1.35235366,
        -0.98413754,  3.17796709]])

In [102]:
np.cov(S).shape # by default cov treats each row as a variable and each column as one observation, so (10000, 3) data gives a 10000 x 10000 result -- not what we want for rows-as-samples data (e.g. PyTorch)

(10000, 10000)

In [104]:
np.cov(S.T) # close to identity

array([[ 1.01430017, -0.00290286, -0.00151377],
       [-0.00290286,  1.00609194, -0.0013115 ],
       [-0.00151377, -0.0013115 ,  0.9850772 ]])

In [106]:
np.cov(S, rowvar=False)

array([[ 1.01430017, -0.00290286, -0.00151377],
       [-0.00290286,  1.00609194, -0.0013115 ],
       [-0.00151377, -0.0013115 ,  0.9850772 ]])

In [107]:
np.random.randint(0, 10, size=(3, 3)) # randomly generate integers

array([[0, 8, 3],
       [4, 4, 8],
       [2, 8, 7]])

In [108]:
np.random.choice(10, size=(2, 3))

array([[3, 7, 3],
       [5, 2, 6]])