<a href="https://colab.research.google.com/github/cyrus2281/notes/blob/main/MachineLearning/ML_Prerequisites.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# NumPy

NumPy: Numerical Python

Similar to a Python list.

Many of the operations are written in C.

Lets you convert data into numbers.

**Applications**
- Linear Regression
- Logistic Regression
- Deep Neural Networks
- K-Means Clustering
- Density Estimation
- Principal Components Analysis
- Matrix Factorization
- Support Vector Machines
- Markov Models, Hidden Markov Models
- Control Systems
- Game Theory
- Operations Research
- Portfolio Optimization




In [58]:
import numpy as np

In [93]:
# Three arrays of increasing rank: a vector, a matrix and a 3-D stack.
a1 = np.array([1, 2, 3])
a2 = np.array([[1, 2.0, 3.3], [4, 5, 6.5]])
a3 = np.array([
    [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    [[10, 11, 12], [13, 14, 15], [16, 17, 18]],
])

# One output line per attribute, each listing the value for a1, a2 and a3:
#   shape - length along every axis
#   ndim  - number of dimensions
#   dtype - element data type
#   size  - total number of elements
for attribute in ("shape", "ndim", "dtype", "size"):
    print(*(getattr(arr, attribute) for arr in (a1, a2, a3)))

# Python-level object type of each array
print(*(type(arr) for arr in (a1, a2, a3)))

(3,) (2, 3) (2, 3, 3)
1 2 3
int64 float64 int64
3 6 18
<class 'numpy.ndarray'> <class 'numpy.ndarray'> <class 'numpy.ndarray'>


### Arrays vs Lists

In [59]:
# The same three values stored as a plain list and as a NumPy array.
L = [1, 2, 3]
A = np.array([1, 2, 3])

# Both containers can be walked with an ordinary for-loop.
for container in (L, A):
    for e in container:
        print(e)

1
2
3
1
2
3


In [60]:
# NOTE: append mutates L in place, so re-running this cell grows L again.
# (The original note here says arrays cannot be appended to this way.)
L.append(4)
# For lists, + concatenates and builds a new list; L itself is not changed by it
print(L + [5])
# For arrays, + adds element-wise; a length-1 operand is broadcast to every element
print(A + np.array([4]))
print(A + np.array([4,5,6])) # Operand lengths must match, or one of them must be 1

[1, 2, 3, 4, 5]
[5 6 7]
[5 7 9]


In [61]:
# Multiplying a list by an integer repeats its contents...
repeated = 2 * L
print(repeated)
print(repeated == L + L)  # ...which is the same as concatenating it with itself
# ...whereas multiplying an array scales every element by 2.
print(A * 2)

[1, 2, 3, 4, 1, 2, 3, 4]
True
[2 4 6]


In [62]:
# A list has to be squared one element at a time...
L2 = list(map(lambda value: value ** 2, L))
print(L2)
# ...while ** on an array is applied to every element in one expression.
squared_array = A ** 2
print(squared_array)

[1, 4, 9, 16]
[1 4 9]


In [63]:
# Each of these NumPy functions is applied element-wise to A.
for elementwise_fn in (np.sqrt, np.log, np.exp, np.tanh):
    print(elementwise_fn(A))

[1.         1.41421356 1.73205081]
[0.         0.69314718 1.09861229]
[ 2.71828183  7.3890561  20.08553692]
[0.76159416 0.96402758 0.99505475]


In [95]:
# View unique elements.
# np.unique returns the sorted distinct values of its (flattened) input.
# The input is defined inside this cell so the result does not depend on
# whichever array the name `A` happens to hold when the cell is executed;
# it contains a repeated 1 so that the de-duplication is actually visible.
unique_demo = np.array([[1, 1], [1.5, 4]])
unique_values = np.unique(unique_demo)
unique_values

array([1. , 1.5, 4. ])

In [97]:
# Collapse the multi-dimensional a3 into one dimension, two ways:
# both ravel() and flatten() produce the same 1-D sequence of values.
for flat in (a3.ravel(), a3.flatten()):
    print(flat)

[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]
[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18]


### Dot Product

In [64]:
# Two 2-element vectors used by the dot-product cells that follow.
a, b = np.array([1, 2]), np.array([3, 4])

In [65]:
# Dot product by hand: multiply matching components, then add them up.
dot = sum(x * y for x, y in zip(a, b))
print(dot)

# a * b is only the element-wise product; summing it gives the dot product.
elementwise = a * b
print(elementwise)
print(np.sum(elementwise))

# Three equivalent built-in spellings of the same dot product.
for result in (np.dot(a, b), a.dot(b), a @ b):
    print(result)

11
[3 8]
11
11
11
11


$$
a^Tb = \|a\| \ \|b\| \ \cos \theta_{ab} \\
\cos \theta_{ab} = \frac{a^Tb}{\|a\| \ \| b \| } \\
\| a \| = \sqrt{\sum_{d=1}^D a^2_d}
$$

In [66]:
# Magnitude from the definition: square root of the sum of squared components.
sum_of_squares = np.sum(a * a)
amag = np.sqrt(sum_of_squares)
print(amag)

# The same quantity computed by NumPy's norm function.
print(np.linalg.norm(a))

2.23606797749979
2.23606797749979


In [67]:
# cos(theta) = a.b / (|a| |b|), following the formula above.
norm_product = np.linalg.norm(a) * np.linalg.norm(b)
cosangle = (a @ b) / norm_product
print(cosangle)

# Invert the cosine to recover the angle itself (np.arccos returns radians).
angle = np.arccos(cosangle)
print(angle)

0.9838699100999074
0.17985349979247847


### Matrices

In [68]:
# The same 2x2 matrix as a nested list and as a NumPy array.
L = [[1, 2], [3, 4]]
A = np.array([[1, 2], [3, 4]])

print(L, A, sep="\n")

# A nested list is indexed one level at a time: row first, then element.
first_row = L[0]
print(first_row, first_row[1], sep="\n")

# An array accepts the same chained indexing, plus the [row, column] form.
print(A[0], A[0][1], A[0, 1], sep="\n")

[[1, 2], [3, 4]]
[[1 2]
 [3 4]]
[1, 2]
2
[1 2]
2
2


In [69]:
# ":" selects every row, 0 selects the first column.
first_column = A[:, 0]
print(first_column)

# Transpose: rows and columns swapped.
print(A.transpose())

[1 3]
[[1 3]
 [2 4]]


In [70]:
# np.exp works on the array and on the nested list alike:
# NumPy automatically converts the list to an array first.
for matrix_like in (A, L):
    print(np.exp(matrix_like))

[[ 2.71828183  7.3890561 ]
 [20.08553692 54.59815003]]
[[ 2.71828183  7.3890561 ]
 [20.08553692 54.59815003]]


In [71]:
# A 2x3 matrix to multiply with A.
B = np.array([[1, 2, 3], [4, 5, 6]])

# @ is matrix multiplication (not element-wise); the result has
# A's row count and B's column count.
product = A @ B
print(product)

[[ 9 12 15]
 [19 26 33]]


In [72]:
# Determinant (computed in floating point, so it may be slightly off
# from the exact value).
print(np.linalg.det(A))

# Inverse: not exact, a floating-point approximation.
A_inverse = np.linalg.inv(A)
print(A_inverse)

# Inverse times A is therefore the identity only up to rounding error.
print(A_inverse.dot(A))

# Trace: sum of the diagonal entries.
print(np.trace(A))

-2.0000000000000004
[[-2.   1. ]
 [ 1.5 -0.5]]
[[1.00000000e+00 0.00000000e+00]
 [1.11022302e-16 1.00000000e+00]]
5


In [73]:
# np.diag behaves differently depending on the rank of its input.
# Only the last expression of a cell is displayed automatically, so the first
# call is printed explicitly; as a bare expression its result was discarded.
print(np.diag(A))  # matrix input -> a vector of its diagonal values

np.diag([1, 4])  # vector input -> a matrix with those values on the diagonal

array([[1, 0],
       [0, 4]])

In [74]:
# eig returns the eigenvalues and the eigenvectors (one eigenvector per column of V).
Lam, V = np.linalg.eig(A)

# Check the defining relation A v = lambda v for the first eigenpair.
first_vec = V[:, 0]
scaled_vec = first_vec * Lam[0]
mapped_vec = A @ first_vec
print(scaled_vec)
print(mapped_vec)

# Exact equality can fail because of floating-point rounding error...
print(scaled_vec == mapped_vec)

# ...so compare within a tolerance instead.
print(np.allclose(scaled_vec, mapped_vec))

# The same check for all eigenpairs at once: V diag(Lam) == A V.
print(np.allclose(V @ np.diag(Lam), A @ V))

[ 0.30697009 -0.21062466]
[ 0.30697009 -0.21062466]
[ True False]
True
True


In [75]:
# eigh: eigen decomposition of a complex Hermitian (conjugate symmetric) or a
# real symmetric matrix.
# eigh does not verify symmetry: by default it reads only the lower triangle of
# its argument, so a non-symmetric input is silently treated as a different,
# symmetric matrix. A genuinely symmetric matrix is therefore defined here.
S = np.array([[1, 3],
              [3, 4]])
eigh_result = np.linalg.eigh(S)  # (eigenvalues in ascending order, eigenvectors as columns)
print(eigh_result)

(array([-0.85410197,  5.85410197]), array([[-0.85065081,  0.52573111],
       [ 0.52573111,  0.85065081]]))


$$
x = \begin{pmatrix}
x_1 \\
x_2
\end{pmatrix},
A = \begin{pmatrix}
1 & 1 \\
1.5 & 4
\end{pmatrix},
b = \begin{pmatrix}
2200 \\
5050
\end{pmatrix} \\
Ax = b \Leftrightarrow x = A^{-1} b
$$

In [76]:
# Computing an explicit inverse is slow and less accurate.
# NumPy provides a better algorithm for linear systems: np.linalg.solve.
A = np.array([[1, 1], [1.5, 4]])
b = np.array([2200, 5050])

# BAD: form the inverse of A, then multiply it by b.
A_inverse = np.linalg.inv(A)
print(A_inverse.dot(b))

# GOOD: let solve() handle Ax = b directly.
print(np.linalg.solve(A, b))

[1500.  700.]
[1500.  700.]


### Generating Data



In [77]:
# Constant-filled arrays; the tuple is the (rows, columns) shape.
demo_shape = (2, 3)
print(np.zeros(demo_shape))

all_ones = np.ones(demo_shape)
print(all_ones)

# Any other constant: scale the array of ones.
print(10 * all_ones)

[[0. 0. 0.]
 [0. 0. 0.]]
[[1. 1. 1.]
 [1. 1. 1.]]
[[10. 10. 10.]
 [10. 10. 10.]]


In [78]:
# 3x3 identity matrix: ones on the diagonal, zeros elsewhere.
identity_3 = np.eye(3)
print(identity_3)

[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


In [94]:
# Range-filled array: arange(start, stop, step); the stop value is excluded.
even_numbers = np.arange(0, 10, 2)
print(even_numbers)

[0 2 4 6 8]


In [79]:
# No seed is set in the visible cells, so these values differ on every run.
print(np.random.random()) # a single float drawn uniformly from [0, 1)

print(np.random.random((2,3))) # a tuple argument is the shape of the returned array

0.1211406080121964
[[0.32148374 0.11965323 0.45536575]
 [0.38475188 0.37329441 0.58583827]]


In [83]:
# Normal (Gaussian) distribution
print(np.random.randn(2,3)) # the arguments are the dimensions of the returned array

R = np.random.randn(1000) # 1000 draws in a 1-D array

# Sample statistics of the draws: mean, variance and standard deviation
print(R.mean())
print(R.var())
print(R.std())

[[ 1.42737805  0.14350172 -1.14614834]
 [-1.08575042 -0.50712295 -1.2140374 ]]
-0.03920715951438183
1.0288363350838907
1.0143156979382162


In [86]:
R = np.random.randn(1000, 3) # 1000 rows x 3 columns

print(R.mean(axis=0)) # averages down the rows: one mean per column (3 values)
print(R.mean(axis=1).shape) # averages across the columns: one mean per row

[-0.0203346   0.0077361  -0.00200394]
(1000,)


In [90]:
# Covariance
# np.cov defaults to rowvar=True: every ROW is treated as a variable and every
# column as an observation, so an array with 1000 rows yields a 1000x1000 matrix.
print(np.cov(R).shape)

# rowvar=False treats the columns as the variables instead
print(np.cov(R, rowvar=False))

# Transposing first gives the same result: the columns become the rows
print(np.cov(R.T))

(1000, 1000)
[[ 0.98538688  0.03421111 -0.02697163]
 [ 0.03421111  1.04196197  0.0293987 ]
 [-0.02697163  0.0293987   1.02774348]]
[[ 0.98538688  0.03421111 -0.02697163]
 [ 0.03421111  1.04196197  0.0293987 ]
 [-0.02697163  0.0293987   1.02774348]]


In [91]:
print(np.random.randint(0, 10, size=(3,3))) # low (inclusive), high (exclusive), size

print(np.random.choice(10, size=(3,3))) # an int argument n samples from np.arange(n), i.e. 0..9 here, with shape (3, 3)

[[4 3 7]
 [2 6 5]
 [7 6 7]]


In [92]:
# Evenly spaced (not random) values over an interval:
# 100 points from -6 to 6.
evenly_spaced = np.linspace(-6, 6, 100)
print(evenly_spaced)

[-6.         -5.87878788 -5.75757576 -5.63636364 -5.51515152 -5.39393939
 -5.27272727 -5.15151515 -5.03030303 -4.90909091 -4.78787879 -4.66666667
 -4.54545455 -4.42424242 -4.3030303  -4.18181818 -4.06060606 -3.93939394
 -3.81818182 -3.6969697  -3.57575758 -3.45454545 -3.33333333 -3.21212121
 -3.09090909 -2.96969697 -2.84848485 -2.72727273 -2.60606061 -2.48484848
 -2.36363636 -2.24242424 -2.12121212 -2.         -1.87878788 -1.75757576
 -1.63636364 -1.51515152 -1.39393939 -1.27272727 -1.15151515 -1.03030303
 -0.90909091 -0.78787879 -0.66666667 -0.54545455 -0.42424242 -0.3030303
 -0.18181818 -0.06060606  0.06060606  0.18181818  0.3030303   0.42424242
  0.54545455  0.66666667  0.78787879  0.90909091  1.03030303  1.15151515
  1.27272727  1.39393939  1.51515152  1.63636364  1.75757576  1.87878788
  2.          2.12121212  2.24242424  2.36363636  2.48484848  2.60606061
  2.72727273  2.84848485  2.96969697  3.09090909  3.21212121  3.33333333
  3.45454545  3.57575758  3.6969697   3.81818182  3.

# Matplotlib

# Pandas

# SciPy