In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import math

#### 1- See how round-off errors behave when computing the derivative of a function. Use $f_0 = \sin( x_0 )$ and compute its derivative with a range of discretization step sizes $h$. Experiment with different points $x_0$ of differentiation.

Steps:
+ Choose a point of differentiation $x_0$
+ Choose a function $f_0$. Here $sin$ is given.
+ Assign the derivative of $f_0$ to, for example, $f_p$
+ Choose a range $i = [ -20, 0 ]$, with 0.5 steps, of discretizations $h = 10^{(\text{list of i})}$
+ Compute the absolute error: $\left|f_p - \frac{\sin( x_0+h ) - f_0 }{h}\right|$
+ Compare the absolute error against the discretization error without round-off errors $\frac{1}{2}|f^{''}(x_0)|h$

In [None]:
# Point of differentiation; change it to see how the error curves move.
x0 = 1.2
# TODO: value of the function at x0 (sin is the function given in the steps above).
f0 = 
# TODO: exact derivative of the function at x0, used as the reference value.
fp = 
# TODO: exponents in [-20, 0] with 0.5 steps.
i  = np.arange()
# TODO: step sizes h = 10**i.
h  = 

In [None]:
err =  # Tip: Use list comprehension
derr = f0 / 2 * h # Discretization error without roundoff 

In [None]:
plt.loglog( h, ?, '-ob' )
plt.loglog( h, ?, '--r' )
plt.title( f'Discretation and round-off error: x0: {x0}, f0: Sin(x0), fp: Cos(x0)')
plt.xlabel( 'Discretization h' )
plt.ylabel( 'Absolute error' )
plt.show()

#### 2- Linear least-squares revisited on polynomial fitting with normal equations. Compare the polynomial coefficients to the backpropagation method from the previous exercise.

Solve the algebraic problem of $\underset{\vec{x}}{\text{min}} ||\vec{b}-A\vec{x}||_2, \ A \in \mathbb{R}^{m \times n}, \ \vec{x} \in \mathbb{R}^n, \ \vec{b} \in \mathbb{R}^m, m \geq n$

+ Create $B = A^T A$ (a), and $\vec{y} = A^T \vec{b}$ (b)
+ Use the Cholesky factorization to solve $B\vec{x} = \vec{y}$. That is, for $B = GG^T$:
    + Solve lower triangular system $G\vec{z} = \vec{y}$ for $\vec{z}$ (c)
    + Solve upper triangular system $G^T\vec{x} = \vec{z}$ for $\vec{x}$ (d)
    + Reminder/tip: a lower triangular matrix has nonzero entries only on and below the diagonal.

In [None]:
# Polynomial least-squares fit of the data (t, b) via the normal equations.
# t and b are reshaped to column vectors; n controls how many columns of A
# are filled by the loop below.  Returns the solution of B x = y as a flat
# 1-D array.
def least_squares_fit(t, b, n):
    t = t.reshape(-1, 1)
    b = b.reshape(-1, 1)
    m = # Hint: number of equidistant points
    A = # Hint: Contains all ones
    # Each column is the previous column multiplied element-wise by t, so if
    # A starts as all ones, column j holds t**j.
    for j in range(1, n):
        A[:, j] = A[:, j-1] * t.flatten()
    B = ? @  # (a)
    y = ? @  # (b)
    # NOTE(review): this is a general dense solve; the exercise text asks for
    # a Cholesky factorization followed by two triangular solves -- confirm
    # which of the two is intended here.
    coefs = np.linalg.solve(B, y)
    return coefs.flatten()

In [None]:
# TODO: choose the number of data points m and the maximum degree d.
m = ?
d = ? # max degree of polynomial fitting
# m equidistant sample points on [0, 1] and the function values to fit.
tt = np.linspace(0, 1, m)
bb = np.cos(2 * np.pi * tt) # Try out other functions like sine

# One coefficient vector per n, keyed by the n passed to least_squares_fit.
coefs = {}
for n in range(1, ?):
    coefs[n] = least_squares_fit(tt, bb, n)

# Grid of 101 points on [0, 1]; row n-1 of z is filled in by the loops below.
t = np.linspace( 0, 1, 101 )
z = np.ones( ( ?, 101 ) )
# (c) lower triangular G (Cholesky factor)
# NOTE(review): the repeated update `z * t + ?` below has the shape of a
# Horner-style polynomial evaluation on t rather than a triangular solve --
# confirm what the (c)/(d) labels are meant to refer to.
for n in range( 1, ? ):
    z[n-1, :] = z[n-1, :] * ?
    # (d) upper triangular G^T (Cholesky factor)
    for j in range( ?, ?, ?):
        z[n-1, :] = z[n-1, :] * t + ?

# The data (tt, bb) is drawn twice: as red circles in the first call and as a
# labelled line (the only legend entry) in the second.
plt.plot(t, ?, tt, bb, 'ro')
plt.plot( tt, bb, label = 'data')
plt.legend()
plt.xlabel('t')
plt.ylabel('p_{n-1}')
plt.show()

In [None]:
# Backpropagation code here

In [None]:
# Print polynomial coefficients from using backpropagation and least-squares with normal equations


#### 3- Eigenvalue solvers for blind source separation: How sensitive are Principal Component Analysis (PCA) and Independent Component Analysis (ICA) to the number of observations, given a random process? What changes the most in the PCA and ICA projections?

##### Solving the eigenvalue problem $A\vec{x} = \lambda \vec{x}, \ A \in \mathbb{R}^{2 \times 2}, \ \vec{x} \in \mathbb{R}^2, \ \lambda \in \mathbb{R}$

##### Both PCA and ICA use the Singular Value Decomposition (SVD) to extract the relevant eigenvalues and eigenvectors from the given data: $A = U \Sigma V^T$, where the columns of $U$ and $V$ are the left and right orthonormal basis vectors and $\Sigma$ holds the singular values, sorted from high to low.

Try out:
+ Student t Distribution ( https://en.wikipedia.org/wiki/Student's_t-distribution )
+ Pareto Distribution ( https://en.wikipedia.org/wiki/Pareto_distribution )
+ As a bonus 1: check other distributions from np_rng class. Use external sources to checkout properties of other distributions if you're going to use other than the two above.
+ Bonus 2: Check out the ratio of the largest and smallest eigenvalues of the observed data, using np.linalg.svd

In [None]:
from sklearn.decomposition import PCA, FastICA
from numpy.random import SeedSequence, default_rng
# Generator seeded from fresh OS entropy; it is only used to pick the FastICA seed,
# so the ICA initialisation differs from run to run.
rng = default_rng( SeedSequence().entropy )

# Fixed-seed generator for the source samples.
np_rng = np.random.RandomState( 4432 )

# Two sources drawn from a Student t distribution (1.5 degrees of freedom);
# the first source is stretched by a factor of two.
S = np_rng.standard_t(1.5, size=(20000, 2))
S[:, 0] *= 2.

# Mixing matrix
A = np.array([[1, 0], [0, 2]])

# Observations are the matrix product S A^T
X = S @ A.T

# Fit each estimator on the observations, then project the same observations.
pca    = PCA().fit(X)
S_pca_ = pca.transform(X)

ica    = FastICA(random_state=rng.integers( 42 ) ).fit(X)
S_ica_ = ica.transform(X)  # estimated sources

# Normalise every recovered component (column) by its own standard deviation
S_ica_ /= np.std(S_ica_, axis=0)

In [None]:
def plot_samples(S, axis_list=None):
    """Scatter-plot 2-D samples on the current axes, optionally with direction arrows.

    Parameters
    ----------
    S : array-like indexed as ``S[:, 0]`` (x values) and ``S[:, 1]`` (y values).
    axis_list : sequence of arrays, optional
        At most two entries are drawn (orange first, then red).  Each entry is
        scaled by its overall standard deviation and unpacked into two rows
        that are handed to ``plt.quiver`` as the arrow components.  The
        caller's arrays are left untouched.

    Returns
    -------
    None.  Everything is drawn through the pyplot state machine; the view is
    fixed to [-3, 3] on both axes with cross-hair lines through the origin.
    """
    plt.scatter(S[:, 0], S[:, 1], s=2, marker='o', zorder=10,
                color='steelblue', alpha=0.5)
    if axis_list is not None:
        colors = ['orange', 'red']
        for color, axis in zip(colors, axis_list):
            # Out-of-place division: an in-place `/=` would rescale the
            # caller's array (e.g. a fitted model's components) and would
            # raise for integer-typed input.
            axis = axis / axis.std()
            x_axis, y_axis = axis
            # Trick to get legend to work
            plt.plot(0.1 * x_axis, 0.1 * y_axis, linewidth=2, color=color)
            plt.quiver((0, 0), (0, 0), x_axis, y_axis, zorder=11, width=0.01,
                       scale=6, color=color)

    plt.hlines(0, -3, 3)
    plt.vlines(0, -3, 3)
    plt.xlim(-3, 3)
    plt.ylim(-3, 3)
    plt.xlabel('x')
    plt.ylabel('y')

In [None]:
plt.figure( figsize = ( 14, 9 ) )

# Top-left: the original sources, scaled by their overall standard deviation
plt.subplot(2, 2, 1)
plot_samples(S / np.std(S))
plt.title('True Independent Sources')

# Top-right: the mixed observations with the PCA components and the ICA mixing
# matrix drawn as arrows
plt.subplot(2, 2, 2)
axis_list = [pca.components_.T, ica.mixing_]
plot_samples(X / X.std(), axis_list=axis_list)
legend = plt.legend(['PCA', 'ICA'], loc='upper right')
legend.set_zorder(100)
plt.title('Observations')

# Bottom-left: PCA projection, each component scaled by its own standard deviation
plt.subplot(2, 2, 3)
plot_samples(S_pca_ / S_pca_.std(axis=0))
plt.title('PCA recovered signals')

# Bottom-right: ICA projection, scaled by the overall standard deviation
plt.subplot(2, 2, 4)
plot_samples(S_ica_ / S_ica_.std())
plt.title('ICA recovered signals')

plt.subplots_adjust(left=0.09, bottom=0.04, right=0.94, top=0.94,
                    wspace=0.26, hspace=0.36)
plt.show()

#### 4- Implement a linear regression using automatic differentiation. Try different learning rates and numbers of iterations for the gradient descent steps. How sensitive is the model to the learning rate and the number of iterations?
+ (a) implement Wx + b
+ (b) implement Mean squared error ( hint: np.square )
+ (c) vary the number of points n and the noise level. How sensitive is the method?

In [None]:
!pip install autograd

In [None]:
import autograd.numpy as np
from autograd import grad

In [None]:
# Define the linear regression model: Xw + b ( hint: dot product )
# X is built further down with shape (n, 1); w and b are 1x1 arrays.
def model(X, w, b):
    return ? # (a)

# Define the loss function (Mean Squared Error)
# The training loop passes params as the list [w, b].
def loss(params, X, y):
    return np.mean( ? ) # (b)

# Create a gradient function for the loss function
# The training loop calls it as loss_grad([w, b], X, y) and unpacks the result
# into (grad_w, grad_b).
loss_grad = grad( ? )

In [None]:
# Synthetic data: the line y = 2x + 1 sampled at x = 0 .. n-1, plus random noise
n     = 10 # (c) number of data points
X     = np.reshape(np.arange(n), (-1, 1))  # column vector of shape (n, 1)
noise = np.random.randn(n, 1) # (c) one standard-normal sample per data point
y     = 2 * X + 1 + noise

# Report the problem size and the shape of every array
print('Num of data points:', n)
for label, array in (('X shape:', X), ('noise shape:', noise), ('y shape:', y)):
    print(label, array.shape)

In [None]:
# Initialize weights and biases randomly
# (each is a 1x1 array of uniform samples from [0, 1))
w = np.random.rand(1, 1)
b = np.random.rand(1, 1)

# Perform gradient descent
# Fixed step size and a fixed budget of 100 iterations; both are the knobs the
# exercise asks you to vary.
learning_rate = 0.01
for i in range(100):
    # Gradients come back in the same order as the params list [w, b].
    grad_w, grad_b = loss_grad([w, b], X, y)
    # Log the state *before* this iteration's update is applied.
    print(f'i: {i}, w={w}, loss={loss([w, b], X, y)}, grad_w={grad_w}, grad_b={grad_b}')
    # Step against the gradient; w and b are updated in place.
    w -= learning_rate * grad_w
    b -= learning_rate * grad_b

# Print the learned weights and biases
print("Learned w:", w)
print("Learned b:", b)

In [None]:
# Make predictions on the same data
# (the training inputs X with the learned w and b; no held-out data is used)
predictions = model(X, w, b)

# Compare predicted and real values
print("Predicted values:", predictions)
print("Actual values:", y)

# Visualize the comparison (optional)
# Noisy targets are drawn as circles, model output as a line with x markers.
import matplotlib.pyplot as plt
plt.plot(X, y, 'o', label='Actual data')
plt.plot(X, predictions, '-x', label='Predictions')
plt.legend()
plt.show()