In [1]:
import numpy as np

# Exercises

There are three exercises in this notebook:

1. Use cross-validation to evaluate the linear regression for at least three different $\alpha$ values.
2. Implement an SGD method that trains the Lasso regression for 10 epochs.
3. Extend Fisher's classifier to work with two features. Use the class as the $y$.

## 1. Cross-validation linear regression

You need to change the variable ``alpha`` into a list of alphas. Next, loop over that list, and finally compare the results.

In [2]:
# 15 samples: one input feature and one target, both stored as 15x1 columns.
x = np.array([188, 181, 197, 168, 167, 187, 178, 194, 140, 176, 168, 192, 173, 142, 176]).reshape(15, 1)
y = np.array([141, 106, 149, 59, 79, 136, 65, 136, 52, 87, 115, 140, 82, 69, 121]).reshape(15, 1)

# Prepend a column of ones so the first weight plays the role of the intercept.
# Converting to np.matrix makes `*` below mean matrix multiplication.
x = np.asmatrix(np.hstack([np.ones((15, 1)), x]))

I = np.identity(2)
alpha = 0.1 # change here

# add 1-3 line of code here
alphas = [0.01, 0.1, 1.0]
mse_values = []
# Closed-form ridge solution (X^T X + alpha * I)^-1 X^T y for every alpha,
# each flattened into a 1x2 row matrix.
weights = [(np.linalg.inv(x.T * x + a * I) * x.T * y).ravel() for a in alphas]

# add 1-3 lines to compare the results
# Report the mean squared error of each fit on the same data it was fitted to.
for i, (alpha, w) in enumerate(zip(alphas, weights)):
    y_pred = x * w.T
    mse = np.square(y - y_pred).mean()
    mse_values.append(mse)
    print(f"Alpha: {alpha}, Weights: {w}, MSE: {mse:.4f}")

Alpha: 0.01, Weights: [[-167.85534019    1.54416013]], MSE: 373.7938
Alpha: 0.1, Weights: [[-101.72397081    1.16978757]], MSE: 426.0451
Alpha: 1.0, Weights: [[-20.59044706   0.71048616]], MSE: 592.4636


## 2. Implement the Lasso regression, based on the Ridge regression example.

Please implement the SGD method and compare the results with the sklearn Lasso regression results. 

In [3]:
def sgd(X, y, alpha=0.1, learning_rate=0.0001, epochs=10, seed=42):
    """Fit an L1-regularised (Lasso-style) linear model with per-sample SGD.

    Every epoch visits the samples once in a freshly shuffled order. Each
    step combines the squared-error gradient of the current sample (scaled
    by ``1 / n_samples``) with the L1 subgradient ``alpha * sign(w)``.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Design matrix. The L1 term is applied to every column, so a column
        of ones used as an intercept is penalised as well.
    y : array-like, shape (n_samples,) or (n_samples, 1)
        Target values.
    alpha : float, default 0.1
        Strength of the L1 penalty.
    learning_rate : float, default 0.0001
        Step size of each update.
    epochs : int, default 10
        Number of full passes over the data.
    seed : int or None, default 42
        Seed of the private generator used for shuffling. The default
        reproduces the sample order of the earlier hard-coded global seed.

    Returns
    -------
    numpy.ndarray, shape (n_features, 1)
        The weights after the last update.

    Raises
    ------
    ValueError
        If ``X`` and ``y`` do not contain the same number of samples.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    n_samples = X.shape[0]

    if y.ndim == 0 or y.shape[0] != n_samples:
        raise ValueError(
            f"X and y must have the same number of samples, got {n_samples} and "
            f"{y.shape[0] if y.ndim else 'a scalar'}"
        )
    if y.ndim == 1:
        # Work with a column vector so the residual below is always (1, 1).
        y = y[:, None]

    w = np.zeros((X.shape[1], 1))

    # A private legacy generator yields the same permutations as seeding the
    # global one, but leaves the caller's np.random state untouched.
    rng = np.random.RandomState(seed)

    for _ in range(epochs):
        for idx in rng.permutation(n_samples):
            xi = X[idx:idx + 1]
            yi = y[idx:idx + 1]

            residual = yi - xi.dot(w)

            grad_mse = -2 * xi.T.dot(residual) / n_samples
            # np.sign(0) == 0, so weights that are exactly zero get no L1 push.
            grad_l1 = alpha * np.sign(w)

            w = w - learning_rate * (grad_mse + grad_l1)

    return w


In [4]:
# 15 samples: one input feature and one target, both stored as 15x1 columns.
x = np.array([188, 181, 197, 168, 167, 187, 178, 194, 140, 176, 168, 192, 173, 142, 176]).reshape(15, 1)
y = np.array([141, 106, 149, 59, 79, 136, 65, 136, 52, 87, 115, 140, 82, 69, 121]).reshape(15, 1)

# Design matrix with a leading column of ones for the intercept weight.
x = np.asmatrix(np.hstack([np.ones((15, 1)), x]))

I = np.identity(2)
alpha = 0.1

# Update this line to use Lasso regression
# Train with SGD and flatten the (2, 1) weight column into a 1-D array.
w = sgd(x, y, alpha=alpha, learning_rate=1e-7, epochs=10).ravel()
w


array([0.00019717, 0.0356383 ])

## 3. Extend the Fisher's classifier

Please extend the targets of the ``iris_data`` variable and use it as the $y$.

In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris

iris_data = load_iris()
iris_df = pd.DataFrame(iris_data.data, columns=iris_data.feature_names)
# Not the last expression of the cell, so this preview is not displayed.
iris_df.head()

# Restrict to the samples whose target is 0 or 1.
two_class = iris_data.target < 2
x = iris_df.loc[two_class, 'sepal width (cm)'].values # change here
y = iris_data.target[two_class] # change here

dataset_size = x.size

mean_x = x.mean()
mean_y = y.mean()

# Sums of cross-deviations and squared deviations for a one-feature
# least-squares line y = a * x + b.
SS_xy = (y * x).sum() - dataset_size * mean_y * mean_x
SS_xx = (x * x).sum() - dataset_size * mean_x * mean_x

a = SS_xy / SS_xx
b = mean_y - a * mean_x


# Continuous score of the fitted line for each selected sample.
y_pred = a * x + b
y_pred

array([ 0.20927814,  0.57177423,  0.42677579,  0.49927501,  0.13677892,
       -0.08071873,  0.28177735,  0.28177735,  0.64427344,  0.49927501,
        0.0642797 ,  0.28177735,  0.57177423,  0.57177423, -0.15321795,
       -0.44321482, -0.08071873,  0.20927814, -0.00821952, -0.00821952,
        0.28177735,  0.0642797 ,  0.13677892,  0.35427657,  0.28177735,
        0.57177423,  0.28177735,  0.20927814,  0.28177735,  0.42677579,
        0.49927501,  0.28177735, -0.22571717, -0.29821639,  0.49927501,
        0.42677579,  0.20927814,  0.13677892,  0.57177423,  0.28177735,
        0.20927814,  1.07926875,  0.42677579,  0.20927814, -0.00821952,
        0.57177423, -0.00821952,  0.42677579,  0.0642797 ,  0.35427657,
        0.42677579,  0.42677579,  0.49927501,  1.07926875,  0.71677266,
        0.71677266,  0.35427657,  1.00676953,  0.64427344,  0.78927188,
        1.2967664 ,  0.57177423,  1.15176797,  0.64427344,  0.64427344,
        0.49927501,  0.57177423,  0.78927188,  1.15176797,  0.93