In [7]:
import pandas as pd
import numpy as np

In [8]:
data = pd.read_csv('Fish Data - A2.csv')

mean = data.mean()
std = data.std()

In [9]:
mean, std

(Height      8.970994
 Width       4.417486
 Weight    398.326415
 dtype: float64,
 Height      4.286208
 Width       1.685804
 Weight    357.978317
 dtype: float64)

In [10]:
data.isna().sum()

Height    0
Width     0
Weight    0
dtype: int64

### Preprocessing

In [11]:
data.fillna(data.mean(), inplace=True)

In [12]:
mean_ht = data['Height'].mean()
mean_wd = data['Width'].mean()
std_ht = data['Height'].std()
std_wd = data['Width'].std()

mean_ht, mean_wd, std_ht, std_wd

(8.970993710691824, 4.417485534591195, 4.286207619968867, 1.6858038699921671)

In [13]:
data['Width'] = (data['Width'] - mean_wd) / std_wd
data['Height'] = (data['Height'] - mean_ht) / std_ht

In [17]:
data = data.sample(frac=1, random_state=42)

train_size = int(0.8 * len(data))

train_data = data[:train_size]
test_data = data[train_size:]

X_train_ht = train_data['Height'].values
X_train_wd = train_data['Width'].values
X_test_ht = test_data['Height'].values
X_test_wd = test_data['Width'].values

y_train = train_data['Weight'].values
y_test = test_data['Weight'].values

In [18]:
len(train_data), len(test_data)

(127, 32)

### Polynomial Regression

In [102]:
degrees = [_ for _ in range(10)]
q_vals = [0.5, 1, 2, 4]

In [107]:
import random

def create_pol(X1, X2, degree):
    X_pol = np.column_stack([X1 ** i * X2 ** j for i in range(degree + 1) for j in range(degree + 1) if (i + j) <= degree])
    return X_pol

def init_wt_bias(degree):
    w = np.random.randn(len(create_pol(1, 1, degree)[0])) * 0.01
    b = 0
    return w, b

def eval_model(X, y, w, b):
    predictions = np.dot(X, w) + b
    mse = np.mean((predictions - y) ** 2)
    return mse

def batch_gradient_descent(X, y, w, b, lr, num_iters, lambda_, q):
    for _ in range(num_iters):
        prediction = np.dot(X, w) + b
        gradient_w = (1/len(y)) * np.dot(X.T, (prediction - y)) + 0.5* lambda_ * q * np.power(w, q - 1)
        gradient_b = (1/len(y)) * np.sum(prediction - y)
        
        w -= lr * gradient_w
        b -= lr * gradient_b
        
    return w, b

def stochastic_gradient_descent(X, y, w, b, lr, num_iters, lambda_, q):
    N = len(y)
    
    for _ in range(num_iters):
        for i in range(N):
            random_idx = random.randint(0, N - 1)
            xi = X[random_idx]
            yi = y[random_idx]
            
            prediction = np.dot(xi, w) + b
            
            gradient_w = (1/N) * np.dot(xi, (prediction - yi)) + 0.5 * lambda_ * q * np.power(w, q - 1)
            gradient_b = (1/N) * np.sum(prediction - yi)
            
            w -= lr * gradient_w
            b -= lr * gradient_b
        
    return w, b

In [108]:
lambdas_ = np.linspace(0, 1, num=11)

In [109]:
models = {}

for q in q_vals:
    for lambda_ in lambdas_:
        print(f'Experimenting with q = {q}, lambda = {lambda_}')
        
        for degree in degrees:
            X_train_pol = create_pol(X_train_ht, X_train_wd, degree=degree)
            
            w, b = init_wt_bias(degree=degree)
            
            w_batch, b_batch = batch_gradient_descent(X_train_pol, y_train, w, b, 0.0005, 500, lambda_=lambda_, q=q)
            w_stoc, b_stoc = stochastic_gradient_descent(X_train_pol, y_train, w, b, 0.0005, 500, lambda_=lambda_, q=q)
            
            X_test_pol = create_pol(X_test_ht, X_test_wd, degree=degree)
            test_error_batch = eval_model(X_test_pol, y_test, w_batch, b_batch)
            test_error_stoc = eval_model(X_test_pol, y_test, w_stoc, b_stoc)
            
            models[(q, lambda_, 'B')] = (test_error_batch)
            models[(q, lambda_, 'S')] = (test_error_stoc)

Experimenting with q = 0.5, lambda = 0.0


  gradient_w = (1/len(y)) * np.dot(X.T, (prediction - y)) + 0.5* lambda_ * q * np.power(w, q - 1)


Experimenting with q = 0.5, lambda = 0.1
Experimenting with q = 0.5, lambda = 0.2
Experimenting with q = 0.5, lambda = 0.30000000000000004
Experimenting with q = 0.5, lambda = 0.4
Experimenting with q = 0.5, lambda = 0.5
Experimenting with q = 0.5, lambda = 0.6000000000000001
Experimenting with q = 0.5, lambda = 0.7000000000000001
Experimenting with q = 0.5, lambda = 0.8
Experimenting with q = 0.5, lambda = 0.9
Experimenting with q = 0.5, lambda = 1.0
Experimenting with q = 1, lambda = 0.0


  mse = np.mean((predictions - y) ** 2)
  w -= lr * gradient_w


Experimenting with q = 1, lambda = 0.1
Experimenting with q = 1, lambda = 0.2


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Experimenting with q = 1, lambda = 0.30000000000000004
Experimenting with q = 1, lambda = 0.4
Experimenting with q = 1, lambda = 0.5
Experimenting with q = 1, lambda = 0.6000000000000001
Experimenting with q = 1, lambda = 0.7000000000000001
Experimenting with q = 1, lambda = 0.8
Experimenting with q = 1, lambda = 0.9
Experimenting with q = 1, lambda = 1.0
Experimenting with q = 2, lambda = 0.0


  gradient_w = (1/len(y)) * np.dot(X.T, (prediction - y)) + 0.5* lambda_ * q * np.power(w, q - 1)


Experimenting with q = 2, lambda = 0.1
Experimenting with q = 2, lambda = 0.2
Experimenting with q = 2, lambda = 0.30000000000000004
Experimenting with q = 2, lambda = 0.4
Experimenting with q = 2, lambda = 0.5
Experimenting with q = 2, lambda = 0.6000000000000001
Experimenting with q = 2, lambda = 0.7000000000000001
Experimenting with q = 2, lambda = 0.8
Experimenting with q = 2, lambda = 0.9
Experimenting with q = 2, lambda = 1.0
Experimenting with q = 4, lambda = 0.0


  gradient_w = (1/len(y)) * np.dot(X.T, (prediction - y)) + 0.5* lambda_ * q * np.power(w, q - 1)


Experimenting with q = 4, lambda = 0.1
Experimenting with q = 4, lambda = 0.2
Experimenting with q = 4, lambda = 0.30000000000000004
Experimenting with q = 4, lambda = 0.4
Experimenting with q = 4, lambda = 0.5
Experimenting with q = 4, lambda = 0.6000000000000001
Experimenting with q = 4, lambda = 0.7000000000000001
Experimenting with q = 4, lambda = 0.8
Experimenting with q = 4, lambda = 0.9
Experimenting with q = 4, lambda = 1.0


In [110]:
models

{(0.5, 0.0, 'B'): nan,
 (0.5, 0.0, 'S'): nan,
 (0.5, 0.1, 'B'): nan,
 (0.5, 0.1, 'S'): nan,
 (0.5, 0.2, 'B'): nan,
 (0.5, 0.2, 'S'): nan,
 (0.5, 0.30000000000000004, 'B'): nan,
 (0.5, 0.30000000000000004, 'S'): nan,
 (0.5, 0.4, 'B'): nan,
 (0.5, 0.4, 'S'): nan,
 (0.5, 0.5, 'B'): nan,
 (0.5, 0.5, 'S'): nan,
 (0.5, 0.6000000000000001, 'B'): nan,
 (0.5, 0.6000000000000001, 'S'): nan,
 (0.5, 0.7000000000000001, 'B'): nan,
 (0.5, 0.7000000000000001, 'S'): nan,
 (0.5, 0.8, 'B'): nan,
 (0.5, 0.8, 'S'): nan,
 (0.5, 0.9, 'B'): nan,
 (0.5, 0.9, 'S'): nan,
 (0.5, 1.0, 'B'): nan,
 (0.5, 1.0, 'S'): nan,
 (1, 0.0, 'B'): nan,
 (1, 0.0, 'S'): nan,
 (1, 0.1, 'B'): nan,
 (1, 0.1, 'S'): nan,
 (1, 0.2, 'B'): nan,
 (1, 0.2, 'S'): nan,
 (1, 0.30000000000000004, 'B'): nan,
 (1, 0.30000000000000004, 'S'): nan,
 (1, 0.4, 'B'): nan,
 (1, 0.4, 'S'): nan,
 (1, 0.5, 'B'): nan,
 (1, 0.5, 'S'): nan,
 (1, 0.6000000000000001, 'B'): nan,
 (1, 0.6000000000000001, 'S'): nan,
 (1, 0.7000000000000001, 'B'): nan,
 (1, 0.700