In [3]:
import numpy as np
import pandas as pd

# Load the dataset
columns = ['Gender', 'Length', 'Diameter', 'Height', 'Whole_weight', 'Shucked_weight', 'Viscera_weight', 'Shell_weight', 'Rings']
data = pd.read_csv('abalone.csv', names=columns)

# Drop the gender feature
data = data.drop(columns=['Gender'])

# Convert the dataframe into a NumPy array for easier processing
X = data.iloc[:, :-1].values  # Features (all columns except the last)
y = data.iloc[:, -1].values    # Target variable (the last column)

# Split the dataset into training and testing sets
np.random.seed(42)  # For reproducibility
indices = np.random.permutation(len(X))
train_size = int(len(X) * 0.8)
train_indices, test_indices = indices[:train_size], indices[train_size:]

X_train, X_test = X[train_indices], X[test_indices]
y_train, y_test = y[train_indices], y[test_indices]

# Function to calculate Mean Squared Error
def mean_squared_error(y_true, y_pred):
    return np.mean((y_true - y_pred) ** 2)

# Linear Regression Implementation
def linear_regression(X, y):
    # Add a bias term (column of ones) to the feature matrix
    X_b = np.c_[np.ones((X.shape[0], 1)), X]  # Add x0 = 1 to each instance
    theta_best = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)
    return theta_best

# Predict using Linear Regression
theta_linear = linear_regression(X_train, y_train)
X_test_b = np.c_[np.ones((X_test.shape[0], 1)), X_test]  # Add bias term for test
y_pred_train_linear = X_train.dot(theta_linear[1:]) + theta_linear[0]
y_pred_test_linear = X_test_b.dot(theta_linear)

# Calculate MSE for Linear Regression
mse_train_linear = mean_squared_error(y_train, y_pred_train_linear)
mse_test_linear = mean_squared_error(y_test, y_pred_test_linear)

# Polynomial Regression Implementation
def polynomial_features(X, degree):
    """Generate polynomial features."""
    return np.column_stack([X ** d for d in range(1, degree + 1)])

def polynomial_regression(X, y, degree):
    X_poly = polynomial_features(X, degree)
    X_poly_b = np.c_[np.ones((X_poly.shape[0], 1)), X_poly]  # Add bias term
    theta_best = np.linalg.inv(X_poly_b.T.dot(X_poly_b)).dot(X_poly_b.T).dot(y)
    return theta_best

# Predict using Polynomial Regression
for degree in [2, 3]:
    theta_poly = polynomial_regression(X_train, y_train, degree)
    X_poly_test = polynomial_features(X_test, degree)
    X_poly_train = polynomial_features(X_train, degree)
    
    y_pred_train_poly = X_poly_train.dot(theta_poly[1:]) + theta_poly[0]
    y_pred_test_poly = np.c_[np.ones((X_poly_test.shape[0], 1)), X_poly_test].dot(theta_poly)
    
    mse_train_poly = mean_squared_error(y_train, y_pred_train_poly)
    mse_test_poly = mean_squared_error(y_test, y_pred_test_poly)
    
    print(f'Polynomial Regression (degree={degree}):')
    print(f'Train MSE: {mse_train_poly:.4f}, Test MSE: {mse_test_poly:.4f}')

# RBF Kernel Regression Implementation
def rbf_kernel(x1, x2, gamma=0.1):
    """Compute the RBF kernel between two sets of samples."""
    sq_dists = np.sum(x1**2, axis=1).reshape(-1, 1) + np.sum(x2**2, axis=1) - 2 * np.dot(x1, x2.T)
    return np.exp(-gamma * sq_dists)

def rbf_regression(X_train, y_train, X_test, gamma=0.1):
    K_train = rbf_kernel(X_train, X_train, gamma)
    K_test = rbf_kernel(X_test, X_train, gamma)
    alpha = np.linalg.inv(K_train).dot(y_train)  # (K_train)^-1 * y_train
    y_pred_train = K_train.dot(alpha)
    y_pred_test = K_test.dot(alpha)
    return y_pred_train, y_pred_test

# Predict using RBF Kernel Regression
y_pred_train_rbf, y_pred_test_rbf = rbf_regression(X_train, y_train, X_test)

# Calculate MSE for RBF Kernel Regression
mse_train_rbf = mean_squared_error(y_train, y_pred_train_rbf)
mse_test_rbf = mean_squared_error(y_test, y_pred_test_rbf)

print(f'RBF Kernel Regression:')
print(f'Train MSE: {mse_train_rbf:.4f}, Test MSE: {mse_test_rbf:.4f}')

# Final results
print(f'Linear Regression:')
print(f'Train MSE: {mse_train_linear:.4f}, Test MSE: {mse_test_linear:.4f}')

Polynomial Regression (degree=2):
Train MSE: 4.7083, Test MSE: 3.7380
Polynomial Regression (degree=3):
Train MSE: 4.6784, Test MSE: 3.7068
RBF Kernel Regression:
Train MSE: 586987.3921, Test MSE: 5007640.9091
Linear Regression:
Train MSE: 5.1059, Test MSE: 4.1286
