Assignment 4 of the course “Introduction to Machine Learning” at the University of Leoben.
Author: Fotios Lygerakis
Semester: SS 2022/2023

Import the libraries

In [1]:
import pandas as pd
import numpy as np

Create the Regression Models

In [2]:
class Predictor:
    def __init__(self):
        self.coefficients = None

    def fit(self, X, y):
        pass

    def predict(self, X):
        pass

class LinearRegression(Predictor):
    def __init__(self):
        self.coefficients = None

    def fit(self, X, y):
        X = np.insert(X, 0, 1, axis=1)
        y = y.values.reshape(-1, 1)
        self.coefficients = np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y)

    def predict(self, X):
        X = np.insert(X, 0, 1, axis=1)
        return X.dot(self.coefficients)

class RidgeRegression(Predictor):
    def __init__(self, alpha=1):
        self.alpha = alpha
        self.coefficients = None

    def fit(self, X, y):
        X = np.insert(X, 0, 1, axis=1)
        y = y.values.reshape(-1, 1)
        n_features = X.shape[1]
        I = np.identity(n_features)
        I[0, 0] = 0
        self.coefficients = np.linalg.inv(X.T.dot(X) + self.alpha * I).dot(X.T).dot(y)

    def predict(self, X):
        X = np.insert(X, 0, 1, axis=1)
        return X.dot(self.coefficients)

class LassoRegression(Predictor):
    def __init__(self, alpha=1, max_iter=1000, tol=0.0001):
        self.alpha = alpha
        self.max_iter = max_iter
        self.tol = tol
        self.coefficients = None

    def fit(self, X, y):
        X = np.insert(X, 0, 1, axis=1)
        y = y.values.reshape(-1, 1)
        n_samples, n_features = X.shape
        self.coefficients = np.zeros((n_features, 1))
        for _ in range(self.max_iter):
            coefficients_old = np.copy(self.coefficients)
            for j in range(n_features):
                X_j = X[:, j].reshape(-1, 1)
                y_pred = X.dot(self.coefficients) - X_j.dot(self.coefficients[j])
                rho = X_j.T.dot(y - y_pred)
                if j == 0:
                    self.coefficients[j] = rho
                else:
                    if rho < -self.alpha/2:
                        self.coefficients[j] = rho + self.alpha/2
                    elif rho > self.alpha/2:
                        self.coefficients[j] = rho - self.alpha/2
                    else:
                        self.coefficients[j] = 0
            if np.max(np.abs(self.coefficients

Data Preprocessing and Data loading functions

In [3]:
def preprocess(df):
    # Handle missing values
    df.replace(0, np.nan, inplace=True)
    df.dropna(inplace=True)

    # Remove outliers using Z-score
    z_scores = (df - df.mean()) / df.std()
    df = df[(z_scores.abs() < 3).all(axis=1)]

    # Normalize the data
    df_norm = (df - df.mean()) / df.std()

def train_test_split(X, y, test_size=0.2):
    # Split the data into training and test sets
    train_size = int(0.8 * len(diabetes_norm))
    train_set = diabetes_norm[:train_size]
    test_set = diabetes_norm[train_size:]

    # Separate features and target variable
    X_train = train_set.iloc[:, :-1]
    y_train = train_set.iloc[:, -1]
    X_test = test_set.iloc[:, :-1]
    y_test = test_set.iloc[:, -1]


def load_data():
    pass

In [4]:
# Load the diabetes dataset
df = pd.read_csv("diabetes.csv")

# Preprocess the dataset
df = preprocess(df)

Load the data

In [5]:
# Load the data
X_train, X_test, y_train, y_test = load_data()

TypeError: cannot unpack non-iterable NoneType object

Fit the models

In [None]:
# Fit the linear regression
linear_regression = LinearRegression()
linear_regression.fit(X_train, y_train)

# Fit the linear regression model to the training data
linreg = LinearRegression()
linreg.fit(X_train.values, y_train)

# Make predictions on the test data
y_pred_linreg = linreg.predict(X_test.values)

# Evaluate the performance of the linear regression model
mse_linreg = np.mean((y_test.values - y_pred_linreg)**2)
print("Linear regression mean squared error: %.2f" % mse_linreg)

In [None]:
# Fit the ridge regression
ridge_regression = RidgeRegression(alpha=1)
ridge_regression.fit(X_train, y_train)

In [None]:
# Fit the lasso regression
lasso_regression = LassoRegression(alpha=1, num_iters=10000, lr=0.001)
lasso_regression.fit(X_train, y_train)

Evaluate the models