### Imports

In [62]:
import wandb
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier,  DecisionTreeRegressor

### Utils

#### Variables

In [11]:
wine_dataset_path = "./Datasets/WineQT.csv"
housing_dataset_path = "./Datasets/HousingData.csv"

random_state = 42

#### Base Models

In [108]:
class MLP_Regression:
    def __init__(self, input_size, hidden_layers, output_size, learning_rate=0.01, activation='sigmoid', optimizer='sgd', print_every=10, wandb_log=False):
        assert activation.lower() in ['sigmoid', 'relu', 'tanh'], "Activation function must be either 'sigmoid', 'relu' or 'tanh'"
        assert optimizer.lower() in ['sgd', 'bgd', 'mbgd'], "Optimizer must be either 'sgd', 'bgd' or 'mbgd'"
        assert input_size > 0, "Input size must be greater than 0"
        assert output_size > 0, "Output size must be greater than 0"
        assert learning_rate > 0, "Learning rate must be greater than 0"
        assert type(hidden_layers) == list and len(hidden_layers) > 0, "Hidden layers must be a list of size greater than 0"

        self.input_size = input_size
        self.hidden_layers = hidden_layers
        self.output_size = output_size
        self.learning_rate = learning_rate
        self.optimizer = optimizer
        self.activation_func = self._get_activation_func(activation)
        self.optimizer_func = self._get_optimizer_func(optimizer)
        self.weights, self.biases = self._initialize_weights_and_biases()

        self.wandb_log = wandb_log
        self.print_every = print_every

    # Activation functions
    def _get_activation_func(self, activation):
        if activation == "sigmoid":
            return self._sigmoid
        elif activation == "tanh":
            return self._tanh
        elif activation == "relu":
            return self._relu
        else:
            raise ValueError(f"Activation function '{activation}' not supported.")

    def _sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def _tanh(self, x):
        return np.tanh(x)

    def _relu(self, x):
        return np.maximum(0, x)

    # Activation derivative
    def _activation_derivative(self, Z):
        if self.activation_func == self._sigmoid:
            return self._sigmoid_derivative(Z)
        elif self.activation_func == self._tanh:
            return self._tanh_derivative(Z)
        elif self.activation_func == self._relu:
            return self._relu_derivative(Z)
        else:
            raise ValueError(
                f"Activation function '{self.activation_func}' not supported."
            )

    def _sigmoid_derivative(self, Z):
        return self._sigmoid(Z) * (1 - self._sigmoid(Z))

    def _tanh_derivative(self, Z):
        return 1 - np.square(self._tanh(Z))

    def _relu_derivative(self, Z):
        return np.where(Z > 0, 1, 0)

    # Optimizers
    def _get_optimizer_func(self, optimizer):
        if optimizer == "sgd":
            return self._sgd
        elif optimizer == "bgd":
            return self._bgd
        elif optimizer == "mbgd":
            return self._mbgd
        else:
            raise ValueError(f"Optimizer '{optimizer}' not supported.")

    def _sgd(self, grads):
        for i in range(len(self.weights)):
            self.weights[i] -= self.learning_rate * grads["dW"][i]
            self.biases[i] -= self.learning_rate * grads["db"][i]

    def _bgd(self, grads):
        for i in range(len(self.weights)):
            self.weights[i] -= self.learning_rate * grads["dW"][i] / self.input_size
            self.biases[i] -= self.learning_rate * grads["db"][i] / self.input_size

    def _mbgd(self, grads):
        for i in range(len(self.weights)):
            self.weights[i] -= (
                self.learning_rate * grads["dW"][i] / grads["dW"][i].shape[1]
            )
            self.biases[i] -= (
                self.learning_rate * grads["db"][i] / grads["db"][i].shape[1]
            )

    # Initialize weights and biases
    def _initialize_weights_and_biases(self):
        num_layers = len(self.hidden_layers)
        weights = []
        biases = []

        if num_layers == 0:
            w = np.random.randn(self.input_size, self.output_size)
            b = np.zeros((1, self.output_size))
            weights.append(w)
            biases.append(b)
            return weights, biases

        # Using Github Copilot
        for i in range(num_layers + 1):
            if i == 0:
                w = 0.01 * np.random.randn(self.input_size, self.hidden_layers[0])
            elif i == num_layers:
                w = 0.01 * np.random.randn(self.hidden_layers[-1], self.output_size)
            else:
                w = 0.01 * np.random.randn(
                    self.hidden_layers[i - 1], self.hidden_layers[i]
                )

            b = np.zeros((1, w.shape[1]))
            weights.append(w)
            biases.append(b)

        return weights, biases

    # Forward propagation
    def _forward_propagation(self, X):
        num_layers = len(self.weights)
        A = X
        caches = []

        for i in range(num_layers):
            W = self.weights[i]
            b = self.biases[i]

            Z = np.dot(A, W) + b

            if Z.shape[1] == 1:
                Z = Z.reshape(-1)
            caches.append((A, W, b, Z))
            # print("Forward", A.shape, W.shape, b.shape, Z.shape)

            A = self.activation_func(Z)

        if len(A.shape) == 1:
            A = A.reshape(-1)
        return A, caches

    # Backward propagation
    def _backward_propagation(self, A, Y, caches):
        num_samples = A.shape[0]
        num_layers = len(self.weights)
        grads = {"dW": [], "db": []}

        delta = A - Y
        # print(delta.shape, A.shape, Y.shape)

        for i in reversed(range(num_layers)):
            A, W, _, Z = caches[i]
            # print("A", A.shape, "W", W.shape, "Z", Z.shape)

            dZ = np.multiply(delta, self._activation_derivative(Z))
            if dZ.ndim == 1:
                dZ = dZ.reshape((dZ.shape[0], 1))
            # print("dZ", dZ.shape)
            dW = np.dot(A.T, dZ)
            # print("dW", dW.shape)
            db = np.sum(dZ, axis=0, keepdims=True)
            # print("db", db.shape)

            delta = np.dot(dZ, W.T)
            # print("delta", delta.shape)

            if len(dW.shape) == 1:
                dW = dW.reshape(-1, 1)

            grads["dW"].append(dW)
            grads["db"].append(db)

        grads["dW"].reverse()
        grads["db"].reverse()

        return grads

    # Calculate cost
    def _calculate_cost(self, A, Y):
        # print(A.shape, Y.shape)
        mse = np.mean(np.square(A - Y))
        rmse = np.sqrt(mse)
        r_squred = 1 - (np.sum(np.square(Y - A)) / np.sum(np.square(Y - np.mean(Y))))
        return mse, rmse, r_squred

    # Predict
    def predict(self, X):
        A, _ = self._forward_propagation(X)
        return A
    
    # Evaluate
    def evaluate(self, X, Y):
        A = self.predict(X)
        mse, rmse, r_squared = self._calculate_cost(A, Y)
        return mse, rmse, r_squared

    # Train the model
    def train(
        self, X, Y, max_epochs=10, batch_size=32, X_validation=None, y_validation=None
    ):
        num_samples = X.shape[0]
        costs = []

        for i in range(max_epochs):
            if self.optimizer == "bgd":
                batch_size = num_samples
                num_batches = 1
            elif self.optimizer == "sgd":
                batch_size = 1
                num_batches = num_samples
            elif self.optimizer == "mbgd":
                num_batches = num_samples // batch_size
            else:
                raise ValueError(f"Optimizer '{self.optimizer}' not supported.")

            for j in range(num_batches):
                start = j * batch_size
                end = start + batch_size

                A, caches = self._forward_propagation(X[start:end])
                grads = self._backward_propagation(A, Y[start:end], caches)
                self.optimizer_func(grads)

            A = self.predict(X)
            mse, rmse, r_squared = self._calculate_cost(A, Y)
            costs.append(mse)

            data_to_log = {"epoch": i + 1, "train_loss": mse}

            # Calculate validation loss
            if X_validation is not None and y_validation is not None:
                A = self.predict(X_validation)
                val_loss_mse, val_loss_rmse, val_loss_r_squared = self._calculate_cost(A, y_validation)
                data_to_log["val_loss"] = val_loss_mse

            if self.wandb_log:
                wandb.log(data_to_log)

            if self.print_every and (i + 1) % self.print_every == 0:
                print(f"Cost after {i+1} epochs: {mse}")

        return costs

In [13]:
class MLP_Classifier:
    def __init__(self, input_size, hidden_layers, num_classes=11, learning_rate=0.01, activation='sigmoid', optimizer='sgd', wandb_log=False, print_every=10):
        assert activation.lower() in ['sigmoid', 'relu', 'tanh'], "Activation function must be either 'sigmoid', 'relu' or 'tanh'"
        assert optimizer.lower() in ['sgd', 'bgd', 'mbgd'], "Optimizer must be either 'sgd', 'bgd' or 'mbgd'"
        assert input_size > 0, "Input size must be greater than 0"
        assert num_classes > 0, "Output size must be greater than 0"
        assert learning_rate > 0, "Learning rate must be greater than 0"
        assert type(hidden_layers) == list and len(hidden_layers) > 0, "Hidden layers must be a list of size greater than 0"

        self.input_size = input_size
        self.hidden_layers = hidden_layers
        self.output_size = num_classes
        self.learning_rate = learning_rate
        self.optimizer = optimizer
        self.activation_func = self._get_activation_func(activation)
        self.optimizer_func = self._get_optimizer_func(optimizer)
        self.weights, self.biases = self._initialize_weights_and_biases()

        self.wandb_log = wandb_log
        self.print_every = print_every
    
    # Activation functions
    def _get_activation_func(self, activation):
        if activation == 'sigmoid':
            return self._sigmoid
        elif activation == 'tanh':
            return self._tanh
        elif activation == 'relu':
            return self._relu
        else:
            raise ValueError(f"Activation function '{activation}' not supported.")

    def _sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def _tanh(self, x):
        return np.tanh(x)

    def _relu(self, x):
        return np.maximum(0, x)
    
    # Activation derivative
    def _activation_derivative(self, Z):
        if self.activation_func == self._sigmoid:
            return self._sigmoid_derivative(Z)
        elif self.activation_func == self._tanh:
            return self._tanh_derivative(Z)
        elif self.activation_func == self._relu:
            return self._relu_derivative(Z)
        else:
            raise ValueError(f"Activation function '{self.activation_func}' not supported.")
    
    def _sigmoid_derivative(self, Z):
        return self._sigmoid(Z) * (1 - self._sigmoid(Z))
    
    def _tanh_derivative(self, Z):
        return 1 - np.square(self._tanh(Z))
    
    def _relu_derivative(self, Z):
        return np.where(Z > 0, 1, 0)
    
    ## Optimizers
    def _get_optimizer_func(self, optimizer):
        if optimizer == 'sgd':
            return self._sgd
        elif optimizer == 'bgd':
            return self._bgd
        elif optimizer == 'mbgd':
            return self._mbgd
        else:
            raise ValueError(f"Optimizer '{optimizer}' not supported.")
    
    def _sgd(self, grads):
        for i in range(len(self.weights)):
            self.weights[i] -= (self.learning_rate * grads['dW'][i])
            self.biases[i] -= (self.learning_rate * grads['db'][i])
    
    def _bgd(self, grads):
        for i in range(len(self.weights)):
            self.weights[i] -= (self.learning_rate * grads['dW'][i] / self.input_size)
            self.biases[i] -= (self.learning_rate * grads['db'][i] / self.input_size)

    def _mbgd(self, grads):
        for i in range(len(self.weights)):
            self.weights[i] -= (self.learning_rate * grads['dW'][i] / grads['dW'][i].shape[1])
            self.biases[i] -= (self.learning_rate * grads['db'][i] / grads['db'][i].shape[1])
    
    # Initialize weights and biases
    def _initialize_weights_and_biases(self):
        num_layers = len(self.hidden_layers)
        weights = []
        biases = []

        if num_layers == 0:
            w = np.random.randn(self.input_size, self.output_size)
            b = np.zeros((1, self.output_size))
            weights.append(w)
            biases.append(b)
            return weights, biases
        
        # Using Github Copilot
        for i in range(num_layers + 1):
            if i == 0:
                w = np.random.randn(self.input_size, self.hidden_layers[0])
            elif i == num_layers:
                w = np.random.randn(self.hidden_layers[-1], self.output_size)
            else:
                w = np.random.randn(self.hidden_layers[i - 1], self.hidden_layers[i])
            
            b = np.zeros((1, w.shape[1]))
            weights.append(w)
            biases.append(b)

        return weights, biases
    
    # Forward propagation
    def _forward_propagation(self, X):
        num_layers = len(self.weights)
        A = X
        caches = []
        
        for i in range(num_layers):
            W = self.weights[i]
            b = self.biases[i]
            Z = np.dot(A, W) + b
            
            if Z.shape[1] == 1:
                Z = Z.reshape(-1)
            caches.append((A, W, b, Z))
            # print(A.shape, W.shape, b.shape, Z.shape)

            A = self.activation_func(Z)
        
        if len(A.shape) == 1:
            A = A.reshape(-1)
        return A, caches

    # Backward propagation
    def _backward_propagation(self, A, Y, caches):
        num_samples = A.shape[0]
        num_layers = len(self.weights)
        grads = {'dW': [], 'db': []}

        delta = A-Y
        # print(delta.shape, A.shape, Y.shape)

        for i in reversed(range(num_layers)):
            A, W, _, Z = caches[i]
            # print("A", A.shape, "W", W.shape, "Z", Z.shape)
            
            dZ = np.multiply(delta, self._activation_derivative(Z))
            if dZ.ndim == 1:
                dZ = dZ.reshape((dZ.shape[0], 1))
            # print("dZ", dZ.shape)
            dW = np.dot(A.T, dZ)
            # print("dW", dW.shape)
            db = np.sum(dZ, axis=0, keepdims=True)
            # print("db", db.shape)

            delta = np.dot(dZ, W.T)
            # print("delta", delta.shape)

            if len(dW.shape) == 1:
                dW = dW.reshape(-1, 1)

            grads['dW'].append(dW)
            grads['db'].append(db)
        
        grads['dW'].reverse()
        grads['db'].reverse()

        return grads
    
    # Calculate cost
    def _calculate_cost(self, A, Y):
        cost = np.mean(np.not_equal(A, Y))
        return cost

    # Predict
    def predict(self, X):
        A, _ = self._forward_propagation(X)

        # Apply a softmax to get probabilities & then getting the h9ghest probability
        A = np.exp(A) / np.sum(np.exp(A), axis=1, keepdims=True)

        A = np.argmax(A,axis=1)
        return A

    # Train the model
    def train(self, X, Y, max_epochs=10, batch_size=32, X_validation=None, y_validation=None):
        num_samples = X.shape[0]
        costs = []

        enc = OneHotEncoder().fit(np.array([[i,] for i in range(self.output_size)]))
        y_new = enc.transform(Y.reshape(-1, 1)).toarray()
        
        for i in range(max_epochs):
            if self.optimizer == "bgd":
                batch_size = num_samples
                num_batches = 1
            elif self.optimizer == "sgd":
                batch_size = 1
                num_batches = num_samples
            elif self.optimizer == "mbgd":
                num_batches = num_samples // batch_size
            else:
                raise ValueError(f"Optimizer '{self.optimizer}' not supported.")

            for j in range(num_batches):
                start = j * batch_size
                end = start + batch_size
                
                A, caches = self._forward_propagation(X[start:end])
                grads = self._backward_propagation(A, y_new[start:end], caches)
                self.optimizer_func(grads)
            
            A = self.predict(X)
            cost = self._calculate_cost(A, Y)
            costs.append(cost)

            data_to_log = {
                "epoch": i + 1,
                "train_loss": cost
            }

            # Calculate validation loss
            if X_validation is not None and y_validation is not None:
                A = self.predict(X_validation)
                val_loss = self._calculate_cost(A, y_validation)
                data_to_log["val_loss"] = val_loss

            if self.wandb_log:
                wandb.log(data_to_log)
            
            if self.print_every and (i+1) % self.print_every == 0:
                print(f"Cost after {i+1} epochs: {cost}")
        
        return costs

In [14]:
class Logistic_Regression:
    def __init__(self, num_classes=10, learning_rate=0.01, num_epochs=1000, print_every=None, wandb_log=False):
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs
        self.classes = range(num_classes)
        self.weights = [None,] * num_classes
        self.bias = [None,] * num_classes
        self.print_every = print_every
        self.wandb_log = wandb_log
        self.num_classes = num_classes

        self.val_acc = []
        self.train_acc = []

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))
    
    def softmax(self, z):
        return np.exp(z) / np.sum(np.exp(z), axis=0)
    
    def loss(self, y_true, y_pred):
        return -np.sum(y_true * np.log(y_pred)) / len(y_true)
    
    def fit(self, X, y, X_val, y_val):
        num_samples, num_features = X.shape
        self.weights = np.zeros((num_features, self.num_classes))
        self.bias = np.zeros(self.num_classes)
        train_loss = []
        self.val_acc = []
        self.train_acc = []

        enc = OneHotEncoder().fit(np.array([[i,] for i in range(self.num_classes)]))
        y_new = enc.transform(y.reshape(-1, 1)).toarray()
        
        for i in range(self.num_epochs):
            z = self.sigmoid(np.dot(X, self.weights) + self.bias[np.newaxis, :])
            y_predicted = self.softmax(z)

            dw = (1 / num_samples) * np.dot(X.T, (y_predicted - y_new))
            db = (1 / num_samples) * np.sum(y_predicted - y_new)

            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

            loss = self.loss(y_new, y_predicted)
            train_loss.append(loss)

            y_predicted_val = self.predict(X_val)
            self.val_acc.append(self.accuracy(y_val, y_predicted_val))
            y_predicted_train = self.predict(X)
            self.train_acc.append(self.accuracy(y, y_predicted_train))

            if self.print_every and (i+1) % self.print_every == 0:
                print(f'Epoch {i+1} loss: {loss:.4f}')
                print(f'Val Acc: {self.val_acc[-1]*100:.2f} Train Acc: {self.train_acc[-1]*100:.2f}')

            if self.wandb_log:
                wandb.log({"epoch": i+1,"train_loss": loss, "val_acc": self.val_acc[-1], "train_acc": self.train_acc[-1]})
        
        return train_loss
    
    def predict(self, X):
        y_predicted = self.softmax(np.dot(X, self.weights) + self.bias[np.newaxis, :])
        y_predicted_cls = np.argmax(y_predicted, axis=1)
        return y_predicted_cls
    
    def accuracy(self, y_true, y_pred):
        accuracy = np.sum(y_true == y_pred) / len(y_true)
        return accuracy
    
    def evaluate(self, X, y):
        y_pred = self.predict(X)
        accuracy = self.accuracy(y, y_pred)
        return accuracy
    
    def plot_loss(self, train_loss):
        plt.plot(range(1, len(train_loss) + 1), train_loss, label='Training Loss')
        plt.title('Training Loss')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.show()

    def plot_acc(self):
        plt.plot(range(1, len(self.val_acc) + 1), self.val_acc, label='Validation Accuracy')
        plt.plot(range(1, len(self.train_acc) + 1), self.train_acc, label='Train Accuracy')
        plt.xlabel('Epoch')
        plt.ylabel('Accuracy')
        plt.legend()
        plt.grid(True)
        plt.show()

In [70]:
class Linear_Regression:
    def __init__(self, learning_rate=0.01, num_epochs=1000, print_every=None, wandb_log=False):
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs
        self.weights = None
        self.bias = None
        self.print_every = print_every
        self.wandb_log = wandb_log

        self.train_loss = []

    def fit(self, X, y):
        num_samples, num_features = X.shape
        self.weights = np.zeros(num_features)
        self.bias = 0
        self.train_loss = []

        for i in range(self.num_epochs):
            y_predicted = np.dot(X, self.weights) + self.bias

            error = y_predicted - y
            dw = (1 / num_samples) * np.dot(X.T, error)
            db = (1 / num_samples) * np.sum(error)

            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

            loss = self.loss(y, y_predicted)
            self.train_loss.append(loss)

            if self.print_every and (i+1) % self.print_every == 0:
                print(f'Epoch {i+1} loss: {loss:.4f}')

            if self.wandb_log:
                wandb.log({"epoch": i+1,"train_loss": loss})
        
        return self.train_loss
    
    def predict(self, X):
        y_predicted = np.dot(X, self.weights) + self.bias
        return y_predicted
    
    def loss(self, y_true, y_pred):
        return np.mean(np.square(y_true - y_pred))
    
    def evaluate(self, X, y):
        y_pred = self.predict(X)
        loss = self.loss(y, y_pred)
        return loss
    
    def plot_loss(self):
        plt.plot(range(1, len(self.train_loss) + 1), self.train_loss, label='Training Loss')
        plt.title('Training Loss')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.show()

#### Ensemble Models

In [106]:
class Bagging:
    def __init__(self, base_estimator, n_estimators=10, fraction=0.15, bootstrap=True, hard_voting=True, classification=True, random_state=None):
        assert fraction > 0 and fraction <= 1, "Fraction must be between 0 and 1"
        assert n_estimators > 0, "Number of estimators must be greater than 0"

        self.base_estimator = base_estimator
        self.n_estimators = n_estimators
        self.fraction = fraction
        self.bootstrap = bootstrap
        self.hard_voting = hard_voting
        self.random_state = random_state
        self.classification = classification

        self.estimators = []
        self.estimators_weights = []

    def _bootstrap(self, X, y, fraction, n):
        data_array = []
        for i in range(n):
            indices = np.random.choice(range(X.shape[0]), size=int(X.shape[0] * fraction), replace=False)
            data_array.append((X[indices], y[indices]))
        return data_array
    
    def _random_sampling(self, X, y, fraction, n):
        data_array = []
        permuted_index = np.random.permutation(len(X))
        X = X[permuted_index]
        y = y[permuted_index]
        num_samples = int(X.shape[0] * fraction)
        for i in range(num_samples, X.shape[0], num_samples):
            data_array.append((X[i-num_samples: i], y[i-num_samples: i]))
        return data_array[:n]
        
    def fit(self, X, y):
        if self.bootstrap:
            Samples = self._bootstrap(X, y, self.fraction, self.n_estimators)
        else:
            Samples = self._random_sampling(X, y, self.fraction, self.n_estimators)

        for i in range(self.n_estimators):
            X_sample, y_sample = Samples[i]
            
            estimator = self.base_estimator()
            estimator.fit(X_sample, y_sample)
            self.estimators.append(estimator)

    def predict(self, X):
        predictions = []
        for estimator in self.estimators:
            predictions.append(estimator.predict(X))
        predictions = np.array(predictions)

        if self.hard_voting:
            predictions = np.where(np.sum(predictions, axis=0) > (len(self.estimators) / 2), 1, 0)
        else:
            predictions = np.mean(predictions, axis=0)
        
        return predictions
    
    def plot_heatmap_accuracies(self, X, y):
        accuracies = []
        for estimator in self.estimators:
            y_pred = estimator.predict(X)
            accuracy = np.sum(y_pred == y) / len(y)
            accuracies.append(accuracy)
        accuracies = np.array(accuracies)
        accuracies = accuracies.reshape((int(np.sqrt(self.n_estimators)), -1))
        plt.figure(figsize=(10, 10))
        plt.title("Heatmap of Accuracies")
        sns.heatmap(accuracies, annot=True, cmap='Blues', fmt='.2f')
        plt.show()

In [107]:
bagging = Bagging(base_estimator=DecisionTreeClassifier, n_estimators=10, fraction=0.15, bootstrap=False, hard_voting=True, random_state=42)
bagging.fit(wine_X_train, wine_y_train)

### Main Code

#### Datasets

In [16]:
wine_dataset = pd.read_csv(wine_dataset_path)
wine_dataset.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,Id
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,0
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,1
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,2
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,3
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,4


In [17]:
wine_X = wine_dataset.drop(columns=['quality']).to_numpy()
wine_y = wine_dataset['quality'].to_numpy()

wine_num_classes = 11 # 0-10

In [18]:
wine_X = StandardScaler().fit_transform(wine_X)

In [19]:
wine_X_train, wine_X_test, wine_y_train, wine_y_test = train_test_split(wine_X, wine_y, test_size=0.2, random_state=random_state)
wine_X_train, wine_X_val, wine_y_train, wine_y_val = train_test_split(wine_X_train, wine_y_train, test_size=0.25, random_state=random_state)

In [20]:
housing_dataset = pd.read_csv(housing_dataset_path)
housing_dataset.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,,36.2


In [21]:
housing_X = housing_dataset.drop(columns=['MEDV']).to_numpy()
housing_y = housing_dataset['MEDV'].to_numpy()

In [65]:
housing_X = StandardScaler().fit_transform(housing_X)

In [66]:
housing_X_train, housing_X_test, housing_y_train, housing_y_test = train_test_split(housing_X, housing_y, test_size=0.2, random_state=random_state)
housing_X_train, housing_X_val, housing_y_train, housing_y_val = train_test_split(housing_X_train, housing_y_train, test_size=0.25, random_state=random_state)

#### Models

In [88]:
dt_classifier_param_grid = {
    'criterion': ['gini', 'entropy'],
    'splitter': ['best', 'random'],
    'max_depth': [None, 10, 30, 50],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

dt_regressor_param_grid = {
    'criterion': ['absolute_error', 'friedman_mse', 'poisson', 'squared_error'],
    'splitter': ['best', 'random'],
    'max_depth': [None, 10, 30, 50],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

In [97]:
dt_classifier = DecisionTreeClassifier(random_state=random_state)

grid_search = GridSearchCV(dt_classifier, dt_classifier_param_grid, cv=5, n_jobs=-1, verbose=1)
grid_search.fit(wine_X_train, wine_y_train)

dt_classifier = DecisionTreeClassifier(**grid_search.best_params_, random_state=random_state)
dt_classifier.fit(wine_X_train, wine_y_train)

best_params_dt_classifier = grid_search.best_params_
print(f"Best params: {grid_search.best_params_}")

Fitting 5 folds for each of 144 candidates, totalling 720 fits




Best params: {'criterion': 'gini', 'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 2, 'splitter': 'random'}


In [98]:
dt_regressor = DecisionTreeRegressor(random_state=random_state)

grid_search = GridSearchCV(dt_regressor, dt_regressor_param_grid, cv=5, n_jobs=-1, verbose=1)
grid_search.fit(housing_X_train, housing_y_train)

dt_regressor = DecisionTreeRegressor(**grid_search.best_params_, random_state=random_state)
dt_regressor.fit(housing_X_train, housing_y_train)

best_params_dt_regressor = grid_search.best_params_
print(f"Best params: {grid_search.best_params_}")

Fitting 5 folds for each of 288 candidates, totalling 1440 fits
Best params: {'criterion': 'absolute_error', 'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 10, 'splitter': 'best'}


In [None]:
best_params_logistic = [0.001, 15] # best values from hyperparameter tuning from Assignment 3

logreg = Logistic_Regression(num_classes=wine_num_classes, learning_rate=best_params_logistic[0], num_epochs=best_params_logistic[1], print_every=None)
logreg.fit(wine_X_train, wine_y_train, wine_X_val, wine_y_val)

In [91]:
linear_params_grid = {
    'learning_rate': [0.001, 0.01, 0.1],
    'num_epochs': [100, 500, 1000, 2000]
}

best_linreg = [None, None, np.inf]
for lr in linear_params_grid['learning_rate']:
    for ne in linear_params_grid['num_epochs']:
        linreg = Linear_Regression(learning_rate=lr, num_epochs=ne)
        linreg.fit(housing_X_train, housing_y_train)
        loss = linreg.evaluate(housing_X_val, housing_y_val)
        if loss < best_linreg[2]:
            best_linreg = [lr, ne, loss]

linreg = Linear_Regression(learning_rate=best_linreg[0], num_epochs=best_linreg[1])
linreg.fit(housing_X_train, housing_y_train)

print(f"Best params: {best_linreg}")

Best params: [0.1, 2000, 24.697510978480615]


MLP Hyperparameters from Assignment 3

In [96]:
best_params_mlp_classifier = ['sgd', 'sigmoid', [8,], 0.01, 32, 1000] # optimizer, activation, hidden_layers, lr, batch_size, max_epochs

mlp_classifier = MLP_Classifier(input_size=wine_X_train.shape[1], hidden_layers=best_params_mlp_classifier[2], num_classes=wine_num_classes, learning_rate=best_params_mlp_classifier[3], optimizer=best_params_mlp_classifier[0], activation=best_params_mlp_classifier[1], print_every=None)
mlp_classifier.train(wine_X_train, wine_y_train, max_epochs=best_params_mlp_classifier[5], batch_size=best_params_mlp_classifier[4], X_validation=wine_X_val, y_validation=wine_y_val)

print(f"Best params: {best_params_mlp_classifier} # optimizer, activation, hidden_layers, lr, batch_size, max_epochs")

Best params: ['sgd', 'sigmoid', [8], 0.01, 32, 1000] # optimizer, activation, hidden_layers, lr, batch_size, max_epochs


In [109]:
best_params_mlp_regressor = ['sgd', 'relu', [8,], 0.001, 32, 1000] # optimizer, activation, hidden_layers, lr, batch_size, max_epochs

mlp_regressor = MLP_Regression(input_size=housing_X_train.shape[1], hidden_layers=best_params_mlp_regressor[2], output_size=1, learning_rate=best_params_mlp_regressor[3], optimizer=best_params_mlp_regressor[0], activation=best_params_mlp_regressor[1], print_every=None)
mlp_regressor.train(housing_X_train, housing_y_train, max_epochs=best_params_mlp_regressor[5], batch_size=best_params_mlp_regressor[4], X_validation=housing_X_val, y_validation=housing_y_val)

print(f"Best params: {best_params_mlp_regressor} # optimizer, activation, hidden_layers, lr, batch_size, max_epochs")

Best params: ['sgd', 'relu', [8], 0.001, 32, 1000] # optimizer, activation, hidden_layers, lr, batch_size, max_epochs
