In [1]:
pip install wget

Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
import math
import random
import traceback
from pdb import set_trace
import sys
import numpy as np
from abc import ABC, abstractmethod
import traceback
from typing import List, Tuple, Union, Callable

In [3]:
from util.timer import Timer
from util.data import split_data, dataframe_to_array, binarize_classes
from util.metrics import accuracy
from util.metrics import mse
from sklearn.metrics import confusion_matrix
from util.metrics import nll, sse
from util.data import AddBias, Standardization, ImageNormalization, OneHotEncoding
from sklearn.base import BaseEstimator
from sklearn.pipeline import Pipeline
from datasets.MNISTDataset import MNISTDataset
from datasets.HousingDataset import HousingDataset

Your Name: HRITIKA KUCHERIYA

# Understand the Dataset

In this programming assignment you will be implementing and training **two** neural networks. Each neural network will be trained on a different set of data. You will utilize the Boston House Pricing regression dataset and MNIST handwritten digit classification dataset. To account for the different ML problems and datasets you will need to implement the following neural networks:

1. **Regressor neural network** which predicts the median value of owner-occupied homes in $1000's
2. **Classifier neural network** for multi-class classification that predicts the digits 0-9 


## Housing Dataset

![](https://assets.prevu.com/blogs/images/first-time-buyer-boston-real-estate/03d0c13cdf6721a022afd91e343493b5?ixlib=rb-4.0.3&w=670&lossless=true&auto=format%20compress&fit=fill&fill=solid&s=cb885d7fc811865d8d2219c47c87eb01)

The first dataset you'll be using for this project is the Boston Housing dataset which contains various different features about houses in Boston. This is a classic machine learning dataset from 1978 and is one of the first datasets most people use when first learning machine learning. **There are 506 samples and 13 feature variables in this dataset.**

The dataset consists of 3 splits:

1. **Train**: Throughout this assignment you will be training your model using this data.
2. **Validation**: You will then use this set to tune your model and evaluate its performance.
3. **Test**: This split simulates real life data which we often don't have access to until the model is deployed. We have kept this split hidden from you and we will use it to judge the performance of your model.

![](https://camo.githubusercontent.com/01c057a753e92a9bc70b8c45d62b295431851c09cffadf53106fc0aea7e2843f/687474703a2f2f692e7974696d672e636f6d2f76692f3051493378675875422d512f687164656661756c742e6a7067)
The second dataset you'll be using for this project is the famous [MNIST](https://en.wikipedia.org/wiki/MNIST_database) dataset which contains images of handwritten digits 0 through 9. There are 60,000 images included in the dataset and each image is a gray scale image of size 28x28. Each pixel represents a feature which means there are $28*28$ or $784$ features per each data sample.

The dataset consists of 3 splits:

1. **Train**: Throughout this assignment you will be training your model using this data. There are approximately 44k training samples.
2. **Validation**: You will then use this set to tune your model and evaluate its performance. There are approximately 12k training samples.
3. **Test**: This split simulates real life data which we often don't have access to until the model is deployed. We have kept this split hidden from you and we will use it to judge the performance of your model on Autolab.

You DO NOT have access to the Test set as it gonna be used for scoring. This will not prevent you to complete this assignment at all.

# Design Machine Learning Models (TODO)

## Base Model
Basic model structure, **don't change** this component.

In [4]:
class BaseModel(ABC):
    """ Super class for ITCS Machine Learning Class"""

    @abstractmethod
    def fit(self, X, y):
        pass

    @abstractmethod
    def predict(self, X):
        pass

In [5]:
class Layer():
    """ Class which stores all variables required for a layer in a neural network
    
        Attributes:
            W: NumPy array of weights for all neurons in the layer
            
            b: NumPy array of biases for all neurons in the layer
            
            g: Activation function for all neurons in the layer
            
            name: Name of the layer
            
            neurons: Number of neurons in the layer
            
            inputs: Number of inputs into the layer
            
            Z: Linear combination of weights and inputs for all neurons. 
                Initialized to an empty array until it is computed and set.
                
            A: Activation output for all neurons. Initialized to an empty 
                array until it is computed and set.
    """
    def __init__(
        self, 
        W:np.array, 
        b:np.array, 
        g: object, 
        name: str=""
    ):
        self.W = W
        self.b = b
        self.g = g
        self.name = name 
        self.neurons = len(W)
        self.inputs = W.shape[1]
        self.Z = np.array([])
        self.A = np.array([])
    
    def print_info(self) -> None:
        """ Prints info for all class attributes"""
        print(f"{self.name}")
        print(f"\tNeurons: {self.neurons}")
        print(f"\tInputs: {self.inputs}")
        print(f"\tWeight shape: {self.W.shape}")
        print(f"\tBias shape: {self.b.shape}")
        print(f"\tActivation function: {self.g.__name__}")
        print(f"\tZ shape: {self.Z.shape}")
        print(f"\tA shape: {self.A.shape}")
    

In [6]:
def get_mini_batches(data_len: int, batch_size: int = 32) -> List[np.ndarray]:
    """ Generates mini-batches based on the data indexes
        
        Args:
            data_len: Length of the data
            
            batch_size: Size of each mini batch where the last mini-batch
                might be smaller than the rest if the batch_size does not 
                evenly divide the data length.
    
    """
    X_idx = np.arange(data_len)
    np.random.shuffle(X_idx)
    batches = [X_idx[i:i+batch_size] for i in range(0, data_len, batch_size)]
    
    return batches

## TODO: Neural Network
Complete the TODOs. 

In [7]:
class NeuralNetwork(BaseEstimator):
    """ Runs the initialization and training process for a multi-layer neural network.
        
        Attributes:
            neurons_per_layer: A list where each element represents 
                    the neurons in a layer. For example, [2, 3] would
                    create a 2 layer neural network where the hidden layer
                    has 2 neurons and the output layer has 3 neurons.
            
            learning_curve_loss: Pointer to a function which computes the SSE or NLL loss.
                This loss will be tracked for each mini-batch and epoch. The loss computed
                will be stored in the avg_trn_loss_tracker and avg_vld_loss_tracker variables.

            delta_loss_func: Pointer to a function which computes the  derivative for
                the MSE or NLL loss.

            g_hidden: Activation function used by ALL neurons 
                in ALL hidden layers.
                    
            g_output: Activation function used by ALL neurons
                in the output layer.
        
            alpha: learning rate or step size used by gradient descent.
                
            epochs: Number of times data is used to update the weights `self.w`.
                Each epoch means a data sample was used to update the weights at least
                once.
            
            batch_size: Mini-batch size used to determine the size of mini-batches
                if mini-batch gradient descent is used.
            
            seed: Random seed to use when initializing the layers of the neural network.

            verbose: If True, print statements inside the train() method will
                be printed.

            nn: A list of Layer class instances which define the neural network.

            avg_trn_loss_tracker: A list that tracks the average training loss per epoch. 

            avg_vld_loss_tracker: A list that tracks the average validation loss per epoch.
            
    """
    def __init__(
        self,
        neurons_per_layer: List[int],
        learning_curve_loss: Callable,
        delta_loss_func: Callable,
        g_hidden: object,
        g_output: object,
        alpha: float = .001, 
        epochs: int = 1, 
        batch_size: int = 64,
        seed: int = None,
        verbose: bool = False,
    ):
        self.neurons_per_layer = neurons_per_layer
        self.learning_curve_loss = learning_curve_loss
        self.delta_loss_func = delta_loss_func
        self.g_hidden = g_hidden
        self.g_output = g_output
        self.alpha = alpha
        self.epochs = epochs
        self.batch_size = batch_size
        self.seed = seed
        self.verbose = verbose

        self.nn = []
        self.avg_trn_loss_tracker = []
        self.avg_vld_loss_tracker = []

    def init_neural_network(self, n_input_features: int)-> List[Layer]:
        """ Initializes weights and biases for a multi-layer neural network 
        
            Args:
                n_input_features: Number of features the input data has
                
            TODO:
                Finish this method by completing the for loop to initialize the weights
                `W` and biases `b`. Once initialized, create an instance of the `Layer`
                class by passing the required arguments of weights `W`, biases `b`, 
                activation function `g`, and name `name` and then append it to the 
                `nn` list. Return the completed neural network `nn` once the for-loop
                has finished.

        """
        nn = []
        # Set numpy global random seed
        np.random.seed(self.seed)
        for l, neurons in enumerate(self.neurons_per_layer):
            # Set inputs to number of input features
            # for the first hidden layer
            if l == 0:
                inputs = n_input_features
            else:
                inputs = self.neurons_per_layer[l-1]
            
            # Set activation functions for the output
            # layer neurons and set the names of the nn
            if l == len(self.neurons_per_layer)-1:
                g = self.g_output
                name = f"Layer {l+1}: Output Layer"
            else:
                g = self.g_hidden
                name = f"Layer {l+1}: Hidden Layer"
            
            # Initialize weights and biases
            W = self.init_weights(neurons, inputs)
            b = np.zeros((neurons, 1))
            
            # Create layer and add to neural network
            layer = Layer(W, b, g, name)
            nn.append(layer)
            
        return nn

    def init_weights(self, neurons: int, inputs: int) -> np.ndarray:
        """ Initializes weight values
        
            Args:
                neurons: Number of neurons in the layer
                
                inputs: Number of inputs to the layer
            
            TODO:
                Finish this method by returning randomly initalized weights given
                the arguments for the number of neurons and inputs. Return the randomly
                initialized weights once done.
        """
        limit = np.sqrt(6 / (neurons + inputs))
        return np.random.uniform(-limit, limit, size=(neurons, inputs))
    
    def fit(
        self, 
        X: np.ndarray, 
        y: np.ndarray, 
        X_vld: np.ndarray = None, 
        y_vld: np.ndarray = None,
    ) -> None:
        """ Initializes and trains the defined neural network using gradient descent  
        
            Args:
                X: Training features/data 
                
                y: Training targets/labels

                X_vld: validation features/data which are used for computing the validation
                    loss after every epoch.

                y_vld: validation targets/labels which are used for computing the validation
                    loss after every epoch.
                    
            TODO:
                Finish this method by completing the training loop which performs 
                mini-batch gradient descent and tracks the training loss and validation
                scores per each epoch. To complete the training loop, you will need to
                initialize the neural network list `nn`, call the forward pass, and call
                the backwards pass.
        """
        m = len(X)
        self.avg_trn_loss_tracker = []
        self.avg_vld_loss_tracker = []
        
        # TODO (REQUIRED) Initialize self.nn below by replacing []
        self.nn = self.init_neural_network(X.shape[1])
        
        for e in range(self.epochs):
            if self.verbose: print(f"Epoch: {e+1}")
            batches = get_mini_batches(data_len=m, batch_size=self.batch_size)
            total_trn_batch_loss = 0
            for mb in batches:
                # Forward pass to get predictions
                # TODO (REQUIRED) Store the training forward pass predictions below by replacing np.zeros()
                y_hat = self.forward(X[mb])

                # Backward pass to get gradients
                # TODO (REQUIRED) Add backwards pass call below
                self.backward(X[mb], y[mb], y_hat)

                trn_batch_loss = self.learning_curve_loss(y[mb], y_hat)
                total_trn_batch_loss += trn_batch_loss
                
            avg_trn_loss = total_trn_batch_loss / m
            if self.verbose: print(f"\tTraining loss: {avg_trn_loss}")
            self.avg_trn_loss_tracker.append(avg_trn_loss)
            
            if X_vld is not None and y_vld is not None:
                m_vld = len(y_vld)
                # TODO (REQUIRED) Store the validation forward pass predictions below by replacing np.zeros()
                y_hat_vld = self.forward(X_vld)
                
                avg_vld_loss = self.learning_curve_loss(y_vld, y_hat_vld) / m_vld
                if self.verbose: print(f"\tValidation loss: {avg_vld_loss}")
                self.avg_vld_loss_tracker.append(avg_vld_loss)
            
    def forward(self, X:np.ndarray) -> np.ndarray:
        """ Performs the forward pass for a multi-layer neural network
    
            Args:
                X: Input features. This should be typically be the 
                    training data.
                    
            TODO: 
                Finish this method by performing the forward pass for a multi-layer
                neural network. Return the output `y_hat` once done.
        """
        if X.ndim == 1:
            X = X.reshape(1, -1)

        A_prev = X
        
        for layer in self.nn:
            layer.Z = np.dot(A_prev, layer.W.T) + layer.b.T
            
            layer.A = layer.g.activation(layer.Z)
            
            A_prev = layer.A
        
        return self.nn[-1].A
    
    def backward(self, X:np.ndarray, y:np.ndarray, y_hat:np.ndarray) -> None:
        """ Performs the feedback process for a multi-layer neural network
        
            Args:
                X: Training features/data
                
                y: Training targets/labels
                
                y_hat: Training predictions (predicted targets or probabilities)

            TODO:
                Finish this method by performing the backward pass for a multi-layer
                neural network.
        """
        m = X.shape[0] 
        
        delta = self.delta_loss_func(y, y_hat)
        
        for l in reversed(range(len(self.nn))):
            layer = self.nn[l]
            
            if l < len(self.nn) - 1:
                next_layer = self.nn[l+1]
                delta = np.dot(delta, next_layer.W) * layer.g.derivative(layer.Z)
            
            if l == 0:
                A_prev = X
            else:
                A_prev = self.nn[l-1].A
            
            dW = np.dot(delta.T, A_prev) / m
            db = np.sum(delta, axis=0, keepdims=True).T / m
            
            self.nn[l].W -= self.alpha * dW
            self.nn[l].b -= self.alpha * db

## TODO: Neural Network Regressor
Complete the TODOs.

In [8]:
class NeuralNetworkRegressor(NeuralNetwork):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        
        for key, value in kwargs.items():
            setattr(self, key, value)
        self._param_names = list(kwargs.keys())
    
    def get_params(self, deep=True):
        """ Gets all class variables
        
            This is a a method for compatibility with Sklearn's GridSearchCV 
        """
        return {param: getattr(self, param)
                for param in self._param_names}

    def set_params(self, **parameters):
        """ Sets all class variables
        
            This is a a method for compatibility with Sklearn's GridSearchCV 
        """
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self
    
    def predict(self, X: np.ndarray) -> np.ndarray:
        """ Make predictions using parameters learned during training.
        
            Args:
                X: Features/data to make predictions with 

            TODO:
                Finish this method by adding code to make a prediction. 
                Store the predicted labels into `y_hat`.
        """
        # TODO (REQUIRED) Add code below
        if X.ndim == 1:
            X = X.reshape(1, -1)

        # TODO (REQUIRED) Store predictions below by replacing np.ones()
        y_hat = self.forward(X)
        # Makes sure predictions are given as a 2D array
        return y_hat.reshape(-1, 1)


## TODO: Neural Network Classifier
Complete the TODOs.

In [9]:
class NeuralNetworkClassifier(NeuralNetwork):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        
        # Code for extracting kwargs and storing them in _param_names
        # to be used later with get_params() and set_params() methods
        for key, value in kwargs.items():
            setattr(self, key, value)
        self._param_names = list(kwargs.keys())
    
    def get_params(self, deep=True):
        """ Gets all class variables
        
            This is method is for compatibility with Sklearn's GridSearchCV 
        """
        return {param: getattr(self, param)
                for param in self._param_names}

    def set_params(self, **parameters):
        """ Sets all class variables
        
            This is method is for compatibility with Sklearn's GridSearchCV 
        """
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self
    
    def predict_proba(self, X: np.ndarray) -> np.ndarray:
        """ Predict probabilities using parameters learned during training.
        
            This is method is for compatibility with Sklearn's GridSearchCV 
                
            Args:
                X: Features/data to make predictions with 

        """
        return self.forward(X)
    
    def predict(self, X: np.ndarray) -> np.ndarray:
        """ Make predictions using parameters learned during training.
        
            Args:
                X: Features/data to make predictions with 

            TODO:
                Finish this method by adding code to make a prediction. 
                Store the predicted labels into `y_hat`.
        """
        # TODO (REQUIRED) Add code below
        probs = self.forward(X)
         
        # TODO (REQUIRED) Store predictions below by replacing np.ones()
        y_hat = np.argmax(probs, axis=1)
        # Makes sure predictions are given as a 2D array
        return y_hat.reshape(-1, 1)


# Data preparation and Activation Functions

## Data preparation
**Don't change this part**

In [10]:
class DataPreparation():
    def __init__(self, target_pipe, feature_pipe):
        self.target_pipe = target_pipe
        self.feature_pipe = feature_pipe
        
    @abstractmethod
    def data_prep(self):
        pass
    
    def fit(self, X, y=None):
        if self.target_pipe  is not None:
            self.target_pipe.fit(y)
            
        if self.feature_pipe is not None:
            self.feature_pipe.fit(X)

    def transform(self, X, y=None):
        if self.target_pipe is not None:
            y = self.target_pipe.transform(y)
            
        if self.feature_pipe is not None:
            X = self.feature_pipe.transform(X)

        return X, y
    
    def fit_transform(self, X, y):
        self.fit(X, y)
        X, y = self.transform(X, y)
        return X, y

## Housing Dataset 
**Don't change this part**

In [11]:
class HousingDataPreparation(DataPreparation):
    def __init__(self, target_pipe, feature_pipe, use_features):
        super().__init__(target_pipe, feature_pipe)
        self.use_features = use_features
        
    def data_prep(self, return_array=False):
        
        if self.target_pipe is not None:
            warnings.warn("Target pipeline is not needed for the Boston House Price dataset. " \
                          "Even though you passed a Pipeline for `target_pipe`, " \
                          "`target_pipe` will be set to None.")
            self.target_pipe = None
        
        if return_array: 
            print("Returning data as NumPy array...")
            return_df = False
        
        print(f"Attempting to use the following features: {self.use_features}")
        housing_dataset = HousingDataset()
        house_df_trn, house_df_vld = housing_dataset.load()
        
        X_trn_df, y_trn_df, X_vld_df, y_vld_df = split_data(
            df_trn=house_df_trn,
            df_vld=house_df_vld,
            use_features=self.use_features,
            label_name='MEDV',
            return_df=return_df
        )

        X_trn_df, y_trn_df = self.fit_transform(X=X_trn_df, y=y_trn_df)
        X_vld_df, y_vld_df = self.transform(X=X_vld_df, y=y_vld_df)
        
        return X_trn_df, y_trn_df, X_vld_df, y_vld_df

## MNIST Dataset
**Don't change this part**

In [12]:
class MNISTDataPreparation(DataPreparation):
    def __init__(self, target_pipe, feature_pipe):
        super().__init__(target_pipe, feature_pipe)
        
    def data_prep(self, binarize=False, return_array=False):
        mnist_dataset = MNISTDataset()
        X_trn_df, y_trn_df, X_vld_df, y_vld_df = mnist_dataset.load()
        
        # Converts MNIST problem to classifying ONLY 1s vs 0s
        if binarize:
            X_trn_df, y_trn_df = binarize_classes(
                X_trn_df, 
                y_trn_df, 
                pos_class=[1],
                neg_class=[0], 
            )
            
            X_vld_df, y_vld_df = binarize_classes(
                X_vld_df, 
                y_vld_df, 
                pos_class=[1], 
                neg_class=[0], 
            )

        X_trn_df, y_trn_df = self.fit_transform(X=X_trn_df, y=y_trn_df)
        X_vld_df, y_vld_df = self.transform(X=X_vld_df, y=y_vld_df)

        if return_array:
            print("Returning data as NumPy array...")
            return dataframe_to_array([X_trn_df, y_trn_df, X_vld_df, y_vld_df])
            
        return X_trn_df, y_trn_df, X_vld_df, y_vld_df

## TODO: Define Activation Functions
Complete the TODOs

In [13]:
def delta_mse(y, y_hat):
    # TODO (REQUIRED) Add code below for the derivative of the mean squared error
    return y_hat - y

def delta_softmax_nll(y, y_hat):
    # TODO (REQUIRED) Add code below for for the combined derivative of the softmax and negative log likelihood loss
    return y_hat - y

class Linear():
    @staticmethod
    def activation(z):
        # TODO (REQUIRED) Add code below for Linear activation function equation
        return z
    
    @staticmethod
    def derivative(z):
        # TODO (REQUIRED) Add code below for Linear activation function derivative
        return np.ones_like(z)
    
class Sigmoid():
    @staticmethod
    def activation(z):
        # TODO (REQUIRED) Add code below for Sigmoid activation function equation
        z = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-z))
    
    @staticmethod
    def derivative(z):
        # TODO (REQUIRED) Add code below for Sigmoid activation function derivative
        sig_z = Sigmoid.activation(z)
        return sig_z * (1 - sig_z)


class Tanh():
    @staticmethod
    def activation(z):
        # TODO (REQUIRED) Add code below for Tanh activation function equation
        return np.tanh(z)
    
    @staticmethod
    def derivative(z):
        # TODO (REQUIRED) Add code below for Tanh activation function derivative
        return 1 - np.tanh(z)**2

class ReLU():
    @staticmethod
    def activation(z):
         # TODO (REQUIRED) Add code below for ReLU activation function equation
        return np.maximum(0, z)
    
    @staticmethod
    def derivative(z):
        # TODO (REQUIRED) Add code below for ReLU activation function derivative
        return (z > 0).astype(float)

class Softmax():
    @staticmethod
    def activation(z):
        # TODO (REQUIRED) Add code below for softmax activation function equation
        exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)
    
    @staticmethod
    def derivative(z):
        # TODO (REQUIRED) Add code below for softmax activation function derivative
        return np.ones_like(z)

# TODO: Define Hyperparameters 
Complete the TODOs. 

In [14]:
class HyperParametersAndTransforms():
    
    @staticmethod
    def get_params(name):
        model = getattr(HyperParametersAndTransforms, name)
        params = {}
        for key, value in model.__dict__.items():
            if not key.startswith('__') and not callable(key):
                if not callable(value) and not isinstance(value, staticmethod):
                    params[key] = value
        return params
    
    class NeuralNetworkRegressor():
        """Kwargs for regression neural network and data prep"""
        model_kwargs = dict(
            neurons_per_layer = [32, 24, 16, 8, 1], # TODO (REQUIRED) Set neural network neurons per layer
            learning_curve_loss = sse,
            delta_loss_func = delta_mse, # TODO (REQUIRED) Set neural network's loss function derivative
            g_hidden = ReLU, # TODO (REQUIRED) Set neural network's hidden neurons activation function
            g_output = Linear,  # TODO (REQUIRED) Set neural network's output neurons activation function
            alpha = 0.0005, # TODO (REQUIRED) Set neural network's learning rate
            epochs = 300,  # TODO (REQUIRED) Set neural network's  epochs
            batch_size = 8, # TODO (REQUIRED) Set neural network's mini-batch size
            verbose = True, # TODO (OPTIONAL) Set to allow neural network to print debugging statements during training 
            seed = 42, # TODO (OPTIONAL) Set the neural network to random state seed 
        )
        
        # (OPTIONAL) model kwargs used for hyper-parameter search.
        # EVERY argument must be wrapped in a list.
        search_model_kwargs = dict(
            neurons_per_layer = [[32, 24, 16, 8, 1], [64, 48, 32, 16, 1]], # TODO (OPTIONAL) Set neural network neurons per layer
            learning_curve_loss = [sse],
            delta_loss_func = [delta_mse], # TODO (OPTIONAL) Set neural network's loss function derivative
            g_hidden = [ReLU], # TODO (OPTIONAL) Set neural network's hidden neurons activation function
            g_output = [Linear],  # TODO (OPTIONAL) Set neural network's output neurons activation function
            alpha = [0.003, 0.005], # TODO (OPTIONAL) Set neural network's learning rate
            epochs = [250, 300],  # TODO (OPTIONAL) Set neural network's  epochs
            batch_size = [8, 12], # TODO (OPTIONAL) Set neural network's mini-batch size
            verbose = [True], # TODO (OPTIONAL) Set to allow neural network to print debugging statements during training 
            seed = [42], # TODO (OPTIONAL) Set the neural network to random state seed 
        )
        
        data_prep_kwargs = dict(
            # TODO (OPTIONAL) Add Pipeline() definitions below
            target_pipe = None,
            # TODO (REQUIRED) Add Pipeline() definitions below
            feature_pipe = Pipeline([
                ('standardize', Standardization())
            ]),
            # TODO (OPTIONAL) Set the names of the features/columns to use for the Housing dataset
            use_features = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
                            'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT'],
        )
        
    class NeuralNetworkClassifier():
        """Kwargs for classifier neural network and data prep"""
        model_kwargs = dict(
            neurons_per_layer = [128, 64, 10], # TODO (REQUIRED) Set neural network neurons per layer
            learning_curve_loss = nll,
            delta_loss_func = delta_softmax_nll, # TODO (REQUIRED) Set neural network's loss function derivative
            g_hidden = ReLU, # TODO (REQUIRED) Set neural network's hidden neurons activation function
            g_output = Softmax,  # TODO (REQUIRED) Set neural network's output neurons activation function
            alpha = 0.01, # TODO (REQUIRED) Set neural network's learning rate
            epochs = 10,  # TODO (REQUIRED) Set neural network's  epochs
            batch_size = 64, # TODO (REQUIRED) Set neural network's mini-batch size
            verbose = True, # TODO (OPTIONAL) Set to allow neural network to print debugging statements during training 
            seed = 42, # TODO (OPTIONAL) Set the neural network to random state seed 
        )
        
        # (OPTIONAL) model kwargs used for hyper-parameter search.
        # EVERY argument must be wrapped in a list.
        search_model_kwargs = dict(
            neurons_per_layer = [[128, 64, 10], [256, 128, 10]], # TODO (OPTIONAL) Set neural network neurons per layer
            learning_curve_loss = [nll],
            delta_loss_func = [delta_softmax_nll], # TODO (OPTIONAL) Set neural network's loss function derivative
            g_hidden = [ReLU, Tanh], # TODO (OPTIONAL) Set neural network's hidden neurons activation function
            g_output = [Softmax],  # TODO (OPTIONAL) Set neural network's output neurons activation function
            alpha = [0.01, 0.005], # TODO (OPTIONAL) Set neural network's learning rate
            epochs = [10, 15],  # TODO (OPTIONAL) Set neural network's  epochs
            batch_size = [64, 128], # TODO (OPTIONAL) Set neural network's mini-batch size
            verbose = [True], # TODO (OPTIONAL) Set to allow neural network to print debugging statements during training 
            seed = [42], # TODO (OPTIONAL) Set the neural network to random state seed 
        )
        
        data_prep_kwargs = dict(
            target_pipe = Pipeline([
                ('one_hot', OneHotEncoding())
            ]),
            feature_pipe = Pipeline([
                ('normalize', ImageNormalization())
            ]),
        )


## Define Model running (training/fit and testing/evaluate)
**Don't change this part**

In [15]:
def get_name(obj):
    try:
        if hasattr(obj, '__name__'):
            return obj.__name__
        else:
            return obj
    except Exception as e:
        return obj
    
def catch_and_throw(e, err):
    trace = traceback.format_exc()
    print(err + f"\n{trace}")
    raise e

In [16]:
class RunModel():
    t1 = '\t'
    t2 = '\t\t'
    t3 = '\t\t\t'
    def __init__(self, model, model_params):
        self.model_name = model.__name__
        self.model_params = model_params
        self.model = self.build_model(model, model_params)

    def build_model(self, model, model_params):
        print("="*50)
        print(f"Building model {self.model_name}")
        
        try:
            model = model(**model_params)
        except Exception as e:
            err = f"Exception caught while building model for {self.model_name}:"
            catch_and_throw(e, err)
        return model
    
    def fit(self, *args, **kwargs):
        print(f"Training {self.model_name}...")
        print(f"{self.t1}Using hyperparameters: ")
        [print(f"{self.t2}{n} = {get_name(v)}")for n, v in self.model_params.items()]
        try: 
            return self._fit(*args, **kwargs)
        except Exception as e:
            err = f"Exception caught while training model for {self.model_name}:"
            catch_and_throw(e, err)
            
    def _fit(self, X, y, metrics=None, pass_y=False):
        if pass_y:
            self.model.fit(X, y)
        else:
             self.model.fit(X)
        preds = self.model.predict(X)
        scores = self.get_metrics(y, preds, metrics, prefix='Train')
        return scores
    
    def evaluate(self, *args, **kwargs):
        print(f"Evaluating {self.model_name}...")
        try:
            return self._evaluate(*args, **kwargs)
        except Exception as e:
            err = f"Exception caught while evaluating model for {self.model_name}:"
            catch_and_throw(e, err)
        

    def _evaluate(self, X, y, metrics, prefix=''):
        preds = self.model.predict(X)
        scores = self.get_metrics(y, preds, metrics, prefix)      
        return scores
    
    def predict(self, X):
        try:
            preds = self.model.predict(X)
        except Exception as e:
            err = f"Exception caught while making predictions for model {self.model_name}:"
            catch_and_throw(e, err)
            
        return preds
    
    def get_metrics(self, y, y_hat, metrics, prefix=''):
        scores = {}
        for name, metric in metrics.items():
            score = metric(y, y_hat)
            display_score = round(score, 3)
            scores[name] = score
            print(f"{self.t2}{prefix} {name}: {display_score}")
        return scores

In [17]:
def run_eval(eval_stage='validation'):
    main_timer = Timer()
    main_timer.start()
    
    task_info = [
       dict(
            model=NeuralNetworkRegressor,
            name='NeuralNetworkRegressor',
            data=HousingDataPreparation,
            data_prep=dict(return_array=True),
            metrics=dict(mse=mse),
            eval_metric='mse',
            rubric=rubric_regression,
            trn_score=9999,
            eval_score=9999,
            successful=False,
        ),
        dict(
            model=NeuralNetworkClassifier,
            name='NeuralNetworkClassifier',
            data=MNISTDataPreparation,
            data_prep=dict(return_array=True),
            metrics=dict(acc=accuracy),
            eval_metric='acc',
            rubric=rubric_classification,
            trn_score=0,
            eval_score=0,
            successful=False,
        ),
    ]
    
    total_points = 0

    for info in task_info:
        task_timer =  Timer()
        task_timer.start()
        try:
            params = HyperParametersAndTransforms.get_params(info['name'])
            model_kwargs = params.get('model_kwargs', {})
            data_prep_kwargs = params.get('data_prep_kwargs', {})
            
            run_model = RunModel(info['model'], model_kwargs)
            data = info['data'](**data_prep_kwargs)
            X_trn, y_trn, X_vld, y_vld = data.data_prep(**info['data_prep'])

            trn_scores = run_model.fit(X_trn, y_trn, info['metrics'], pass_y=True)
            eval_scores = run_model.evaluate(X_vld, y_vld, info['metrics'], prefix=eval_stage.capitalize())
            
            if not math.isnan(trn_scores[info['eval_metric']]):
                info['trn_score'] = trn_scores[info['eval_metric']]
            if not math.isnan(eval_scores[info['eval_metric']]):
                info['eval_score'] = eval_scores[info['eval_metric']]
            
            info['successful'] = True
                
        except Exception as e:
            track = traceback.format_exc()
            print("The following exception occurred while executing this test case:\n", track)
        task_timer.stop()
        
        print("")
        points = info['rubric'](info['eval_score'])
        print(f"Points Earned: {points}")
        total_points += points

    print("="*50)
    print('')
    main_timer.stop()

    successful_tests = summary(task_info)
    final_mse, final_acc = get_eval_scores(task_info)
    total_points = int(round(total_points))
    
    print(f"Tests passed: {successful_tests}/{ len(task_info)}, Total Points: {total_points}/80\n")
    print(f"Final {eval_stage.capitalize()} MSE: {final_mse}")
    print(f"Final {eval_stage.capitalize()} Accuracy: {final_acc}")

    return total_points, main_timer.last_elapsed_time, final_mse, final_acc

def summary(task_info):
    successful_tests = 0

    for info in task_info:
        if info['successful']:
            successful_tests += 1
    
    if successful_tests == 0:
        return successful_tests

    return successful_tests

def get_eval_scores(task_info):
    return [i['eval_score'] for i in task_info]

## Evaluation Related Functions
Don't change this section.

In [18]:
def rubric_regression(mse, max_score=40):
    thresh = 12.5
    if mse <= thresh:
        score_percent = 100
    elif mse is not None:
        score_percent = (thresh / mse) * 100
        if score_percent < 40:
            score_percent = 40
    else:
        score_percent = 20
    score = max_score * score_percent / 100.0

    return score

def rubric_classification(acc, max_score=40):
    score_percent = 0
    if acc >= 0.93:
        score_percent = 100
    elif acc >= 0.85:
        score_percent = 90
    elif acc >= 0.70:
        score_percent = 80
    elif acc >= 0.60:
        score_percent = 70
    elif acc >= 0.50:
        score_percent = 60
    elif acc >= 0.40:
        score_percent = 55
    elif acc >= 0.30:
        score_percent = 50
    elif acc >= 0.20:
        score_percent = 45
    else:
        score_percent = 40
    score = max_score * score_percent / 100.0 
    return score

# Test your code
Run the following cell to test your code (or for **debugging**).

In [19]:
if __name__ == "__main__":
    run_eval()

Building model NeuralNetworkRegressor
Returning data as NumPy array...
Attempting to use the following features: ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']
Skipping download. File already exists: /Users/hritz/Downloads/Neural Network Mini Project/datasets/data/housing.train

Skipping download. File already exists: /Users/hritz/Downloads/Neural Network Mini Project/datasets/data/housing.val

Skipping download. File already exists: /Users/hritz/Downloads/Neural Network Mini Project/datasets/data/housing.names

Training NeuralNetworkRegressor...
	Using hyperparameters: 
		neurons_per_layer = [32, 24, 16, 8, 1]
		learning_curve_loss = sse
		delta_loss_func = delta_mse
		g_hidden = ReLU
		g_output = Linear
		alpha = 0.0005
		epochs = 300
		batch_size = 8
		verbose = True
		seed = 42
Epoch: 1
	Training loss: 436.2530226754229
Epoch: 2
	Training loss: 33.964219642969454
Epoch: 3
	Training loss: 19.841622259736624
Epoch: 4
	Training loss: 

  df_train = pd.read_csv(self.data["paths"]["train"], delim_whitespace=True,
  df_val = pd.read_csv(self.data["paths"]["val"], delim_whitespace=True,


	Training loss: 6.127995743982819
Epoch: 72
	Training loss: 5.588752815241071
Epoch: 73
	Training loss: 5.941494518202342
Epoch: 74
	Training loss: 5.604765366103625
Epoch: 75
	Training loss: 5.849081147445295
Epoch: 76
	Training loss: 5.590917878698715
Epoch: 77
	Training loss: 5.634074394172415
Epoch: 78
	Training loss: 5.545277458262198
Epoch: 79
	Training loss: 5.512230540699442
Epoch: 80
	Training loss: 5.546890845015759
Epoch: 81
	Training loss: 5.524129439590434
Epoch: 82
	Training loss: 5.225087362185905
Epoch: 83
	Training loss: 5.696768724014329
Epoch: 84
	Training loss: 5.324653516479295
Epoch: 85
	Training loss: 5.491525503581388
Epoch: 86
	Training loss: 5.182119657083438
Epoch: 87
	Training loss: 5.122268769409226
Epoch: 88
	Training loss: 5.1853943130195
Epoch: 89
	Training loss: 5.185666082263679
Epoch: 90
	Training loss: 5.231731196501061
Epoch: 91
	Training loss: 5.115134523418452
Epoch: 92
	Training loss: 4.919209278211144
Epoch: 93
	Training loss: 4.771350027807817
