In [12]:
import numpy as np
from numpy import ndarray
# test_size=0.3表示测试集占30%的数据，random_state是一个种子，确保每次分割的方式都是相同的

from typing import List

In [13]:
def assert_same_shape(array: ndarray,
                      array_grad: ndarray):
    assert array.shape == array_grad.shape, \
        '''
        Two ndarrays should have the same shape;
        instead, first ndarray's shape is {0}
        and second ndarray's shape is {1}.
        '''.format(tuple(array_grad.shape), tuple(array.shape))
    return None

# `Operation` and `ParamOperation`

In [14]:
class Operation(object):
    '''
    Base class for an "operation" in a neural network.
    基类，定义神经网络中的一个操作。
    每个具体的操作（如加法、乘法、激活函数）都将从此类派生。
    '''
    def __init__(self):
        pass

    def forward(self, input_: ndarray):
        '''
        Stores input in the self._input instance variable
        Calls the self._output() function.
        前向传播：计算此操作的输出。
        :param input_: 输入数据
        :return: 该操作的输出
        '''
        self.input_ = input_

        self.output = self._output()

        return self.output


    def backward(self, output_grad: ndarray) -> ndarray:
        '''
        Calls the self._input_grad() function.
        Checks that the appropriate shapes match.
        反向传播：计算此操作的输入梯度。
        :param output_grad: 上游传来的梯度
        :return: 对于该操作输入的梯度
        '''
        assert_same_shape(self.output, output_grad)

        self.input_grad = self._input_grad(output_grad)

        assert_same_shape(self.input_, self.input_grad)
        return self.input_grad


    def _output(self) -> ndarray:
        '''
        The _output method must be defined for each Operation
        抽象方法，定义如何计算此操作的输出。
        每个从Operation派生的类都必须实现此方法。
        '''
        raise NotImplementedError()


    def _input_grad(self, output_grad: ndarray) -> ndarray:
        '''
        The _input_grad method must be defined for each Operation
        抽象方法，定义如何计算此操作的输入梯度。
        每个从Operation派生的类都必须实现此方法。
        :param output_grad: 上游传来的梯度
        :return: 对于该操作输入的梯度
        '''
        raise NotImplementedError()

In [15]:
class ParamOperation(Operation):
    '''
    An Operation with parameters.
    ParamOperation类的特点是它包含有参数的运算操作，例如权重乘法或带有偏差的加法。这与不涉及参数的基本操作（例如激活函数）相对立。
    '''

    def __init__(self, param: ndarray) -> ndarray:
        '''
        The ParamOperation method
        '''
        super().__init__()
        self.param = param

    def backward(self, output_grad: ndarray) -> ndarray:
        '''
        Calls self._input_grad and self._param_grad.
        Checks appropriate shapes.
        '''

        assert_same_shape(self.output, output_grad)

        self.input_grad = self._input_grad(output_grad) # 计算输入梯度。
        self.param_grad = self._param_grad(output_grad) # 计算参数梯度。

        assert_same_shape(self.input_, self.input_grad)
        assert_same_shape(self.param, self.param_grad)

        return self.input_grad

    def _param_grad(self, output_grad: ndarray) -> ndarray:
        '''
        Every subclass of ParamOperation must implement _param_grad.
        '''
        raise NotImplementedError()

## Specific `Operation`s

In [16]:
class WeightMultiply(ParamOperation):
    '''
    Weight multiplication operation for a neural network.
    该类定义了神经网络中的权重乘法操作，即输入数据与权重之间的点积。
    '''

    def __init__(self, W: ndarray):
        '''
        Initialize Operation with self.param = W.
        '''
        super().__init__(W)

    def _output(self) -> ndarray:
        '''
        Compute output.
        '''
        return np.dot(self.input_, self.param)

    def _input_grad(self, output_grad: ndarray) -> ndarray:
        '''
        Compute input gradient.
        该方法定义了如何计算"上游传来的梯度"相对于输入的梯度。它基于传入的输出梯度和权重参数的转置来计算输入梯度。
        '''
        return np.dot(output_grad, np.transpose(self.param, (1, 0)))

    def _param_grad(self, output_grad: ndarray)  -> ndarray:
        '''
        Compute parameter gradient.
        该方法定义了如何计算"上游传来的梯度"相对于权重参数的梯度。它基于输入的转置和传入的输出梯度来计算权重参数的梯度。
        '''        
        return np.dot(np.transpose(self.input_, (1, 0)), output_grad)

In [17]:
class BiasAdd(ParamOperation):
    '''
    Compute bias addition.
    实现偏置加法操作。
    '''

    def __init__(self,
                 B: ndarray):
        '''
        Initialize Operation with self.param = B.
        Check appropriate shape.
        '''
        assert B.shape[0] == 1
        
        super().__init__(B)

    def _output(self) -> ndarray:
        '''
        Compute output.
         计算输出。
        对于每一个输入数据点，都加上相同的偏置。
        '''
        return self.input_ + self.param

    def _input_grad(self, output_grad: ndarray) -> ndarray:
        '''
        Compute input gradient.
        偏置加法对所有输入的影响是相同的，所以我们只需返回与output_grad形状相同的值。
        '''
        return np.ones_like(self.input_) * output_grad

    def _param_grad(self, output_grad: ndarray) -> ndarray:
        '''
        Compute parameter gradient.
        # 由于我们处理的是批量数据，因此需要对所有数据点的梯度求和，得到偏置对整个批量数据的总影响。
        # reshape是为了确保结果的形状与偏置的形状相同。
        '''
        param_grad = np.ones_like(self.param) * output_grad
        return np.sum(param_grad, axis=0).reshape(1, param_grad.shape[1])

In [18]:
class Sigmoid(Operation):
    '''
    Sigmoid activation function.
    Sigmoid 激活函数。
    '''

    def __init__(self) -> None:
        '''Pass'''
        super().__init__()

    def _output(self) -> ndarray:
        '''
        Compute output.
        计算输出。
        使用Sigmoid函数公式: 1 / (1 + exp(-x))
        '''
        return 1.0/(1.0+np.exp(-1.0 * self.input_))

    def _input_grad(self, output_grad: ndarray) -> ndarray:
        '''
        Compute input gradient.
        计算输入梯度。
        使用Sigmoid函数的导数公式: sigmoid(x) * (1 - sigmoid(x))
        然后乘以上游传来的梯度。
        '''
        sigmoid_backward = self.output * (1.0 - self.output)
        input_grad = sigmoid_backward * output_grad
        return input_grad

In [19]:
class Linear(Operation):
    '''
    "Identity" activation function
    恒等操作，它不会改变其输入。因此，它的输出和输入是相同的，而输入的梯度也是上游传来的梯度，没有任何变化。
    '''

    def __init__(self) -> None:
        '''Pass'''        
        super().__init__()

    def _output(self) -> ndarray:
        '''Pass through'''
        return self.input_

    def _input_grad(self, output_grad: ndarray) -> ndarray:
        '''Pass through'''
        return output_grad

# `Layer` and `Dense`

In [20]:
class Layer(object):
    '''
    A "layer" of neurons in a neural network.
    神经网络中的一个“层”。
    '''

    def __init__(self,
                 neurons: int):
        '''
        The number of "neurons" roughly corresponds to the "breadth" of the layer
        初始化方法。
        "neurons" 的数量大致对应于该层的“宽度”或“深度”。
        '''
        self.neurons = neurons
        self.first = True
        self.params: List[ndarray] = []
        self.param_grads: List[ndarray] = []
        self.operations: List[Operation] = []

    def _setup_layer(self, num_in: int) -> None:
        '''
        The _setup_layer function must be implemented for each layer
        每个层都必须实现的_setup_layer函数。
        该函数用于根据输入设置层。
        '''
        raise NotImplementedError()

    def forward(self, input_: ndarray) -> ndarray:
        '''
        Passes input forward through a series of operations
        通过一系列操作向前传递输入。
        如果是第一次执行，则进行层设置。
        ''' 
        if self.first:
            self._setup_layer(input_)
            self.first = False

        self.input_ = input_

        for operation in self.operations:

            input_ = operation.forward(input_)

        self.output = input_

        return self.output

    def backward(self, output_grad: ndarray) -> ndarray:
        '''
        Passes output_grad backward through a series of operations
        Checks appropriate shapes
        通过一系列操作向后传递output_grad。
        检查形状是否相符。
        '''

        assert_same_shape(self.output, output_grad)

        for operation in reversed(self.operations):
            output_grad = operation.backward(output_grad)

        input_grad = output_grad
        
        self._param_grads()

        return input_grad

    def _param_grads(self) -> ndarray:
        '''
        Extracts the _param_grads from a layer's operations
        从层的操作中提取参数的梯度。
        '''

        self.param_grads = []
        for operation in self.operations:
            if issubclass(operation.__class__, ParamOperation):
                self.param_grads.append(operation.param_grad)

    def _params(self) -> ndarray:
        '''
        Extracts the _params from a layer's operations
        从层的操作中提取参数。
        '''

        self.params = []
        for operation in self.operations:
            if issubclass(operation.__class__, ParamOperation):
                self.params.append(operation.param)

In [21]:
class Dense(Layer):
    '''
    A fully connected layer which inherits from "Layer"
    全连接层，继承自"Layer"类。
    '''
    def __init__(self,
                 neurons: int,
                 activation: Operation = Sigmoid()):
        '''
        Requires an activation function upon initialization
        初始化时需要一个激活函数。
        
        参数:
            neurons: 神经元的数量，即该层的宽度。
            activation: 该层使用的激活函数，默认为Sigmoid函数。
        '''
        super().__init__(neurons)
        self.activation = activation

    def _setup_layer(self, input_: ndarray) -> None:
        '''
        Defines the operations of a fully connected layer.
        定义全连接层的操作。
        
        参数:
            input_: 上一层的输出，用于确定当前层的输入维度。
        '''
        if self.seed:
            np.random.seed(self.seed)

        self.params = []

        # weights
        self.params.append(np.random.randn(input_.shape[1], self.neurons))

        # bias
        self.params.append(np.random.randn(1, self.neurons))

        self.operations = [WeightMultiply(self.params[0]),
                           BiasAdd(self.params[1]),
                           self.activation]

        return None

# `Loss` and `MeanSquaredError`

In [22]:
class Loss(object):
    '''
    The "loss" of a neural network
    神经网络的“损失”类。
    '''

    def __init__(self):
        '''Pass'''
        pass

    def forward(self, prediction: ndarray, target: ndarray) -> float:
        '''
        Computes the actual loss value
        计算实际的损失值。
        参数:
        - prediction: 神经网络的预测值
        - target: 真实的目标值

        '''
        assert_same_shape(prediction, target)

        self.prediction = prediction
        self.target = target

        loss_value = self._output()

        return loss_value

    def backward(self) -> ndarray:
        '''
        Computes gradient of the loss value with respect to the input to the loss function
        计算损失值相对于损失函数输入的梯度。
        返回:
        - 输入的梯度
        '''
        self.input_grad = self._input_grad()

        assert_same_shape(self.prediction, self.input_grad)

        return self.input_grad

    def _output(self) -> float:
        '''
        Every subclass of "Loss" must implement the _output function.
        每个“Loss”子类都必须实现_output函数。
        这个函数的目的是计算损失值。
        '''
        raise NotImplementedError()

    def _input_grad(self) -> ndarray:
        '''
        Every subclass of "Loss" must implement the _input_grad function.
        每个“Loss”子类都必须实现_input_grad函数。
        这个函数的目的是计算损失相对于其输入的梯度。
        '''
        raise NotImplementedError()

In [23]:
class MeanSquaredError(Loss):

    def __init__(self) -> None:
        '''Pass'''
        super().__init__()

    def _output(self) -> float:
        '''
        Computes the per-observation squared error loss
        计算均方误差损失
        '''
        loss = (
            np.sum(np.power(self.prediction - self.target, 2)) / 
            self.prediction.shape[0]
        )

        return loss

    def _input_grad(self) -> ndarray:
        '''
        Computes the loss gradient with respect to the input for MSE loss
        计算均方误差损失相对于其输入的梯度。
        '''        

        return 2.0 * (self.prediction - self.target) / self.prediction.shape[0]

# `NeuralNetwork`

In [24]:
class NeuralNetwork(object):
    '''
    The class for a neural network.
    神经网络类。
    '''
    def __init__(self, 
                 layers: List[Layer],
                 loss: Loss,
                 seed: int = 1) -> None:
        '''
        Neural networks need layers, and a loss.
        神经网络需要层和损失函数来初始化。

        参数:
        - layers: 神经网络的层的列表
        - loss: 损失函数
        - seed: 随机种子，用于初始化权重

        '''
        self.layers = layers
        self.loss = loss
        self.seed = seed
        if seed:
            for layer in self.layers:
                setattr(layer, "seed", self.seed)        

    def forward(self, x_batch: ndarray) -> ndarray:
        '''
        Passes data forward through a series of layers.
        通过一系列的层前向传播数据。
        '''
        x_out = x_batch
        for layer in self.layers:
            x_out = layer.forward(x_out)

        return x_out

    def backward(self, loss_grad: ndarray) -> None:
        '''
        Passes data backward through a series of layers.
        通过一系列的层反向传播数据。
        '''

        grad = loss_grad
        for layer in reversed(self.layers):
            grad = layer.backward(grad)

        return None

    def train_batch(self,
                    x_batch: ndarray,
                    y_batch: ndarray) -> float:
        '''
        Passes data forward through the layers.
        Computes the loss.
        Passes data backward through the layers.
        通过层前向传播数据，计算损失，然后通过层反向传播数据。
        '''
        
        predictions = self.forward(x_batch)

        loss = self.loss.forward(predictions, y_batch)

        self.backward(self.loss.backward())

        return loss
    
    def params(self):
        '''
        Gets the parameters for the network.
        '''
        for layer in self.layers:
            yield from layer.params

    def param_grads(self):
        '''
        Gets the gradient of the loss with respect to the parameters for the network.
        '''
        for layer in self.layers:
            yield from layer.param_grads    

# `Optimizer` and `SGD`

In [25]:
class Optimizer(object):
    '''
    Base class for a neural network optimizer.
    神经网络优化器的基类。
    在机器学习和深度学习中，优化器是用于调整模型参数以最小化或最大化某个目标（通常是最小化损失函数）的算法。
    学习率是控制参数更新步长的超参数，它决定了模型参数在每次迭代时应该移动多远以最小化损失函数。
    '''
    def __init__(self,
                 lr: float = 0.01):
        '''
        Every optimizer must have an initial learning rate.
        每个优化器都必须有一个初始的学习率。
        '''
        self.lr = lr

    def step(self) -> None:
        '''
        Every optimizer must implement the "step" function.
        每个优化器都必须实现"step"函数，用于更新网络中的参数。
        '''
        pass

In [26]:
class SGD(Optimizer):
    '''
    Stochasitc gradient descent optimizer.
    随机梯度下降优化器。
    '''    
    def __init__(self,
                 lr: float = 0.01) -> None:
        '''Pass'''
        super().__init__(lr)

    def step(self):
        '''
        For each parameter, adjust in the appropriate direction, with the magnitude of the adjustment 
        based on the learning rate.
        对于每个参数，根据学习率的大小，沿着减少损失的方向进行调整。
        通过学习率控制调整的幅度。
        zip(self.net.params(), self.net.param_grads()) 是将这些参数和梯度配对起来。
        简单地说，它会同时从 self.net.params() 和 self.net.param_grads() 中取出一个值，并将这两个值打包成一个元组，
        然后将所有这样的元组组合成一个迭代器。


        '''
        for (param, param_grad) in zip(self.net.params(),
                                       self.net.param_grads()):

            param -= self.lr * param_grad

# `Trainer`

In [27]:
from copy import deepcopy
from typing import Tuple

class Trainer(object):
    '''
    Trains a neural network
    用于训练神经网络的类
    '''
    def __init__(self,
                 net: NeuralNetwork,
                 optim: Optimizer) -> None:
        '''
        Requires a neural network and an optimizer in order for training to occur. 
        Assign the neural network as an instance variable to the optimizer.
           需要一个神经网络和一个优化器来进行训练。
        将神经网络作为优化器的实例变量。
        '''
        self.net = net
        self.optim = optim
        self.best_loss = 1e9 # 初始化一个非常大的最佳损失值
        setattr(self.optim, 'net', self.net)

    # 从给定的数据集X和y中生成小批量的数据，这种小批量的方法被称为批量梯度下降，它可以使神经网络的训练更加高效。
    # 这个函数返回的是一个生成器(generator)，所以其结果是可以遍历的。
    def generate_batches(self,
                         X: ndarray,
                         y: ndarray,
                         size: int = 32) -> Tuple[ndarray]:
        '''
        Generates batches for training 
        为训练生成数据批次
        '''
        assert X.shape[0] == y.shape[0], \
        '''
        features and target must have the same number of rows, instead
        features has {0} and target has {1}
        '''.format(X.shape[0], y.shape[0])

        N = X.shape[0]

        for ii in range(0, N, size):
            X_batch, y_batch = X[ii:ii+size], y[ii:ii+size]

            yield X_batch, y_batch

            
    def fit(self, X_train: ndarray, y_train: ndarray,
            X_test: ndarray, y_test: ndarray,
            epochs: int=100,
            eval_every: int=10,
            batch_size: int=32,
            seed: int = 1,
            restart: bool = True)-> None:
        '''
        Fits the neural network on the training data for a certain number of epochs.
        Every "eval_every" epochs, it evaluated the neural network on the testing data.
        '''
        # 设定随机种子，确保训练的复现性
        np.random.seed(seed)
        # 如果restart为True，则重置网络的每一层，并将best_loss设为一个很大的值
        if restart:
            for layer in self.net.layers:
                layer.first = True

            self.best_loss = 1e9
        # 进行epochs次的训练周期
        for e in range(epochs):
            # 每eval_every个周期，备份当前模型，以便后续可能需要回滚到此模型
            if (e+1) % eval_every == 0:
                # for early stopping
                last_model = deepcopy(self.net)
            # 将训练数据随机打乱，有助于提高模型的泛化能力
            X_train, y_train = permute_data(X_train, y_train)
            # 使用生成器函数生成数据批次
            batch_generator = self.generate_batches(X_train, y_train,
                                                    batch_size)
            # 对每个数据批次进行训练
            for ii, (X_batch, y_batch) in enumerate(batch_generator):
                # 使用神经网络对当前批次进行前向传播和反向传播
                self.net.train_batch(X_batch, y_batch)
                # 根据优化器调整网络参数
                self.optim.step()
            # 每eval_every个周期在测试数据上评估模型
            if (e+1) % eval_every == 0:
                # 对测试数据进行预测
                test_preds = self.net.forward(X_test)
                # 计算预测值与真实值之间的损失
                loss = self.net.loss.forward(test_preds, y_test)
                # 如果当前损失小于之前的最佳损失，则更新最佳损失值
                if loss < self.best_loss:
                    print(f"Validation loss after {e+1} epochs is {loss:.3f}")
                    self.best_loss = loss
                else:
                    # 如果损失增加，表示模型可能过拟合了，因此我们停止训练
                    # 并恢复到之前保存的模型状态
                    print(f"""Loss increased after epoch {e+1}, final loss was {self.best_loss:.3f}, using the model from epoch {e+1-eval_every}""")
                    self.net = last_model
                    # ensure self.optim is still updating self.net
                    setattr(self.optim, 'net', self.net)
                    break

#### Evaluation metrics

In [28]:
def mae(y_true: ndarray, y_pred: ndarray):
    '''
    Compute mean absolute error for a neural network.
    计算神经网络的平均绝对误差（MAE）。
    '''    
    return np.mean(np.abs(y_true - y_pred))

def rmse(y_true: ndarray, y_pred: ndarray):
    '''
    Compute root mean squared error for a neural network.
     计算神经网络的均方根误差（RMSE）。
    '''
    return np.sqrt(np.mean(np.power(y_true - y_pred, 2)))

def eval_regression_model(model: NeuralNetwork,
                          X_test: ndarray,
                          y_test: ndarray):
    '''
    Compute mae and rmse for a neural network.
    评估神经网络的MAE和RMSE。
    '''
    preds = model.forward(X_test)
    preds = preds.reshape(-1, 1)
    print("Mean absolute error: {:.2f}".format(mae(preds, y_test)))
    print()
    print("Root mean squared error {:.2f}".format(rmse(preds, y_test)))

In [29]:
lr = NeuralNetwork(
    layers=[Dense(neurons=1,
                   activation=Linear())],
    loss=MeanSquaredError(),
    seed=20190501
)

nn = NeuralNetwork(
    layers=[Dense(neurons=13,
                   activation=Sigmoid()),
            Dense(neurons=1,
                   activation=Linear())],
    loss=MeanSquaredError(),
    seed=20190501
)

dl = NeuralNetwork(
    layers=[Dense(neurons=13,
                   activation=Sigmoid()),
            Dense(neurons=13,
                   activation=Sigmoid()),
            Dense(neurons=1,
                   activation=Linear())],
    loss=MeanSquaredError(),
    seed=20190501
)

### Read in the data, train-test split etc.

In [30]:
from sklearn.datasets import load_boston

boston = load_boston()
data = boston.data
target = boston.target
features = boston.feature_names

ImportError: 
`load_boston` has been removed from scikit-learn since version 1.2.

The Boston housing prices dataset has an ethical problem: as
investigated in [1], the authors of this dataset engineered a
non-invertible variable "B" assuming that racial self-segregation had a
positive impact on house prices [2]. Furthermore the goal of the
research that led to the creation of this dataset was to study the
impact of air quality but it did not give adequate demonstration of the
validity of this assumption.

The scikit-learn maintainers therefore strongly discourage the use of
this dataset unless the purpose of the code is to study and educate
about ethical issues in data science and machine learning.

In this special case, you can fetch the dataset from the original
source::

    import pandas as pd
    import numpy as np

    data_url = "http://lib.stat.cmu.edu/datasets/boston"
    raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
    data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
    target = raw_df.values[1::2, 2]

Alternative datasets include the California housing dataset and the
Ames housing dataset. You can load the datasets as follows::

    from sklearn.datasets import fetch_california_housing
    housing = fetch_california_housing()

for the California housing dataset and::

    from sklearn.datasets import fetch_openml
    housing = fetch_openml(name="house_prices", as_frame=True)

for the Ames housing dataset.

[1] M Carlisle.
"Racist data destruction?"
<https://medium.com/@docintangible/racist-data-destruction-113e3eff54a8>

[2] Harrison Jr, David, and Daniel L. Rubinfeld.
"Hedonic housing prices and the demand for clean air."
Journal of environmental economics and management 5.1 (1978): 81-102.
<https://www.researchgate.net/publication/4974606_Hedonic_housing_prices_and_the_demand_for_clean_air>


In [35]:
import pandas as pd
import numpy as np

# 从原始来源获取数据
data_url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
target = raw_df.values[1::2, 2]

# 以下是 Boston 房价数据集的特征名称，与原始 load_boston 返回的接口相匹配
features = np.array(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT'])

In [36]:
# Scaling the data

# 导入数据标准化工具
# 标准化是一种重要的预处理步骤，尤其在训练神经网络时。这样可以确保每个特征都在相似的尺度上，有助于模型更快地收敛。
from sklearn.preprocessing import StandardScaler
s = StandardScaler()
# 使用数据来训练标准化对象，并对数据进行标准化处理
data = s.fit_transform(data)

In [37]:
# 这个函数的目的是将一维的ndarray对象转换为二维的ndarray对象，通常这样做是为了满足某些操作或函数的输入需求。
# 这个函数提供了两种转换方式：转换为列向量或行向量。
def to_2d_np(a: ndarray, 
          type: str="col") -> ndarray:
    '''
    Turns a 1D Tensor into 2D
    '''

    assert a.ndim == 1, \
    "Input tensors must be 1 dimensional"
    
    if type == "col":        
        return a.reshape(-1, 1)
    elif type == "row":
        return a.reshape(1, -1)

In [38]:
from sklearn.model_selection import train_test_split
# test_size=0.3表示测试集占30%的数据，random_state是一个种子，确保每次分割的方式都是相同的
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.3, random_state=80718)

# make target 2d array
y_train, y_test = to_2d_np(y_train), to_2d_np(y_test)

### Train the three models

In [39]:
# helper function

def permute_data(X, y):
    perm = np.random.permutation(X.shape[0])
    return X[perm], y[perm]

In [40]:
trainer = Trainer(lr, SGD(lr=0.01))

trainer.fit(X_train, y_train, X_test, y_test,
       epochs = 50,
       eval_every = 10,
       seed=20190501);
print()
eval_regression_model(lr, X_test, y_test)

Validation loss after 10 epochs is 30.293
Validation loss after 20 epochs is 28.469
Validation loss after 30 epochs is 26.293
Validation loss after 40 epochs is 25.541
Validation loss after 50 epochs is 25.087

Mean absolute error: 3.52

Root mean squared error 5.01


In [41]:
trainer = Trainer(nn, SGD(lr=0.01))

trainer.fit(X_train, y_train, X_test, y_test,
       epochs = 50,
       eval_every = 10,
       seed=20190501);
print()
eval_regression_model(nn, X_test, y_test)

Validation loss after 10 epochs is 27.435
Validation loss after 20 epochs is 21.839
Validation loss after 30 epochs is 18.918
Validation loss after 40 epochs is 17.195
Validation loss after 50 epochs is 16.215

Mean absolute error: 2.60

Root mean squared error 4.03


In [42]:
trainer = Trainer(dl, SGD(lr=0.01))

trainer.fit(X_train, y_train, X_test, y_test,
       epochs = 50,
       eval_every = 10,
       seed=20190501);
print()
eval_regression_model(dl, X_test, y_test)

Validation loss after 10 epochs is 44.143
Validation loss after 20 epochs is 25.278
Validation loss after 30 epochs is 22.339
Validation loss after 40 epochs is 16.500
Validation loss after 50 epochs is 14.655

Mean absolute error: 2.45

Root mean squared error 3.83
