Authored by Arqam Patel. 

CC BY 4.0 License

In [1]:
import torch

$$\hat{y} = X \beta $$

$$L_{SSE} = (y-\hat{y})'(y-\hat{y}) $$


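$$ \frac{dL}{d\beta} = 2(X'X \beta - X'y) = 2X'(\hat{y} - y) $$

The implementation below drops the constant factor 2, which is equivalent to absorbing it into the learning rate.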

In [35]:
class NeuralMLR():
    """Multiple linear regression fitted by full-batch gradient descent.

    Predictions are ``X @ params`` where ``X`` is the input data with a column
    of ones appended on the right, so the last entry of ``params`` plays the
    role of the intercept.
    """

    def __init__(self, p, lr = 0.01):
        """Create a model with all parameters set to zero.

        Args:
            p: total number of parameters, i.e. number of data columns plus
                one for the appended column of ones.
            lr: step size used by ``step``.
        """
        # initialise parameters as px1 column vector of zeros
        self.params = torch.zeros(p, 1)

        self.lr = lr

    @staticmethod
    def _design_matrix(data):
        """Return ``data`` (n x p-1) with a trailing column of ones (n x p)."""
        n = data.shape[0]
        return torch.cat((data, torch.ones(n, 1)), 1)

    @staticmethod
    def _as_column(true):
        """Return the targets as an n x 1 column.

        Without this, a 1-D target of shape (n,) would broadcast against the
        (n, 1) predictions into an (n, n) matrix and silently give a wrong
        loss and gradient. An (n, 1) input is returned with the same values.
        """
        return true.reshape(-1, 1)

    def forward(self, data):
        """Return the n x 1 predictions for ``data`` (n x p-1)."""
        return self._design_matrix(data) @ self.params

    def loss(self, data, true):
        """Return the sum of squared errors as a 1 x 1 tensor."""
        residuals = self._as_column(true) - self.forward(data)
        return residuals.t() @ residuals

    def grad(self, data, true):
        """Return the p x 1 descent direction ``X' (pred - true)``.

        This is the gradient of the sum of squared errors up to the constant
        factor 2.
        """
        # build the design matrix once and reuse it for prediction and gradient
        X = self._design_matrix(data)
        pred = X @ self.params
        return X.t() @ (pred - self._as_column(true))

    def step(self, data, true):
        """Apply one gradient-descent update to ``self.params``."""
        self.params = self.params - self.lr * self.grad(data, true)

    def train(self, data, true, n_steps, verbose=True):
        """Run ``n_steps`` gradient-descent updates on the full data.

        Args:
            data: n x p-1 tensor of predictors.
            true: targets, either n x 1 or 1-D of length n.
            n_steps: number of updates to perform.
            verbose: when True (the default) print the step number, the
                parameters and the loss after every update.
        """
        for i in range(n_steps):
            if verbose:
                print(f"Step no {i}")
            self.step(data, true)
            if verbose:
                print(f"params: {self.params}")
                print(f"Loss {self.loss(data, true)}")
        

In [31]:
# training data
# Fix the RNG seed so a fresh "Restart & Run All" regenerates the same sample.
SEED = 0
N_SAMPLES = 100
torch.manual_seed(SEED)

# three independent standard-normal predictors, each an N_SAMPLES x 1 column
x1 = torch.randn(N_SAMPLES, 1)
x2 = torch.randn(N_SAMPLES, 1)
x3 = torch.randn(N_SAMPLES, 1)

# small Gaussian noise (standard deviation 0.1)
epsilon = torch.randn(N_SAMPLES, 1)/10

# true coefficients are (3, 4, -5) with intercept 2
y = 3*x1 + 4*x2 - 5*x3 + epsilon + 2

In [32]:
# place the three predictor columns side by side: one row per observation
data = torch.cat([x1, x2, x3], dim=1)

In [36]:
# 4 parameters: one coefficient per predictor column plus the intercept
model = NeuralMLR(p=4)

In [37]:
# run 100 full-batch gradient-descent updates, logging each step
model.train(data, y, n_steps=100)

Step no 0
params: tensor([[ 2.2542],
        [ 5.5017],
        [-4.7817],
        [ 3.3572]])
Loss tensor([[565.7181]])
Step no 1
params: tensor([[ 2.7083],
        [ 3.4704],
        [-4.7618],
        [ 1.7063]])
Loss tensor([[63.2313]])
Step no 2
params: tensor([[ 2.9466],
        [ 4.1625],
        [-5.0185],
        [ 2.1488]])
Loss tensor([[8.1641]])
Step no 3
params: tensor([[ 2.9809],
        [ 3.9250],
        [-4.9775],
        [ 1.9679]])
Loss tensor([[1.9186]])
Step no 4
params: tensor([[ 3.0022],
        [ 4.0062],
        [-5.0020],
        [ 2.0230]])
Loss tensor([[1.2005]])
Step no 5
params: tensor([[ 3.0042],
        [ 3.9781],
        [-4.9962],
        [ 2.0024]])
Loss tensor([[1.1173]])
Step no 6
params: tensor([[ 3.0061],
        [ 3.9877],
        [-4.9989],
        [ 2.0090]])
Loss tensor([[1.1076]])
Step no 7
params: tensor([[ 3.0062],
        [ 3.9844],
        [-4.9982],
        [ 2.0067]])
Loss tensor([[1.1064]])
Step no 8
params: tensor([[ 3.0064],
        

In [39]:
# analytical solution: solve the normal equations X'X beta = X'y
X = torch.cat((data, torch.ones(data.shape[0], 1)), 1)
# solve the linear system directly rather than forming an explicit inverse,
# which is slower and numerically less accurate
beta = torch.linalg.solve(X.t() @ X, X.t() @ y)

In [40]:
# show the closed-form estimate for comparison with the gradient-descent parameters printed during training
print(beta)

tensor([[ 3.0064],
        [ 3.9852],
        [-4.9984],
        [ 2.0072]])
