In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
import torch
from torch import nn
from d2l import torch as d2l

import mytorch
from mytorch import nn as mynn
from models import MLP0, MLP1, MLP4

In [None]:
# configure trainer for use with MyTorch
class Trainer(d2l.Trainer):
    """d2l trainer whose epoch loop drives an explicit, hand-written backward pass.

    Rather than calling ``backward()`` on the loss tensor returned by
    ``training_step``, every training batch asks the model's stored loss
    object (``model.loss_fn``) for the gradient and feeds that gradient to
    ``model.backward`` before stepping the optimizer.
    """

    def fit_epoch(self):
        """Run one pass over the training batches, then over the validation batches (if any)."""
        self.model.train()
        for train_batch in self.train_dataloader:
            # The wrapped models in this notebook set `loss_fn` inside `loss()`,
            # which `training_step` is expected to call -- so it must run first.
            self.model.training_step(self.prepare_batch(train_batch))
            self.optim.zero_grad()
            with torch.no_grad():
                grad_wrt_output = self.model.loss_fn.backward()
                self.model.backward(grad_wrt_output)
                self.optim.step()
            self.train_batch_idx += 1

        if self.val_dataloader is not None:
            self.model.eval()
            for val_batch in self.val_dataloader:
                with torch.no_grad():
                    self.model.validation_step(self.prepare_batch(val_batch))
                self.val_batch_idx += 1

# Regression Data

In [None]:
# Problem dimensions and split sizes for the synthetic regression task
num_inputs, num_outputs = 2, 1
num_train, num_val = 1000, 500

# Values handed to the data generator as its `w` and `b` arguments
w = np.array([2, -3.4], dtype=float)
b = np.array(4.2, dtype=float)

data = d2l.SyntheticRegressionData(
    w=torch.tensor(w, dtype=torch.float32),
    b=b,
    num_train=num_train,
    num_val=num_val,
)

### PyTorch Linear

In [None]:
# define and fit d2l/torch model
class SimpleLinear(d2l.Module):
    """PyTorch baseline: one `nn.Linear` layer scored with mean-squared error."""

    def __init__(self, num_inputs, num_outputs, lr):
        super().__init__()
        self.save_hyperparameters()
        self.net = nn.Linear(in_features=num_inputs, out_features=num_outputs)

    def forward(self, X):
        """Apply the linear layer to the batch `X`."""
        Y_hat = self.net(X)
        return Y_hat

    def loss(self, Y_hat, Y):
        """Mean-squared error between predictions `Y_hat` and targets `Y`."""
        return nn.functional.mse_loss(Y_hat, Y)


net = SimpleLinear(num_inputs=num_inputs, num_outputs=num_outputs, lr=0.01)
trainer = d2l.Trainer(max_epochs=10)
trainer.fit(net, data)

# Fitted weight and bias of the linear layer
print(net.net.weight.data)
print(net.net.bias.data)

### MyTorch Linear

In [None]:
# put our MyTorch network into d2l format
class MyLinear(d2l.Module):
    """Wraps a MyTorch `Linear` layer so it can be trained through the d2l interface."""

    def __init__(self, num_inputs, num_outputs, lr):
        super().__init__()
        self.save_hyperparameters()
        self.net = mynn.Linear(num_inputs, num_outputs)

    def forward(self, X):
        """Convert the tensor batch to NumPy and run the MyTorch forward pass."""
        inputs = X.numpy()
        return self.net.forward(inputs)

    def backward(self, dLdZ):
        """Hand the gradient `dLdZ` to the wrapped layer's backward pass."""
        self.net.backward(dLdZ)

    def loss(self, Y_hat, Y):
        """Evaluate the MyTorch MSE loss and keep the loss object for the backward pass.

        Returns the value produced by the loss object's `forward`, wrapped in a torch tensor.
        """
        criterion = mynn.MSELoss()
        targets = Y.detach().numpy()
        value = criterion.forward(Y_hat, targets)
        # The custom Trainer reads `loss_fn` to call its backward().
        self.loss_fn = criterion
        return torch.tensor(value)

    def loss_fun(self):
        """Return the loss object stored by the most recent `loss` call."""
        return self.loss_fn

    def configure_optimizers(self):
        """Create the MyTorch SGD optimizer for the wrapped layer."""
        return mytorch.optim.SGD(self.net, lr=self.lr)


net = MyLinear(num_inputs=num_inputs, num_outputs=num_outputs, lr=0.01)
trainer = Trainer(max_epochs=10)
trainer.fit(net, data)

# Fitted parameters of the MyTorch layer
print(net.net.W)
print(net.net.b)

### PyTorch MLP0

In [None]:
# define and fit d2l/torch model
class SimpleMLP0(d2l.Module):
    """PyTorch baseline with no hidden layer: a `Sequential` holding one `nn.Linear`."""

    def __init__(self, num_inputs, num_outputs, lr):
        super().__init__()
        self.save_hyperparameters()
        output_layer = nn.Linear(num_inputs, num_outputs)
        self.net = nn.Sequential(output_layer)

    def forward(self, X):
        """Run the batch `X` through the sequential network."""
        Y_hat = self.net(X)
        return Y_hat

    def loss(self, Y_hat, Y):
        """Mean-squared error between predictions `Y_hat` and targets `Y`."""
        return nn.functional.mse_loss(Y_hat, Y)


net = SimpleMLP0(num_inputs=num_inputs, num_outputs=num_outputs, lr=0.01)
trainer = d2l.Trainer(max_epochs=10)
trainer.fit(net, data)

# Fitted weight and bias of the only layer
print(net.net[0].weight.data)
print(net.net[0].bias.data)

### MyTorch MLP0

In [None]:
# put our MyTorch network into d2l format
class MyMLP0(d2l.Module):
    """Wraps the MyTorch `MLP0` model so it can be trained through the d2l interface."""

    def __init__(self, num_inputs, num_outputs, lr):
        super().__init__()
        self.save_hyperparameters()
        self.net = MLP0(num_inputs, num_outputs)

    def forward(self, X):
        """Convert the tensor batch to NumPy and run the MyTorch forward pass."""
        inputs = X.numpy()
        return self.net.forward(inputs)

    def backward(self, dLdZ):
        """Hand the gradient `dLdZ` to the wrapped model's backward pass."""
        self.net.backward(dLdZ)

    def loss(self, Y_hat, Y):
        """Evaluate the MyTorch MSE loss and keep the loss object for the backward pass.

        Returns the value produced by the loss object's `forward`, wrapped in a torch tensor.
        """
        criterion = mynn.MSELoss()
        targets = Y.detach().numpy()
        value = criterion.forward(Y_hat, targets)
        # The custom Trainer reads `loss_fn` to call its backward().
        self.loss_fn = criterion
        return torch.tensor(value)

    def loss_fun(self):
        """Return the loss object stored by the most recent `loss` call."""
        return self.loss_fn

    def configure_optimizers(self):
        """Create the MyTorch SGD optimizer for the wrapped model."""
        return mytorch.optim.SGD(self.net, lr=self.lr)


net = MyMLP0(num_inputs=num_inputs, num_outputs=num_outputs, lr=0.01)
trainer = Trainer(max_epochs=10)
trainer.fit(net, data)

# Fitted parameters of the model's first layer
print(net.net.layers[0].W)
print(net.net.layers[0].b)

### PyTorch MLP1

In [None]:
# define and fit d2l/torch model
class SimpleMLP1(d2l.Module):
    """PyTorch baseline with one hidden layer: Linear -> ReLU -> Linear, MSE loss."""

    def __init__(self, num_inputs, num_outputs, num_hiddens, lr):
        super().__init__()
        self.save_hyperparameters()
        # Layers are created in the order they appear in the network.
        hidden = nn.Linear(num_inputs, num_hiddens)
        head = nn.Linear(num_hiddens, num_outputs)
        self.net = nn.Sequential(hidden, nn.ReLU(), head)

    def forward(self, X):
        """Run the batch `X` through the sequential network."""
        Y_hat = self.net(X)
        return Y_hat

    def loss(self, Y_hat, Y):
        """Mean-squared error between predictions `Y_hat` and targets `Y`."""
        return nn.functional.mse_loss(Y_hat, Y)


num_hiddens = 10
net = SimpleMLP1(num_inputs=num_inputs, num_outputs=num_outputs,
                 num_hiddens=num_hiddens, lr=0.01)
trainer = d2l.Trainer(max_epochs=10)
trainer.fit(net, data)

### MyTorch MLP1

In [None]:
# put our MyTorch network into d2l format
class MyMLP1(d2l.Module):
    """Wraps the MyTorch `MLP1` model so it can be trained through the d2l interface."""

    def __init__(self, num_inputs, num_outputs, num_hiddens, lr):
        super().__init__()
        self.save_hyperparameters()
        self.net = MLP1(num_inputs, num_outputs, num_hiddens)

    def forward(self, X):
        """Convert the tensor batch to NumPy and run the MyTorch forward pass."""
        inputs = X.numpy()
        return self.net.forward(inputs)

    def backward(self, dLdZ):
        """Hand the gradient `dLdZ` to the wrapped model's backward pass."""
        self.net.backward(dLdZ)

    def loss(self, Y_hat, Y):
        """Evaluate the MyTorch MSE loss and keep the loss object for the backward pass.

        Returns the value produced by the loss object's `forward`, wrapped in a torch tensor.
        """
        criterion = mynn.MSELoss()
        targets = Y.detach().numpy()
        value = criterion.forward(Y_hat, targets)
        # The custom Trainer reads `loss_fn` to call its backward().
        self.loss_fn = criterion
        return torch.tensor(value)

    def loss_fun(self):
        """Return the loss object stored by the most recent `loss` call."""
        return self.loss_fn

    def configure_optimizers(self):
        """Create the MyTorch SGD optimizer for the wrapped model."""
        return mytorch.optim.SGD(self.net, lr=self.lr)


num_hiddens = 10
net = MyMLP1(num_inputs=num_inputs, num_outputs=num_outputs,
             num_hiddens=num_hiddens, lr=0.01)
trainer = Trainer(max_epochs=10)
trainer.fit(net, data)

### PyTorch MLP4

In [None]:
# define and fit d2l/torch model
class SimpleMLP4(d2l.Module):
    """PyTorch baseline with four hidden layers (Flatten, 4x Linear+ReLU, Linear), MSE loss."""

    def __init__(self, num_inputs, num_outputs, num_hiddens, lr):
        super().__init__()
        self.save_hyperparameters()
        # Build the stack front to back so layers are created in network order.
        stack = [nn.Flatten(), nn.Linear(num_inputs, num_hiddens), nn.ReLU()]
        for _ in range(3):
            stack.append(nn.Linear(num_hiddens, num_hiddens))
            stack.append(nn.ReLU())
        stack.append(nn.Linear(num_hiddens, num_outputs))
        self.net = nn.Sequential(*stack)

    def forward(self, X):
        """Run the batch `X` through the sequential network."""
        Y_hat = self.net(X)
        return Y_hat

    def loss(self, Y_hat, Y):
        """Mean-squared error between predictions `Y_hat` and targets `Y`."""
        return nn.functional.mse_loss(Y_hat, Y)


num_hiddens = 10
net = SimpleMLP4(num_inputs=num_inputs, num_outputs=num_outputs,
                 num_hiddens=num_hiddens, lr=0.01)
trainer = d2l.Trainer(max_epochs=10)
trainer.fit(net, data)

### MyTorch MLP4

In [None]:
# put our MyTorch network into d2l format
class MyMLP4(d2l.Module):
    """Wraps the MyTorch `MLP4` model so it can be trained through the d2l interface."""

    def __init__(self, num_inputs, num_outputs, num_hiddens, lr):
        super().__init__()
        self.save_hyperparameters()
        self.net = MLP4(num_inputs, num_outputs, num_hiddens)

    def forward(self, X):
        """Convert the tensor batch to NumPy and run the MyTorch forward pass."""
        inputs = X.numpy()
        return self.net.forward(inputs)

    def backward(self, dLdZ):
        """Hand the gradient `dLdZ` to the wrapped model's backward pass."""
        self.net.backward(dLdZ)

    def loss(self, Y_hat, Y):
        """Evaluate the MyTorch MSE loss and keep the loss object for the backward pass.

        Returns the value produced by the loss object's `forward`, wrapped in a torch tensor.
        """
        criterion = mynn.MSELoss()
        targets = Y.detach().numpy()
        value = criterion.forward(Y_hat, targets)
        # The custom Trainer reads `loss_fn` to call its backward().
        self.loss_fn = criterion
        return torch.tensor(value)

    def loss_fun(self):
        """Return the loss object stored by the most recent `loss` call."""
        return self.loss_fn

    def configure_optimizers(self):
        """Create the MyTorch SGD optimizer for the wrapped model."""
        return mytorch.optim.SGD(self.net, lr=self.lr)


num_hiddens = 10
net = MyMLP4(num_inputs=num_inputs, num_outputs=num_outputs,
             num_hiddens=num_hiddens, lr=0.01)
trainer = Trainer(max_epochs=10)
trainer.fit(net, data)

# Classification Data

In [None]:
# Fashion-MNIST dataset object, configured for mini-batches of 256
data = d2l.FashionMNIST(batch_size=256)
# Model dimensions used by every classifier below: 784 inputs, 10 outputs
num_inputs, num_outputs = 784, 10

### PyTorch Linear

In [None]:
# define and fit d2l/torch model
class SimpleLinear(d2l.Module):
    """PyTorch baseline classifier: Flatten followed by one `nn.Linear`, cross-entropy loss."""

    def __init__(self, num_inputs, num_outputs, lr):
        super().__init__()
        self.save_hyperparameters()
        flatten = nn.Flatten()
        output_layer = nn.Linear(num_inputs, num_outputs)
        self.net = nn.Sequential(flatten, output_layer)

    def forward(self, X):
        """Run the batch `X` through the sequential network."""
        Y_hat = self.net(X)
        return Y_hat

    def loss(self, Y_hat, Y):
        """Cross-entropy between the network outputs `Y_hat` and class labels `Y`."""
        return nn.functional.cross_entropy(Y_hat, Y)


net = SimpleLinear(num_inputs=num_inputs, num_outputs=num_outputs, lr=0.1)
trainer = d2l.Trainer(max_epochs=10)
trainer.fit(net, data)

### MyTorch Linear

In [None]:
# put our MyTorch network into d2l format
class MyLinear(d2l.Module):
    """Wraps a MyTorch `Linear` layer as a classifier trainable through the d2l interface."""

    def __init__(self, num_inputs, num_outputs, lr):
        super().__init__()
        # Stores the constructor arguments on the instance (this class reads
        # `self.lr` and `self.num_outputs` below).
        self.save_hyperparameters()
        self.net = mynn.Linear(num_inputs, num_outputs)

    def forward(self, X):
        """Flatten each example, convert to NumPy, and run the MyTorch forward pass."""
        flat = nn.Flatten()(X)
        return self.net.forward(flat.numpy())

    def backward(self, dLdZ):
        """Hand the gradient `dLdZ` to the wrapped layer's backward pass."""
        self.net.backward(dLdZ)

    def loss(self, Y_hat, Y):
        """Evaluate the MyTorch cross-entropy loss and keep the loss object for backward.

        `Y` holds integer class labels; it is one-hot encoded before being
        passed to the MyTorch loss. Returns the value produced by the loss
        object's `forward`, wrapped in a torch tensor.
        """
        # Pin the one-hot width to the number of model outputs. Without
        # `num_classes`, one_hot sizes the encoding from the largest label in
        # the batch, so a batch lacking the highest class would yield a target
        # matrix narrower than `Y_hat`.
        one_hot = nn.functional.one_hot(Y, num_classes=self.num_outputs)
        targets = one_hot.detach().numpy()
        criterion = mynn.CrossEntropyLoss()
        # The custom Trainer reads `loss_fn` to call its backward().
        self.loss_fn = criterion
        value = criterion.forward(Y_hat, targets)
        return torch.tensor(value)

    def loss_fun(self):
        """Return the loss object stored by the most recent `loss` call."""
        return self.loss_fn

    def configure_optimizers(self):
        """Create the MyTorch SGD optimizer for the wrapped layer."""
        return mytorch.optim.SGD(self.net, lr=self.lr)


net = MyLinear(num_inputs, num_outputs, lr=0.1)
trainer = Trainer(max_epochs=10)
trainer.fit(net, data)

### PyTorch MLP0

In [None]:
# define and fit d2l/torch model
class SimpleMLP0(d2l.Module):
    """PyTorch baseline classifier with no hidden layer: Flatten -> Linear, cross-entropy loss."""

    def __init__(self, num_inputs, num_outputs, lr):
        super().__init__()
        self.save_hyperparameters()
        stack = [nn.Flatten(), nn.Linear(num_inputs, num_outputs)]
        self.net = nn.Sequential(*stack)

    def forward(self, X):
        """Run the batch `X` through the sequential network."""
        Y_hat = self.net(X)
        return Y_hat

    def loss(self, Y_hat, Y):
        """Cross-entropy between the network outputs `Y_hat` and class labels `Y`."""
        return nn.functional.cross_entropy(Y_hat, Y)


net = SimpleMLP0(num_inputs=num_inputs, num_outputs=num_outputs, lr=0.1)
trainer = d2l.Trainer(max_epochs=10)
trainer.fit(net, data)

### MyTorch MLP0

In [None]:
# put our MyTorch network into d2l format
class MyMLP0(d2l.Module):
    """Wraps the MyTorch `MLP0` model as a classifier trainable through the d2l interface."""

    def __init__(self, num_inputs, num_outputs, lr):
        super().__init__()
        # Stores the constructor arguments on the instance (this class reads
        # `self.lr` and `self.num_outputs` below).
        self.save_hyperparameters()
        self.net = MLP0(num_inputs, num_outputs)

    def forward(self, X):
        """Flatten each example, convert to NumPy, and run the MyTorch forward pass."""
        flat = nn.Flatten()(X)
        return self.net.forward(flat.numpy())

    def backward(self, dLdZ):
        """Hand the gradient `dLdZ` to the wrapped model's backward pass."""
        self.net.backward(dLdZ)

    def loss(self, Y_hat, Y):
        """Evaluate the MyTorch cross-entropy loss and keep the loss object for backward.

        `Y` holds integer class labels; it is one-hot encoded before being
        passed to the MyTorch loss. Returns the value produced by the loss
        object's `forward`, wrapped in a torch tensor.
        """
        # Pin the one-hot width to the number of model outputs. Without
        # `num_classes`, one_hot sizes the encoding from the largest label in
        # the batch, so a batch lacking the highest class would yield a target
        # matrix narrower than `Y_hat`.
        one_hot = nn.functional.one_hot(Y, num_classes=self.num_outputs)
        targets = one_hot.detach().numpy()
        criterion = mynn.CrossEntropyLoss()
        value = criterion.forward(Y_hat, targets)
        # The custom Trainer reads `loss_fn` to call its backward().
        self.loss_fn = criterion
        return torch.tensor(value)

    def loss_fun(self):
        """Return the loss object stored by the most recent `loss` call."""
        return self.loss_fn

    def configure_optimizers(self):
        """Create the MyTorch SGD optimizer for the wrapped model."""
        return mytorch.optim.SGD(self.net, lr=self.lr)


net = MyMLP0(num_inputs, num_outputs, lr=0.1)
trainer = Trainer(max_epochs=10)
trainer.fit(net, data)

### PyTorch MLP1

In [None]:
# define and fit d2l/torch model
class SimpleMLP1(d2l.Module):
    """PyTorch baseline classifier: Flatten -> Linear -> ReLU -> Linear, cross-entropy loss."""

    def __init__(self, num_inputs, num_outputs, num_hiddens, lr):
        super().__init__()
        self.save_hyperparameters()
        # Layers are created in the order they appear in the network.
        hidden = nn.Linear(num_inputs, num_hiddens)
        head = nn.Linear(num_hiddens, num_outputs)
        self.net = nn.Sequential(nn.Flatten(), hidden, nn.ReLU(), head)

    def forward(self, X):
        """Run the batch `X` through the sequential network."""
        Y_hat = self.net(X)
        return Y_hat

    def loss(self, Y_hat, Y):
        """Cross-entropy between the network outputs `Y_hat` and class labels `Y`."""
        return nn.functional.cross_entropy(Y_hat, Y)


num_hiddens = 20
net = SimpleMLP1(num_inputs=num_inputs, num_outputs=num_outputs,
                 num_hiddens=num_hiddens, lr=0.1)
trainer = d2l.Trainer(max_epochs=10)
trainer.fit(net, data)

### MyTorch MLP1

In [None]:
# put our MyTorch network into d2l format
class MyMLP1(d2l.Module):
    """Wraps the MyTorch `MLP1` model as a classifier trainable through the d2l interface."""

    def __init__(self, num_inputs, num_outputs, num_hiddens, lr):
        super().__init__()
        # Stores the constructor arguments on the instance (this class reads
        # `self.lr` and `self.num_outputs` below).
        self.save_hyperparameters()
        self.net = MLP1(num_inputs, num_outputs, num_hiddens)

    def forward(self, X):
        """Flatten each example, convert to NumPy, and run the MyTorch forward pass."""
        flat = nn.Flatten()(X)
        return self.net.forward(flat.numpy())

    def backward(self, dLdZ):
        """Hand the gradient `dLdZ` to the wrapped model's backward pass."""
        self.net.backward(dLdZ)

    def loss(self, Y_hat, Y):
        """Evaluate the MyTorch cross-entropy loss and keep the loss object for backward.

        `Y` holds integer class labels; it is one-hot encoded before being
        passed to the MyTorch loss. Returns the value produced by the loss
        object's `forward`, wrapped in a torch tensor.
        """
        # Pin the one-hot width to the number of model outputs. Without
        # `num_classes`, one_hot sizes the encoding from the largest label in
        # the batch, so a batch lacking the highest class would yield a target
        # matrix narrower than `Y_hat`.
        one_hot = nn.functional.one_hot(Y, num_classes=self.num_outputs)
        targets = one_hot.detach().numpy()
        criterion = mynn.CrossEntropyLoss()
        value = criterion.forward(Y_hat, targets)
        # The custom Trainer reads `loss_fn` to call its backward().
        self.loss_fn = criterion
        return torch.tensor(value)

    def loss_fun(self):
        """Return the loss object stored by the most recent `loss` call."""
        return self.loss_fn

    def configure_optimizers(self):
        """Create the MyTorch SGD optimizer for the wrapped model."""
        return mytorch.optim.SGD(self.net, lr=self.lr)


num_hiddens = 20
net = MyMLP1(num_inputs, num_outputs, num_hiddens, lr=0.1)
trainer = Trainer(max_epochs=10)
trainer.fit(net, data)

### PyTorch MLP4

In [None]:
# define and fit d2l/torch model
class SimpleMLP4(d2l.Module):
    """PyTorch baseline classifier with four hidden layers and cross-entropy loss."""

    def __init__(self, num_inputs, num_outputs, num_hiddens, lr):
        super().__init__()
        self.save_hyperparameters()
        # Build the stack front to back so layers are created in network order.
        stack = [nn.Flatten(), nn.Linear(num_inputs, num_hiddens), nn.ReLU()]
        for _ in range(3):
            stack.append(nn.Linear(num_hiddens, num_hiddens))
            stack.append(nn.ReLU())
        stack.append(nn.Linear(num_hiddens, num_outputs))
        self.net = nn.Sequential(*stack)

    def forward(self, X):
        """Run the batch `X` through the sequential network."""
        Y_hat = self.net(X)
        return Y_hat

    def loss(self, Y_hat, Y):
        """Cross-entropy between the network outputs `Y_hat` and class labels `Y`."""
        return nn.functional.cross_entropy(Y_hat, Y)


num_hiddens = 20
net = SimpleMLP4(num_inputs=num_inputs, num_outputs=num_outputs,
                 num_hiddens=num_hiddens, lr=0.1)
trainer = d2l.Trainer(max_epochs=10)
trainer.fit(net, data)

### MyTorch MLP4

In [None]:
# put our MyTorch network into d2l format
class MyMLP4(d2l.Module):
    """Wraps the MyTorch `MLP4` model as a classifier trainable through the d2l interface."""

    def __init__(self, num_inputs, num_outputs, num_hiddens, lr):
        super().__init__()
        # Stores the constructor arguments on the instance (this class reads
        # `self.lr` and `self.num_outputs` below).
        self.save_hyperparameters()
        self.net = MLP4(num_inputs, num_outputs, num_hiddens)

    def forward(self, X):
        """Flatten each example, convert to NumPy, and run the MyTorch forward pass."""
        flat = nn.Flatten()(X)
        return self.net.forward(flat.numpy())

    def backward(self, dLdZ):
        """Hand the gradient `dLdZ` to the wrapped model's backward pass."""
        self.net.backward(dLdZ)

    def loss(self, Y_hat, Y):
        """Evaluate the MyTorch cross-entropy loss and keep the loss object for backward.

        `Y` holds integer class labels; it is one-hot encoded before being
        passed to the MyTorch loss. Returns the value produced by the loss
        object's `forward`, wrapped in a torch tensor.
        """
        # Pin the one-hot width to the number of model outputs. Without
        # `num_classes`, one_hot sizes the encoding from the largest label in
        # the batch, so a batch lacking the highest class would yield a target
        # matrix narrower than `Y_hat`.
        one_hot = nn.functional.one_hot(Y, num_classes=self.num_outputs)
        targets = one_hot.detach().numpy()
        criterion = mynn.CrossEntropyLoss()
        value = criterion.forward(Y_hat, targets)
        # The custom Trainer reads `loss_fn` to call its backward().
        self.loss_fn = criterion
        return torch.tensor(value)

    def loss_fun(self):
        """Return the loss object stored by the most recent `loss` call."""
        return self.loss_fn

    def configure_optimizers(self):
        """Create the MyTorch SGD optimizer for the wrapped model."""
        return mytorch.optim.SGD(self.net, lr=self.lr)


num_hiddens = 20
net = MyMLP4(num_inputs, num_outputs, num_hiddens, lr=0.1)
trainer = Trainer(max_epochs=10)
trainer.fit(net, data)