In [1]:
import numpy as np
import torch
from torch import nn, Tensor

# Report the installed PyTorch version, then pick the compute device:
# Apple's Metal backend ("mps") when it is available, otherwise the CPU.
print(torch.__version__)
if torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using device: {device}")

2.0.0
Using device: mps


In [3]:
# Example: a small fully connected network assembled with nn.Sequential.
# Layer widths go 764 -> 100 -> 50 -> 10, with ReLU between the linear layers
# and a Sigmoid on the output.
# NOTE(review): 764 input features is unusual; if flattened 28x28 images
# (784 values) were intended this is a typo -- confirm before reusing.
_example_layers = [
    nn.Linear(764, 100),
    nn.ReLU(),
    nn.Linear(100, 50),
    nn.ReLU(),
    nn.Linear(50, 10),
    nn.Sigmoid(),
]
model = nn.Sequential(*_example_layers)

In [4]:
class Feedforward(torch.nn.Module):
    """Fully connected regressor with one or two ReLU hidden layers.

    The network maps ``input_size`` features to a single output value through
    ``fc1`` (+ ReLU), an optional ``fc2`` (+ ReLU) and a final linear layer
    ``fc3``. No activation is applied to the output.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of every hidden layer.
        layers: Number of hidden layers; must be 1 or 2.

    Raises:
        ValueError: If ``layers`` is neither 1 nor 2.
    """

    def __init__(self, input_size: int, hidden_size: int, layers: int):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.layers = layers

        # First hidden layer is always present.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu1 = nn.ReLU()

        if layers not in (1, 2):
            raise ValueError('layers must be 1 or 2')
        if layers == 2:
            # Optional second hidden layer of the same width.
            self.fc2 = nn.Linear(hidden_size, hidden_size)
            self.relu2 = nn.ReLU()

        # Output head: one value per sample.
        self.fc3 = nn.Linear(hidden_size, 1)

    def forward(self, x: Tensor) -> Tensor:
        """Run the network on ``x`` and return a tensor whose last dim is 1."""
        features = self.relu1(self.fc1(x))
        if self.layers != 1:
            assert self.layers == 2
            features = self.relu2(self.fc2(features))
        return self.fc3(features)

In [5]:
# Create training and test data
#
# Inputs are (n, 2) arrays drawn uniformly from [-1, 1); the regression target
# for each row is the product of its two columns.
SEED = 0  # fixed so that Restart & Run All regenerates the same data
batchsize = 100000  # number of samples in each of the train and test sets
rng = np.random.default_rng(SEED)

x_train_np = rng.uniform(low=-1.0, high=1.0, size=(batchsize, 2))
y_train_np = x_train_np[:, 0] * x_train_np[:, 1]

x_test_np = rng.uniform(low=-1.0, high=1.0, size=(batchsize, 2))
y_test_np = x_test_np[:, 0] * x_test_np[:, 1]

# Convert to float32 tensors. The products are computed in float64 first and
# only then rounded, so each target is the rounded exact product.
x_train = torch.tensor(x_train_np, dtype=torch.float32)
y_train = torch.tensor(y_train_np, dtype=torch.float32)

x_test = torch.tensor(x_test_np, dtype=torch.float32)
y_test = torch.tensor(y_test_np, dtype=torch.float32)

In [7]:
# Build the network: 2 inputs -> one hidden layer of 2000 units -> 1 output.
model = Feedforward(2, 2000, 1)
model.to(device)
# input is (batchsize, 2)
# output dimension is (batchsize, 1)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


# Train the model. Every step uses the whole training set as a single batch.
model.train()
n_epochs = 50000  # kept separate from the loop variable `epoch`
x_train = x_train.to(device)
y_train = y_train.to(device)
for epoch in range(1, n_epochs + 1):
    optimizer.zero_grad()
    # Forward pass. The output already lives on `device` because both the
    # model and its input do, so no extra transfer is needed.
    y_pred = model(x_train)
    # Compute loss. squeeze(-1) drops the trailing size-1 dimension so the
    # prediction has the same shape as y_train (avoids silent broadcasting).
    loss = criterion(y_pred.squeeze(-1), y_train)

    if epoch % 1000 == 0 or epoch == 1:
        print('Epoch {}: train loss: {}'.format(epoch, loss.item()))
    # Backward pass
    loss.backward()
    optimizer.step()


# Evaluate. Gradients are not needed here, so autograd is disabled to avoid
# building a graph over the full test set.
model.eval()
x_test = x_test.to(device)
y_test = y_test.to(device)
with torch.no_grad():
    y_pred = model(x_test)
    after_train = criterion(y_pred.squeeze(-1), y_test)
print('Test loss after Training' , after_train.item())


# Express the test error in dB: 20*log10(RMSE), i.e. the RMS error relative to
# an amplitude of 1.0.
# NOTE(review): this is not a true signal-to-noise ratio unless the target RMS
# is 1; for independent uniform[-1, 1] inputs the product has RMS 1/3, so the
# actual S/N would be about 9.5 dB lower in magnitude -- confirm which is wanted.
actual_error = np.sqrt(after_train.item())
print("In dB:", 20 * np.log10(actual_error))

Epoch 1: train loss: 0.12654592096805573
Epoch 1000: train loss: 0.00028138718334957957
Epoch 2000: train loss: 0.0003959300520364195
Epoch 3000: train loss: 0.000230910605750978
Epoch 4000: train loss: 0.001158874831162393
Epoch 5000: train loss: 0.0007007761741988361
Epoch 6000: train loss: 0.0022664987482130527
Epoch 7000: train loss: 0.007033903617411852
Epoch 8000: train loss: 0.007691480685025454
Epoch 9000: train loss: 0.0018509876681491733
Epoch 10000: train loss: 0.004278330598026514
Epoch 11000: train loss: 0.0034861373715102673
Epoch 12000: train loss: 0.006240994669497013
Epoch 13000: train loss: 0.005059342365711927
Epoch 14000: train loss: 0.015225350856781006
Epoch 15000: train loss: 0.015546157024800777
Epoch 16000: train loss: 0.027390802279114723
Epoch 17000: train loss: 0.04245102405548096
Epoch 18000: train loss: 0.02348843403160572
Epoch 19000: train loss: 0.03328762203454971
Epoch 20000: train loss: 0.014060895889997482
Epoch 21000: train loss: 0.01289332099258899

```
layers hidden   time   batchsize   dB
1        10       7s       10000  -15
2        10      10s       10000  -42
1       100       7s       10000  -54
2       100      15s       10000  -52
1       100    1m02s      100000  -55
1      1000      35s       10000  -65
1      1000    6m14s      100000  -64 
2      1000    3m50s       10000  -59
1      2000    1m06s       10000  -66
1      2000               100000  
2      2000   13m12s       10000  -63
1      4000    2m04s       10000  -63
1      4000   22m13s      100000   17 (!)
```