In [2]:
import sys
from typing import Literal

import numpy as np
import torch
from torch import nn, Tensor
from torch.backends.mps import is_available as mps_is_available
from torch.cuda import is_available as cuda_is_available
from tqdm.notebook import tqdm

In [3]:
if not sys.version_info >= (3, 10):
    raise RuntimeError("This notebook requires Python 3.10 or later.")
print(f'Python version: {sys.version}')

print(f'PyTorch version: {torch.__version__}')
device = 'cuda' if cuda_is_available() else 'mps' if mps_is_available() else 'cpu'
print(f'Currently, using {device} device.')

Python version: 3.10.11 (main, Apr  7 2023, 07:24:53) [Clang 14.0.0 (clang-1400.0.29.202)]
PyTorch version: 2.0.0
Currently, using mps device.


# Testing a Neural Network's Capability to Model Multiplication

## Hyper-parameters

In [10]:
# Number of random (x1, x2) samples drawn per optimisation step and for the test batch.
batchsize = 100_000
# Number of optimisation steps; each step uses a freshly sampled batch.
epoch = 50_000
# Learning rate passed to the optimizer.
lr = 1e-3

## Define the Network

In [11]:
class Feedforward(torch.nn.Module):
    """Fully connected ReLU network mapping ``input_size`` features to one scalar.

    The network is a ``Linear -> ReLU`` stage repeated ``layers`` times, followed
    by a final ``Linear`` layer with a single output unit and no output
    activation.

    Args:
        input_size: Number of features in each input row.
        hidden_size: Width of every hidden layer.
        layers: Number of hidden ``Linear -> ReLU`` stages; must be at least 1.
            Values 1 and 2 build exactly the same module layout as before;
            larger values add further hidden stages instead of being silently
            treated as 1.

    Raises:
        ValueError: If ``layers`` is smaller than 1.
    """

    def __init__(self, input_size: int = 2, hidden_size: int = 2000, layers: int = 1):
        super().__init__()

        if layers < 1:
            raise ValueError(f"layers must be a positive integer, got {layers!r}")

        # First hidden stage projects the inputs up to the hidden width.
        stages = [nn.Linear(input_size, hidden_size), nn.ReLU()]
        # Every additional hidden stage keeps the hidden width.
        for _ in range(layers - 1):
            stages.extend([nn.Linear(hidden_size, hidden_size), nn.ReLU()])
        # Output is left linear (a Tanh output activation was tried and disabled).
        stages.append(nn.Linear(hidden_size, 1))
        self.model = nn.Sequential(*stages)

    def forward(self, x: Tensor) -> Tensor:
        """Run ``x`` through the network; the last axis of the result has size 1."""
        return self.model(x)

In [12]:
# Network under test: 2 inputs, one hidden layer of 2000 units, 1 output.
# It maps an input of shape (batchsize, 2) to an output of shape (batchsize, 1).
model = Feedforward(input_size=2, hidden_size=2000, layers=1)
model = model.to(device)
# Mean squared error between the predictions and the target values.
criterion = nn.MSELoss().to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

## Train the Model

In [13]:
# Train the model on freshly sampled batches: every step draws new inputs
# uniformly from [-1, 1) in both coordinates and regresses the network output
# onto their exact product.
model.train()

training_bar = tqdm(range(epoch))
for e in training_bar:
    # Sample on the host in float32, then move inputs and targets to the device.
    samples = np.random.uniform(low=-1.0, high=1.0, size=(batchsize, 2)).astype(np.float32)
    x_train = torch.from_numpy(samples).to(device)
    y_train = torch.from_numpy(samples[:, 0] * samples[:, 1]).to(device)

    optimizer.zero_grad()
    # Forward pass
    y_pred: Tensor = model(x_train)
    # Compute loss. Only the trailing size-1 feature axis is dropped: a bare
    # squeeze() would also remove the batch axis when batchsize == 1, leaving a
    # 0-d prediction that no longer matches the (batchsize,) target shape.
    loss: Tensor = criterion(y_pred.squeeze(-1), y_train)

    # .item() already returns a host-side Python float, so no explicit .cpu() is needed.
    log_str = 'Epoch {}: train loss: {}'.format(e, loss.item())
    training_bar.set_postfix_str(log_str)
    if e % 1000 == 0:
        print(log_str)

    loss.backward()
    optimizer.step()

  0%|          | 0/50000 [00:00<?, ?it/s]

Epoch 0: train loss: 0.11531239748001099
Epoch 1000: train loss: 1.9180145045538666e-06
Epoch 2000: train loss: 0.0003018889401573688
Epoch 3000: train loss: 7.147638098103926e-05
Epoch 4000: train loss: 0.0032092358451336622
Epoch 5000: train loss: 6.315989139693556e-06
Epoch 6000: train loss: 1.1285478649369907e-05
Epoch 7000: train loss: 3.8866159002282075e-07


KeyboardInterrupt: 

## Evaluation

In [14]:
# Evaluate on one fresh batch drawn the same way as the training batches:
# inputs uniform in [-1, 1) per coordinate, target = product of the two inputs.
model.eval()
with torch.no_grad():
    samples = np.random.uniform(low=-1.0, high=1.0, size=(batchsize, 2)).astype(np.float32)
    x_test = torch.from_numpy(samples).to(device)
    y_test = torch.from_numpy(samples[:, 0] * samples[:, 1]).to(device)

    y_pred = model(x_test)
    # Drop only the trailing size-1 feature axis so the prediction keeps the
    # (batchsize,) shape of the target even when batchsize == 1.
    after_train = criterion(y_pred.squeeze(-1), y_test)
    print('Test loss after Training' , after_train.item())

Test loss after Training 0.00016203639097511768


In [4]:
# Express the test MSE on a decibel scale: 20*log10 of the RMS error, so 0 dB
# corresponds to an RMS error of 1. Uses `after_train` as left by the most
# recent run of the evaluation cell.
# NOTE(review): originally described as an S/N ratio; the value computed here is
# the error level only, with no signal-power term. Confirm which is intended.
test_mse = after_train.item()
actual_error = np.sqrt(test_mse)
error_db = 20 * np.log10(actual_error)
print("In dB:", error_db)

In dB: -64.10428376953978


```
layers hidden   time   batchsize   dB
1        10       7s       10000  -15
2        10      10s       10000  -42
1       100       7s       10000  -54
2       100      15s       10000  -52
1       100    1m02s      100000  -55
1      1000      35s       10000  -65
1      1000    6m14s      100000  -64 
2      1000    3m50s       10000  -59
1      2000    1m06s       10000  -66
1      2000               100000  
2      2000   13m12s       10000  -63
1      4000    2m04s       10000  -63
1      4000   22m13s      100000   17 (!)
```