In [12]:
import torch
import numpy as np
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [7]:
def generate_disc_set(nb):
    data = torch.empty(nb, 2).uniform_(-1, 1)
    norms = data.norm(dim=1)
    labels = (norms <= np.sqrt(2 / np.pi)).to(dtype=torch.long)
    return data, labels

def normalize(train, test):
    mean, std = train.mean(0), train.std(0)
    return (train - mean) / std, (test - mean) / std

In [8]:
# Generate train and test datasets
NUM_SAMPLES = 1000
train_data, train_labels = generate_disc_set(NUM_SAMPLES)
test_data, test_labels = generate_disc_set(NUM_SAMPLES)

# Normalize data
train_data, test_data = normalize(train_data, test_data)

In [40]:
class Data(Dataset):
    def __init__(self, x, y):
        self.x, self.y = x, y
        self.len = x.shape[0]
    
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    
    def __len__(self):
        return self.len

def train_model(model, train_input, train_target):
    num_epochs = 250
    lr = 0.1
    batch_size = 100
    # Build dataset and define data loader
    dataset = Data(train_input, train_target)
    dataloader = DataLoader(dataset, batch_size)
    # Define loss
    criterion = nn.CrossEntropyLoss()
    # Define optimizer
    optim = torch.optim.SGD(model.parameters(), lr=lr)
    # Train the model
    for epoch in range(num_epochs):
        for x, y in dataloader:
            # Make predition
            preds = model(x)
            # Clear gradients
            optim.zero_grad()
            # Accumulate gradients
            criterion(preds, y).backward()
            # Optimize parameters
            optim.step()
        
def compute_nb_errors(model, data_input, data_target):
    batch_size = 100
    # Build dataset and define data loader
    dataset = Data(data_input, data_target)
    dataloader = DataLoader(dataset, batch_size)
    # Test model
    nb_errors = 0
    for x, y in dataloader:
        _, preds = model(x).max(1)
        nb_errors += (preds != y).sum()
    return nb_errors.item()

In [41]:
def create_shallow_model():
    return nn.Sequential(
        nn.Linear(2, 128),
        nn.ReLU(),
        nn.Linear(128, 2),
        nn.Softmax()
    )

def create_deep_model():
    return nn.Sequential(
        nn.Linear(2, 4),
        nn.ReLU(),
        nn.Linear(4, 8),
        nn.ReLU(),
        nn.Linear(8, 16),
        nn.ReLU(),
        nn.Linear(16, 32),
        nn.ReLU(),
        nn.Linear(32, 64),
        nn.ReLU(),
        nn.Linear(64, 128),
        nn.ReLU(),
        nn.Linear(128, 2),
        nn.Softmax()
    )

In [42]:
# Evaluate shallow model
shallow_model = create_shallow_model()
train_model(shallow_model, train_data, train_labels)
nb_errors = compute_nb_errors(shallow_model, test_data, test_labels)
print('Error rate:', nb_errors / test_data.shape[0])

Error rate: 0.007


In [43]:
# Evaluate deep model
deep_model = create_deep_model()
train_model(deep_model, train_data, train_labels)
nb_errors = compute_nb_errors(deep_model, test_data, test_labels)
print('Error rate:', nb_errors / test_data.shape[0])

Error rate: 0.063
