In [2]:
import torch, math
from torch import Tensor, LongTensor
from torch import optim, nn
from torch.autograd import Variable

torch.sum(input, dim, keepdim=False, out=None) -> Tensor
* dim: the dimension to be reduced
* keepdim: whether the output tensor has dim retained or not
* Example:  

```python
>>> a = torch.randn(4, 4)
>>> a
tensor([[-0.3811, -0.7403,  1.1272,  1.0669],
        [-0.9449, -1.0284,  0.3444, -0.4910],
        [-0.6464, -0.3024,  0.1091, -2.3793],
        [ 0.7808,  1.2390, -0.0583, -1.2738]])
>>> torch.sum(a, 1) 
"""dimension 1 will be reduced -> calculate the sum of each row"""
"""The output here has been squeezed (torch.squeeze()) since keepdim=False"""
tensor([ 1.0727, -2.1199, -3.2190,  0.6878])
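>>> torch.sum(a, 1, keepdim=True)
"""keepdim=True: the reduced dimension is retained with size 1, so the output has shape (4, 1)"""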
tensor([[ 1.0727],
        [-2.1199],
        [-3.2190],
        [ 0.6878]])
```


In [3]:
## ex1 Toy Dataset ##
def generate_disc_set(nb):
    """Draw `nb` random 2-D points and label them by membership in a disc.

    Points are sampled uniformly in the square [-1, 1) x [-1, 1).  A point is
    labelled 1 when its squared norm is strictly below 2/pi (i.e. it lies inside
    the origin-centred disc of radius sqrt(2/pi)) and 0 otherwise.

    Args:
        nb: number of points to generate.

    Returns:
        A pair `(points, labels)`: `points` has shape (nb, 2) and `labels` is a
        long tensor of shape (nb,) holding 0/1 class indices.
    """
    points = Tensor(nb, 2).uniform_(-1, 1)
    squared_norm = points.pow(2).sum(1)
    # sign(r^2 - 2/pi): -1 inside the disc, 0 on its boundary, +1 outside.
    side = squared_norm.sub(2 / math.pi).sign()
    # (side - 1) / -2 maps -1 -> 1, 0 -> 0.5 and +1 -> 0; .long() truncates
    # the boundary value 0.5 down to 0.
    labels = side.sub(1).div(-2).long()
    return points, labels

# Draw two independent samples of the disc problem: one to fit, one to evaluate.
train_input, train_target = generate_disc_set(1000)
test_input, test_target = generate_disc_set(1000)

# Normalisation statistics come from the training inputs only (scalar mean and
# standard deviation over all entries) and are applied, in place, to both splits.
mean = train_input.mean()
std = train_input.std()

train_input.sub_(mean).div_(std)
test_input.sub_(mean).div_(std)

# Wrap every tensor in an autograd Variable before handing it to the models.
train_input = Variable(train_input)
train_target = Variable(train_target)
test_input = Variable(test_input)
test_target = Variable(test_target)


In [4]:
# Default mini-batch size shared by the training and evaluation helpers.
batch_size = 100

def train_model(model, train_input, train_target,
                nb_epochs=250, lr=1e-1, mini_batch_size=None):
    """Train `model` in place with mini-batch SGD on a cross-entropy loss.

    Args:
        model: module mapping a batch of inputs to per-class scores.
        train_input: tensor of training samples, indexed along dimension 0.
        train_target: tensor of class indices aligned with `train_input`.
        nb_epochs: number of full passes over the data (default 250).
        lr: SGD learning rate (default 1e-1).
        mini_batch_size: samples per optimisation step; when None, the
            module-level `batch_size` is used.

    Returns:
        None. The parameters of `model` are updated in place.
    """
    if mini_batch_size is None:
        mini_batch_size = batch_size

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=lr)
    nb_samples = train_input.size(0)

    for _ in range(nb_epochs):
        for b in range(0, nb_samples, mini_batch_size):
            # The last batch may be shorter when nb_samples is not a multiple of
            # mini_batch_size; narrow() raises if asked to read past the end.
            current = min(mini_batch_size, nb_samples - b)
            output = model(train_input.narrow(0, b, current))
            loss = criterion(output, train_target.narrow(0, b, current))
            model.zero_grad()
            loss.backward()
            optimizer.step()

In [5]:
def compute_nb_errors(model, test_input, test_target, mini_batch_size=None):
    """Count the samples whose predicted class differs from the target class.

    The predicted class of a sample is the index of the largest score in the
    model output along dimension 1.

    Args:
        model: callable mapping a batch of inputs to per-class scores.
        test_input: tensor of samples, indexed along dimension 0.
        test_target: tensor of class indices aligned with `test_input`.
        mini_batch_size: samples per forward pass; when None, the module-level
            `batch_size` is used.

    Returns:
        The number of misclassified samples as a Python int.
    """
    if mini_batch_size is None:
        mini_batch_size = batch_size

    nb_samples = test_input.size(0)
    nb_errors = 0

    for b in range(0, nb_samples, mini_batch_size):
        # Clamp the final batch so a sample count that is not a multiple of the
        # batch size neither overruns narrow() nor indexes past the targets.
        current = min(mini_batch_size, nb_samples - b)
        output = model(test_input.narrow(0, b, current))
        _, predicted_class = torch.max(output.data, 1)
        target = test_target.data.narrow(0, b, current)
        # Count mismatches for the whole batch at once; cast to long before
        # summing so the count cannot wrap in a narrow integer/boolean dtype.
        nb_errors += int((predicted_class != target).long().sum())

    return nb_errors

In [6]:
def shallow_model():
    """Build a one-hidden-layer network: 2 inputs -> 128 ReLU units -> 2 scores."""
    hidden_units = 128
    layers = [
        nn.Linear(2, hidden_units),
        nn.ReLU(),
        nn.Linear(hidden_units, 2),
    ]
    return nn.Sequential(*layers)

def deep_model():
    """Build a seven-layer ReLU network whose width doubles at every hidden layer.

    Layer sizes are 2 -> 4 -> 8 -> 16 -> 32 -> 64 -> 128 -> 2, with a ReLU after
    every linear layer except the final one.
    """
    widths = [2, 4, 8, 16, 32, 64, 128]
    layers = []
    # Each consecutive pair of widths yields one Linear layer followed by a ReLU.
    for fan_in, fan_out in zip(widths[:-1], widths[1:]):
        layers.append(nn.Linear(fan_in, fan_out))
        layers.append(nn.ReLU())
    # Output layer: two class scores, no activation.
    layers.append(nn.Linear(widths[-1], 2))
    return nn.Sequential(*layers)

In [7]:
### Benchmarking
# For each initialisation scale and each architecture: build a fresh model,
# optionally overwrite every parameter with samples from N(0, init_std^2),
# train it, and report train / test error rates in percent.
# A non-positive value (the -1 entry) leaves the freshly built model's
# parameters untouched.
#
# The loop variable is deliberately not named `std`: that module-level name
# already holds the training-set standard deviation used to normalise the
# inputs, and rebinding it here would leave a stale value behind for any cell
# executed afterwards.
for init_std in [-1, 1e-3, 1e-2, 1e-1, 1e-0, 1e1]:
    for m in [shallow_model, deep_model]:
        model = m()
        if init_std > 0:
            for p in model.parameters():
                p.data.normal_(0, init_std)

        train_model(model, train_input, train_target)

        train_error = compute_nb_errors(model, train_input, train_target) / train_input.size(0) * 100
        test_error = compute_nb_errors(model, test_input, test_target) / test_input.size(0) * 100

        # NOTE: the message prints the literal label "std", then the model name,
        # then the init scale; the wording is kept unchanged on purpose.
        print('std {:s} {:f} train_error {:.02f}% test_error {:.02f}%'.format(
            m.__name__,
            init_std,
            train_error,
            test_error
        ))

std shallow_model -1.000000 train_error 0.60% test_error 1.10%
std deep_model -1.000000 train_error 2.20% test_error 2.60%
std shallow_model 0.001000 train_error 1.40% test_error 1.60%
std deep_model 0.001000 train_error 48.70% test_error 50.10%
std shallow_model 0.010000 train_error 1.10% test_error 1.60%
std deep_model 0.010000 train_error 48.70% test_error 50.10%
std shallow_model 0.100000 train_error 0.80% test_error 1.30%
std deep_model 0.100000 train_error 48.70% test_error 50.10%
std shallow_model 1.000000 train_error 0.50% test_error 0.80%
std deep_model 1.000000 train_error 51.30% test_error 49.90%
std shallow_model 10.000000 train_error 0.00% test_error 0.80%
std deep_model 10.000000 train_error 51.30% test_error 49.90%
