# We verify that an MLP can generalize symbolic constraints
- Sum of 25
- Increasing
- Symmetric
- Odd/Even (weird?) or divisible by something

The real-valued representation is much easier.
For the symbolic representation, it should not be much harder if the network internally learns an embedding for each number.

Observations:
Training on 3% of the data is too little -> generalization reaches 90% accuracy at most (with the embedding scheme), but 100% with proper normalization.
Training on 10% of the data is enough -> generalization reaches 100% accuracy.

Weirdly, the real-valued representation is much slower to learn with than the one-hot representation
-> this was a normalization issue; rescaling all reals to [0, 1] solves the problem

In [182]:
import torch
from torch import nn
import numpy as np
from coins import generate_combinations
from itertools import product
from torch.utils.data import DataLoader

def split(iterable, train_ratio=0.1, shuffle=True):
    """Split a sequence into a leading train part and a trailing test part.

    Args:
        iterable: Sliceable sequence with a length, e.g. a NumPy array
            (split along its first axis) or a list.
        train_ratio: Fraction of the elements that go to the train part.
            The train size is ``int(len(iterable) * train_ratio)``, i.e. it
            is rounded down.
        shuffle: When true, the elements are shuffled with
            ``np.random.shuffle`` before splitting.

    Returns:
        A ``(train, test)`` pair: the first ``n`` elements and the remaining
        ones. Together they contain every element exactly once.
    """
    if shuffle:
        # np.random.shuffle reorders its argument in place. Shuffle a copy so
        # the caller's data keeps its original order.
        if isinstance(iterable, np.ndarray):
            iterable = iterable.copy()
        else:
            iterable = list(iterable)
        np.random.shuffle(iterable)
    n_train = int(len(iterable) * train_ratio)
    return iterable[:n_train], iterable[n_train:]

def to_onehot(x, num_classes=10):
    """One-hot encode an array of class indices along a new trailing axis.

    Args:
        x: Integer array (or nested sequence) of class indices, of any
            number of dimensions. Each value is used as an index into an
            axis of length ``num_classes``.
        num_classes: Length of the one-hot axis. Defaults to 10, one slot
            per decimal digit.

    Returns:
        A ``float32`` array of shape ``x.shape + (num_classes,)`` holding 1.0
        at the position given by each index and 0.0 elsewhere.
    """
    x = np.asarray(x)
    x_flat = x.flatten()
    x_onehot = np.zeros((len(x_flat), num_classes), dtype=np.float32)
    # Row i gets a single 1 in column x_flat[i].
    x_onehot[np.arange(len(x_flat)), x_flat] = 1.
    # Restore the input's shape, whatever its rank, and append the one-hot axis.
    return x_onehot.reshape(x.shape + (num_classes,))

# --- Experiment configuration ---------------------------------------------
# CONSTRAINT picks the rule to learn; the branches below handle 'sum_25' and
# 'increasing'.
CONSTRAINT = 'increasing'
# ENCODING_MODE picks the input representation: 'onehot', 'embedding' or 'real'.
ENCODING_MODE = 'real'

# --- Dataset construction ---------------------------------------------------
# One row per 5-digit tuple over the digits 0..9.
uniform = np.asarray(list(product(range(10), repeat=5)))
if CONSTRAINT == 'sum_25':
    # Positives come from generate_combinations(25, range(10), 5); every other
    # row of `uniform` is a negative.
    combinations = np.asarray(generate_combinations(25, range(10), 5))
    combinations_set = set(map(tuple, combinations))
    non_combinations = np.asarray(
        [c for c in uniform if tuple(c) not in combinations_set])
    positive = combinations
    negative = non_combinations
elif CONSTRAINT == 'increasing':
    # Positives are the rows whose consecutive differences are all >= 0.
    mask = (np.diff(uniform, n=1, axis=-1) >= 0).all(axis=1)
    positive = uniform[mask]
    negative = uniform[~mask]

if ENCODING_MODE == 'real':
    # Divide the digits by 10 so the real-valued inputs stay in a small range.
    positive = positive.astype(np.float32) / 10.
    negative = negative.astype(np.float32) / 10.
else:
    # to_onehot uses the values as indices, so both arrays must still be
    # integer-typed at this point.
    positive = to_onehot(positive)
    negative = to_onehot(negative)

# Split each class separately (default ratio and shuffling of split()).
train_positive, test_positive = split(positive)
train_negative, test_negative = split(negative)

# Batch size used for each DataLoader.
BATCH = 32

def make_infinite(iterable):
    """Yield the items of ``iterable`` endlessly, restarting it when exhausted.

    Every pass starts a new ``for`` loop over ``iterable``, so an object that
    can be iterated repeatedly (list, DataLoader, ...) is replayed from the
    beginning each time.

    Args:
        iterable: Object to cycle over.

    Yields:
        The items of ``iterable``, pass after pass.

    Raises:
        ValueError: If a pass over ``iterable`` produces no item (an empty
            collection, or a one-shot iterator that is already consumed).
            Without this check the generator would spin forever without
            ever yielding.
    """
    while True:
        produced = False
        for item in iterable:
            produced = True
            yield item
        if not produced:
            raise ValueError('make_infinite: iterable produced no items')
            
            
# Endless streams of shuffled mini-batches, one per (split, class) pair.
train_positive_iter = make_infinite(DataLoader(train_positive, batch_size=BATCH, shuffle=True))
test_positive_iter = make_infinite(DataLoader(test_positive, batch_size=BATCH, shuffle=True))
train_negative_iter = make_infinite(DataLoader(train_negative, batch_size=BATCH, shuffle=True))
test_negative_iter = make_infinite(DataLoader(test_negative, batch_size=BATCH, shuffle=True))

# Report the training-set sizes. A single %-formatted argument prints the same
# text with the Python 2 print statement and the Python 3 print function.
print('Positives %d' % len(train_positive))
print('Negatives %d' % len(train_negative))

Positives 200
Negatives 9799


In [183]:
class Model(nn.Module):
    """MLP that classifies a 5-digit input into two classes.

    The network ends in a log-softmax, so ``forward`` returns
    log-probabilities of shape ``(batch, 2)``. Three input encodings are
    supported, selected by ``mode``.
    """

    # Mode names accepted by __init__.
    EMBEDDING = 'embedding' # share embeddings between same numbers
    ONEHOT = 'onehot'  # one hot without embedding sharing
    REAL = 'real'  # one real number per position

    N_POSITIONS = 5  # digits per input
    N_DIGITS = 10  # size of the one-hot axis

    def __init__(self, mode, dims=100):
        """Build the layers for the requested encoding.

        Args:
            mode: One of ``Model.EMBEDDING``, ``Model.ONEHOT`` or
                ``Model.REAL``.
            dims: Width of the hidden layers (and of each per-position
                embedding in ``EMBEDDING`` mode).

        Raises:
            ValueError: If ``mode`` is not one of the three known modes.
                Previously such a model was built without any layers and
                its ``forward`` returned ``None``.
        """
        super(Model, self).__init__()
        if mode not in (self.EMBEDDING, self.ONEHOT, self.REAL):
            raise ValueError('unknown mode: %r' % (mode,))
        self.mode = mode
        if mode == self.ONEHOT:
            # 50 embeddings of dimension dims (digits*position) -> are all added together
            self.main = self._classifier(self.N_POSITIONS * self.N_DIGITS, dims, True)
        elif mode == self.REAL:
            self.main = self._classifier(self.N_POSITIONS, dims, True)
        else:
            # 10 embeddings of dimension dims, applied to every position; the
            # per-position results are concatenated in forward().
            self.embedder = nn.Linear(self.N_DIGITS, dims)
            # The embedder output feeds `main` directly, with no ReLU in between.
            self.main = self._classifier(self.N_POSITIONS * dims, dims, False)

    @staticmethod
    def _classifier(in_features, dims, extra_hidden):
        """Return Linear/ReLU layers ending in a 2-way log-softmax.

        ``extra_hidden`` adds a second ``dims -> dims`` hidden layer.
        """
        layers = [nn.Linear(in_features, dims), nn.ReLU(True)]
        if extra_hidden:
            layers += [nn.Linear(dims, dims), nn.ReLU(True)]
        # Normalize over the class axis explicitly; calling LogSoftmax without
        # `dim` relies on a deprecated implicit choice and emits a warning.
        layers += [nn.Linear(dims, 2), nn.LogSoftmax(dim=-1)]
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class log-probabilities for a batch.

        Args:
            x: ``REAL`` mode: tensor whose last axis has 5 values.
               ``ONEHOT`` mode: tensor that flattens to ``(batch, 50)``.
               ``EMBEDDING`` mode: one-hot tensor of shape ``(batch, 5, 10)``.

        Returns:
            Tensor of log-probabilities over the two classes.
        """
        if self.mode == self.REAL:
            return self.main(x)
        elif self.mode == self.ONEHOT:
            return self.main(x.view(len(x), -1))
        elif self.mode == self.EMBEDDING:
            # Embed every position with the shared embedder, then concatenate
            # the embeddings of one sample into a single row.
            out = self.embedder(x.view(-1, self.N_DIGITS))
            return self.main(out.view(len(x), -1))
        raise ValueError('unknown mode: %r' % (self.mode,))
     
# Network for the configured encoding, trained with Adam (default settings).
model = Model(ENCODING_MODE)
optimizer = torch.optim.Adam(params=model.parameters())
# Negative log-likelihood loss, applied to the model's log-softmax output.
criterion = nn.NLLLoss()

# Per-iteration history, appended to by the training loop.
losses, test_losses, test_accuracies = [], [], []

def get_loss(model, positive, negative):
    """Compute the class-balanced loss and accuracy on one batch pair.

    Args:
        model: Callable mapping a batch to per-class scores of shape
            ``(batch, 2)``; the scores are passed to the module-level
            ``criterion``.
        positive: Batch of samples whose target class is 1.
        negative: Batch of samples whose target class is 0.

    Returns:
        ``(loss, accuracy)``: each is the unweighted mean of the value on the
        positive batch and the value on the negative batch, so both classes
        count equally whatever the batch sizes are.
    """
    positive_pred = model(positive)
    negative_pred = model(negative)
    # Create the targets on the predictions' device; CPU-only targets would
    # make the loss fail when the model and data live on another device.
    positive_target = torch.ones(len(positive), dtype=torch.long, device=positive_pred.device)
    negative_target = torch.zeros(len(negative), dtype=torch.long, device=negative_pred.device)

    positive_loss = criterion(positive_pred, positive_target)
    negative_loss = criterion(negative_pred, negative_target)

    # Fraction of samples whose highest-scoring class equals the target.
    positive_accuracy = (positive_pred.argmax(1) == positive_target).float().mean()
    negative_accuracy = (negative_pred.argmax(1) == negative_target).float().mean()

    loss = 0.5 * (positive_loss + negative_loss)
    accuracy = 0.5 * (positive_accuracy + negative_accuracy)

    return loss, accuracy


# Training loop: one optimizer step per iteration on a (positive, negative)
# batch pair, followed by an evaluation on one test batch pair.
# NOTE: the loop rebinds the module-level names `positive`, `negative`,
# `test_positive` and `test_negative` to the current batches.
# range / next() / single-argument print() run on both Python 2 and Python 3.
for iteration in range(10000):
    # train
    # .float() matches the cast applied to the test batches below.
    positive = next(train_positive_iter).float()
    negative = next(train_negative_iter).float()
    loss, accuracy = get_loss(model, positive, negative)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # test
    test_positive = next(test_positive_iter).float()
    test_negative = next(test_negative_iter).float()
    # Evaluation is never back-propagated, so skip building the autograd graph.
    with torch.no_grad():
        test_loss, test_accuracy = get_loss(model, test_positive, test_negative)

    losses.append(loss.item())
    test_losses.append(test_loss.item())
    test_accuracies.append(test_accuracy.item())
    if iteration % 500 == 0:
        # Report averages over the last 50 iterations (fewer at the start).
        # The doubled spaces reproduce the original print-statement output.
        print('Iteration %d' % iteration)
        print('Train %s' % np.mean(losses[-50:]))
        print('Test  %s' % np.mean(test_losses[-50:]))
        print('Test Accuracy  %s' % np.mean(test_accuracies[-50:]))

Iteration 0
Train 0.6857330799102783
Test  0.6738572120666504
Test Accuracy  0.796875
Iteration 500
Train 0.08287315875291824
Test  0.09380870126187801
Test Accuracy  0.9708749997615814
Iteration 1000
Train 0.05011832640506327
Test  0.07946214739233255
Test Accuracy  0.9721875
Iteration 1500
Train 0.036228066058829424
Test  0.06890604943037033
Test Accuracy  0.975
Iteration 2000
Train 0.03201420086901635
Test  0.052420592317357656
Test Accuracy  0.9815625
Iteration 2500
Train 0.020497090311255305
Test  0.05273683045059443
Test Accuracy  0.9821875
Iteration 3000
Train 0.018519691927358507
Test  0.053652171096764505
Test Accuracy  0.980625
Iteration 3500
Train 0.01349942522123456
Test  0.036183242960833015
Test Accuracy  0.9896875
Iteration 4000
Train 0.011604854034958407
Test  0.04919256038032472
Test Accuracy  0.9840625
Iteration 4500
Train 0.01013363469421165
Test  0.055939925937709634
Test Accuracy  0.981875
Iteration 5000
Train 0.005791885050712153
Test  0.042234854553826154
Test Ac

In [141]:
# Inspect the shape of the current `positive` batch.
positive.size()

torch.Size([32, 5, 10])

In [41]:
# Display the dtype object that torch.long refers to.
torch.long

torch.int64