In [1]:
import numpy as np
from __future__ import print_function
import keras
from keras.layers import Dense,Input,Lambda
from keras.models import Model
from keras.optimizers import SGD
import tensorflow as tf
import keras.backend as K

Using TensorFlow backend.


In [22]:
# Data generator for sets of digits to be summed up. 'N_' is the number of digits per set for a given batch.
class DataGenerator(object):
    """Endless source of (digits, sum) batches for the set-summation task.

    Parameters
    ----------
    batch_size : int
        Number of sets in every yielded batch.
    shuffle : bool
        Stored on the instance for interface compatibility; `generate`
        does not read it (every batch is freshly sampled anyway).
    max_set_size : int
        Largest set size drawn when `generate` is called without `N_`.
        Defaults to 10, the value that was previously hard-coded.
    """

    def __init__(self, batch_size=32, shuffle=True, max_set_size=10):
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.max_set_size = max_set_size

    def generate(self, N_=None):
        """Yield `(X, y)` batches forever.

        Parameters
        ----------
        N_ : int or None
            Fixed number of digits per set. When None, a new size is drawn
            uniformly from 1..max_set_size for every batch.

        Yields
        ------
        X : integer ndarray of shape (batch_size, N, 1), digits in 0..9.
        y : integer ndarray of shape (batch_size, 1, 1), the sum of each set.
        """
        while True:
            # Identity test: `== None` would be evaluated element-wise if a
            # NumPy value were ever passed as N_.
            if N_ is None:
                # The upper bound of randint is exclusive, hence the +1.
                N = np.random.randint(1, self.max_set_size + 1)
            else:
                N = N_

            # All sets in one batch share the same size N, so the batch is a
            # dense array and needs no padding.
            X = np.random.randint(10, size=(self.batch_size, N, 1))
            # keepdims keeps the summed axis, giving (batch_size, 1, 1).
            y = np.sum(X, axis=1, keepdims=True)

            yield X, y

In [3]:
# Keyword arguments shared by every DataGenerator built in this notebook.
params = dict(
    batch_size=32,
    shuffle=True,
)

In [4]:
# Two independent batch streams, one for training and one for validation.
# generate() is called without N_, so each batch picks its own set size
# (at most 10 digits per set).
train_generator, test_generator = (
    DataGenerator(**params).generate() for _ in range(2)
)

In [12]:
# Deep Sets Model: a per-element network (\phi), a sum over the set axis,
# then a second network (\rho) applied to the pooled representation.

# \phi Network. The first input dimension (set size) is None so that sets of
# any length can be fed; Dense acts on the last axis, i.e. on each element.
x = Input(shape=(None,1,))
y = Dense(256, activation='relu')(x)
y = Dense(128, activation='relu')(y)
y = Dense(64, activation='relu')(y)

# Summing the representations outputted by \phi Network over the set axis.
# K.sum(..., keepdims=True) is used instead of tf.reduce_sum(..., keep_dims=True):
# the `keep_dims` spelling is deprecated in TensorFlow, while the Keras backend
# call takes `keepdims`. The summed axis is kept with length 1.
y = Lambda(lambda x_: K.sum(x_, axis=1, keepdims=True))(y)

# \rho Network
y = Dense(256, activation='relu')(y)
y = Dense(128, activation='relu')(y)
y = Dense(64, activation='relu')(y)
# Single output unit: the predicted sum. ReLU keeps the prediction non-negative.
y = Dense(1,activation='relu')(y)

model = Model(x,y)

In [13]:
# List every layer with its output shape to check the tensor dimensions.
for layer in model.layers:
    print("{} {}".format(layer.name, layer.output_shape))

input_2 (None, None, 1)
dense_8 (None, None, 256)
dense_9 (None, None, 128)
dense_10 (None, None, 64)
lambda_2 (None, 1, 64)
dense_11 (None, 1, 256)
dense_12 (None, 1, 128)
dense_13 (None, 1, 64)
dense_14 (None, 1, 1)


In [14]:
# Training schedule: number of generator batches per epoch for training and
# validation, and the number of epochs.
train_steps_per_epoch = 100000
test_steps_per_epoch = 10000
train_epochs = 2

In [15]:
# Squared-error (L2) loss that the model is optimised on.
def least_squared_error(y_true, y_pred):
    """Return the squared difference between prediction and target,
    summed over the last axis."""
    residual = y_pred - y_true
    return K.sum(K.square(residual), axis=-1)

In [16]:
# Compiling the model.
def exact_sum_accuracy(y_true, y_pred):
    """Fraction of predictions that equal the true sum after rounding.

    This replaces the 'accuracy' shortcut. Keras resolves that string from
    the output shape and loss, so what it measures on a regression output is
    implicit and can differ between Keras releases. Spelling the metric out
    makes the reported number unambiguous.
    """
    hits = K.equal(K.cast(y_true, K.floatx()), K.round(y_pred))
    # Cast the boolean matches to float so they can be averaged.
    return K.mean(K.cast(hits, K.floatx()), axis=-1)

model.compile(optimizer='adam',
              loss=least_squared_error,
              metrics=[exact_sum_accuracy])

In [17]:
# Fit on batches from the training generator; after every epoch, validate on
# batches drawn from the test generator.
model.fit_generator(
    train_generator,
    steps_per_epoch=train_steps_per_epoch,
    epochs=train_epochs,
    validation_data=test_generator,
    validation_steps=test_steps_per_epoch)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f0680ea8c90>

In [None]:
# The model seems to achieve 100% accuracy on the test set. However, I suspect there is a lot of overlap between the test sets and the sets it has already seen in the training set.

In [18]:
# Spot check: predict the sum of one random set of 10 digits.
test_x = np.random.randint(0, 10, size=10)
# The model expects (batch, set_size, 1); this is a batch holding a single set.
prediction = model.predict(test_x.reshape((1, 10, 1)))
ground_truth = test_x.sum()

print("Ground Truth: " + str(ground_truth))
print("Prediction: " + str(prediction))

Ground Truth: 28
Prediction: [[[ 28.02798653]]]


In [19]:
# Second spot check on a fresh random set of 10 digits.
test_x = np.random.randint(0, 10, size=10)
# Add the batch axis and the trailing feature axis before predicting.
prediction = model.predict(test_x.reshape((1, 10, 1)))
ground_truth = test_x.sum()

print("Ground Truth: " + str(ground_truth))
print("Prediction: " + str(prediction))

Ground Truth: 45
Prediction: [[[ 45.0466423]]]


In [26]:
# Evaluate on sets of exactly 10 digits: 1000 batches of 32 sets = 32000 sets.
output = model.evaluate_generator(
    DataGenerator(**params).generate(N_=10),
    steps=1000)
# output[0] is the loss, output[1] the compiled metric (printed as a percentage).
print("Loss: " + str(output[0]))
print("Accuracy: " + str(output[1]*100))

Loss: 0.00198271378723
Accuracy: 100.0


In [28]:
# Evaluate on sets of exactly 100 digits: 1000 batches of 32 sets = 32000 sets.
# Training batches were generated with at most 10 digits per set, so this
# measures how the model handles longer sets.
output = model.evaluate_generator(
    DataGenerator(**params).generate(N_=100),
    steps=1000)
# output[0] is the loss, output[1] the compiled metric (printed as a percentage).
print("Loss: " + str(output[0]))
print("Accuracy: " + str(output[1]*100))

Loss: 0.131676646866
Accuracy: 100.0
