In [1]:
import tensorflow as tf
import numpy as np
import os
import matplotlib.pyplot as plt
%matplotlib inline

  from ._conv import register_converters as _register_converters


In [27]:
# Functions:
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)
    
# def squash(vector):
#     epsilon = 0.001
#     norm_vector = tf.sqrt(tf.reduce_sum(tf.square(vector)))
#     norm_vector_epsilon = tf.sqrt(tf.reduce_sum(tf.square(vector))+epsilon)
    
#     return norm_vector/(norm_vector+1)*(tf.norm(vector)/tf.norm(vector))


def squash(vector):
    '''Squashing function.
    Args:
        vector: A 4-D tensor with shape [batch_size, num_caps, vec_len, 1],
    Returns:
        A 4-D tensor with the same shape as vector but
        squashed in 3rd and 4th dimensions.
    '''
    vec_abs = tf.sqrt(tf.reduce_sum(tf.square(vector)))  # a scalar
    scalar_factor = tf.square(vec_abs) / (1 + tf.square(vec_abs))
    vec_squashed = scalar_factor * tf.divide(vector, vec_abs)  # element-wise
    return(vec_squashed)

def routing(u_hat, b_IJ, num_iter):
    # Stopping the routing:
    u_hat_stopped = tf.stop_gradient(u_hat, name='u_hat_stopped')
    print('u_hat shape: ',u_hat_stopped.shape)
    u_hat_size = int(u_hat.shape[1])
    # Routing
    with tf.name_scope('routing'):
        for r_iter in range(num_iter):
            c = tf.nn.softmax(b_IJ,axis=2)
            #assert c.get_shape().as_list() == [5000,1152,10,1,1]
            if r_iter == num_iter-1:
                s_j = tf.reduce_sum(tf.multiply(c,u_hat),axis = 1, keepdims = True)
                v = squash(s_j)
            else:
                s_j = tf.reduce_sum(tf.multiply(c,u_hat_stopped),axis = 1,keepdims=True)
                v = squash(s_j)
                v_tiled = tf.tile(v,[1, u_hat_size,1,1,1])
                a = tf.matmul(u_hat_stopped, v_tiled, transpose_a=True)
                b_IJ = b_IJ + a
#         print('c shape: ',c.shape)
#         print('s_j shape: ',s_j.shape)
#         print('v shape: ',v.shape)
#         print('a shape: ',a.shape)
#         print('b_IJ shape: ',b_IJ.shape)
    return v,b_IJ

def get_predictions(v):
    v_norm = tf.sqrt(tf.reduce_sum(tf.square(v),axis=2))
    v_softmax = tf.nn.softmax(v_norm, axis =1)
    y_pred = tf.argmax(v_norm,axis=1)
    return y_pred


def unpickle(file):
    import pickle
    with open(file, 'rb') as fo:
        dictio = pickle.load(fo, encoding='bytes')
    return dictio

def load_image_data(data_path):
    if 'cifar' in data_path:
        dictio_data = unpickle(data_path)
        X = dictio_data[b'data'].reshape([10000,3,32,32]).transpose(0,2,3,1)
        y = np.array(dictio_data[b'labels'])
        print('shape : ', X.shape)
        return X,y   

In [3]:
# Downloading mnist dataset:
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
# Train set
#X_train = X_train.astype(np.float32).reshape(-1, 28, 28,1) / 255.0
X_train = X_train.astype(np.float32)#.reshape(-1, 28, 28,1) / 255.0
y_train = y_train.astype(np.int32)
# Test set:
#X_test = X_test.astype(np.float32).reshape(-1, 28, 28,1) / 255.0
X_test = X_train.astype(np.float32)
y_test = y_test.astype(np.int32)
training_size = 1000
# Validation set:
X_train, X_valid = X_train[:training_size], X_train[training_size:]
y_train, y_valid = y_train[:training_size], y_train[training_size:]
print('size X train : ',X_train.shap )
print('size X valid : ',X_valid.shape )
print('size X test : ',X_test.shape )

size X train :  (1000, 28, 28)
size X valid :  (59000, 28, 28)
size X test :  (60000, 28, 28)


In [4]:
train_file_path = '../../others/CIFAR10-img-classification-tensorflow/cifar-10-batches-py/data_batch_1'
test_file_path = '../../others/CIFAR10-img-classification-tensorflow/cifar-10-batches-py/test_batchs'
X_train,y_train = load_image_data(train_file_path)
X_test,y_test = load_image_data(train_file_path)



shape :  (10000, 32, 32, 3)
shape :  (10000, 32, 32, 3)


In [5]:
#X_train[0].shape
#plt.imshow(X_train[2])

In [11]:
# Some constants:
batch_size = 250 #None#X_train.shape[0]
num_batches = int(X_train.shape[0]/batch_size)
image_size = X_train[0].shape[1]
num_inputs = image_size*image_size
height,width = image_size,image_size
if image_size == 32:
    num_channels = 3
else:
    num_channels = 1
#n_output_conv1 = (20,20,256)
# variables:
num_iter = 5
n_outputs = 10

In [39]:
# Initialize graph:
reset_graph()
# Placeholders:
X = tf.placeholder(shape=(batch_size, height, width, num_channels), dtype=tf.float32)
y = tf.placeholder(tf.int32, shape=(batch_size), name="y")
print('train placeholder size : ',X.shape)

train placeholder size :  (250, 32, 32, 3)


In [40]:
# First Conv layer:
with tf.name_scope('cnn'):
    conv = tf.layers.conv2d(X, filters=256, kernel_size=9, strides=[1,1], padding='VALID')
    print(conv.shape)

(250, 24, 24, 256)


In [41]:
# Problems with the implementation?
# First Capsule LAyer:
with tf.name_scope('caps'):
    caps = tf.layers.conv2d(conv,filters=256,kernel_size=9,strides=[2,2],padding='VALID')
    print('caps shape: ',caps.shape)
    caps_size = int(caps.shape[1])
    u_i = tf.reshape(caps, shape=[batch_size,32*caps_size*caps_size,8,1])
    #caps2 = tf.layers.conv2d(caps1,filters=8,kernel_size=9,strides=[2,2],padding='VALID')
    u_i = squash(u_i)
    print('u_i shape: ',u_i.shape)
    u_i_size = int(u_i.shape[1])
    #a_caps1 = squash(caps1)

caps shape:  (250, 8, 8, 256)
u_i shape:  (250, 2048, 8, 1)


In [42]:
# # First Capsule LAyer:
# with tf.name_scope('caps'):
#     caps_list = []
#     for i in range(8):
#         caps_i = tf.layers.conv2d(conv,filters=32,kernel_size=9,strides=[2,2],padding='VALID')
#         caps_i = tf.reshape(caps_i, [batch_size,6,6,32,1])
#         caps_list.append(caps_i)
        
#     u_i = tf.concat(caps_list,axis = -1)
#     print('caps shape: ',u_i.shape)
#     u_i = tf.reshape(u_i, shape=[-1,32*6*6,8,1])
#     #caps2 = tf.layers.conv2d(caps1,filters=8,kernel_size=9,strides=[2,2],padding='VALID')
#     u_i = squash(u_i)
#     print('u_i shape: ',u_i.shape)
#     #a_caps1 = squash(caps1)

In [43]:
# Routing:

with tf.variable_scope('final_layer'):
    w_initializer = np.random.normal(size=[1, u_i_size, 10, 8, 16], scale=0.01)
    W = tf.Variable(w_initializer, dtype=tf.float32)

    # repeat W with batch_size times to shape [batch_size, 1152, 8, 16]
    W = tf.tile(W, [batch_size, 1, 1, 1,1]) # -1 instead of Batch size
    print('W shape: ',W.shape)
    # calc u_ahat
    # [8, 16].T x [8, 1] => [16, 1] => [batch_size, 1152, 16, 1]
    u_i = tf.reshape(u_i, shape=(batch_size, -1, 1, u_i.shape[-2].value, 1)) # -1 instead of Batch size
    u_i = tf.tile(u_i, [1,1,10,1,1])
    print('u_i shape: ',u_i.shape)
    u_hat = tf.matmul(W, u_i, transpose_a=True)
    print('u_hat shape: ',u_hat.shape)
    
    with tf.variable_scope('routing'):
        # Initialize constants:
        b_IJ = tf.zeros([batch_size, u_i.shape[1].value, 10, 1, 1], dtype=np.float32)
        print('b_IJ shape: ',b_IJ.shape)
        v,b_IJ = routing(u_hat,b_IJ,num_iter)
        print('After routing : ')
        print('v shape: ',v.shape)
        

W shape:  (250, 2048, 10, 8, 16)
u_i shape:  (250, 2048, 10, 8, 1)
u_hat shape:  (250, 2048, 10, 16, 1)
b_IJ shape:  (250, 2048, 10, 1, 1)
u_hat shape:  (250, 2048, 10, 16, 1)
After routing : 
v shape:  (250, 1, 10, 16, 1)


In [44]:
#def calculate_margin_loss(y_batch,v,n_outputs):
m_pos = 0.9
m_neg = 0.1
margin_loss = 0
lambda_const = 0.5

with tf.name_scope('margin_loss'):
    v =tf.squeeze(v)
    #v_norm = tf.map_fn(lambda x: tf.norm(x,axis=1), v)
    v_norm = tf.sqrt(tf.reduce_sum(tf.square(v),axis=2))
    #v_softmax = tf.nn.softmax(v_norm, axis =1)
    #y_pred = tf.argmax(v_norm,axis=1)
    #t_k = tf.equal(y_pred,y_train)
    t_k = tf.one_hot(y,n_outputs)

    # Loss:
    max_l = tf.maximum(tf.cast(0,tf.float32),tf.square(m_pos - v_norm))
    min_l = tf.maximum(tf.cast(0,tf.float32),tf.square(v_norm - m_neg))

    margin_loss = tf.multiply(t_k,tf.square(max_l)) + lambda_const*tf.multiply((1-t_k),tf.square(min_l))
#    return margin_loss

In [38]:
v_masked.shape

TensorShape([Dimension(250), Dimension(16)])

In [56]:
# Regularizer Decoder:

with tf.name_scope('mask'):
    # Masking:
    v_list = []
    for i,j in zip(range(batch_size),y_train):   # y_train ?
        v_list.append(tf.reshape(tf.squeeze(v)[i][j,:],[1,16]))
    v_masked = tf.concat(v_list,axis=0) 
    
with tf.name_scope('decoder'):
    # 2 FC Relu:
    dec1 = tf.layers.dense(inputs=v_masked, units=512, activation=tf.nn.relu)
    dec2 = tf.layers.dense(inputs=dec1, units=1024, activation=tf.nn.relu)
    # 1 FC Sigmoid:
    dec3 =  tf.layers.dense(inputs=dec2, units=num_inputs*num_channels, activation=tf.nn.sigmoid)
    loss_reg = tf.sqrt(tf.reduce_sum(tf.square(tf.reshape(X, [batch_size,num_inputs*num_channels])-dec3)))
    

In [57]:
# Total loss:
with tf.name_scope('Loss'):
    #margin_loss = calculate_margin_loss(y_batch,v,n_outputs)
    loss = tf.reduce_sum(margin_loss) + loss_reg

In [58]:
# Backpropagation:
with tf.name_scope('optimizer'):
    optimizer = tf.train.AdamOptimizer()
    training_op = optimizer.minimize(loss)

In [59]:
# Evaluation:
#with tf.name_scope('evaluate'):
    

In [62]:
init = tf.global_variables_initializer()
epochs = 5
train_size = X_train.shape[0]

with tf.Session() as sess:
    init.run()
    
    for epoch in range(epochs):
        shuffled_idx = np.random.permutation(train_size)
        X_batches = np.array_split(X_train[shuffled_idx], num_batches)
        y_batches = np.array_split(y_train[shuffled_idx], num_batches)
        

        for X_batch,y_batch in zip(X_batches, y_batches):
            sess.run(training_op, feed_dict = {X:X_batch, y:y_batch})
        print('Epoch {} completed'.format(epoch))
        
        X_batches_test = np.array_split(X_test, num_batches)
        y_batches_test = np.array_split(y_test, num_batches)
        y_pred_test = np.array([])
        for X_batch, y_batch in zip(X_batches_test, y_batches_test):
            y_pred_batch = sess.run(get_predictions(v),
                                feed_dict={X: X_batch, y: y_batch})

            y_pred_test = np.hstack([y_pred_test, y_pred_batch])

        test_accuracy = sum(y_pred_test == y_test) / len(y_pred_test)
        print('accuracy test: {}'.format(test_accuracy))
        

Epoch 0 completed
accuracy test: 0.103
Epoch 1 completed
accuracy test: 0.1256
Epoch 2 completed
accuracy test: 0.1152
Epoch 3 completed
accuracy test: 0.2137
Epoch 4 completed
accuracy test: 0.214


In [None]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    y_pred = sess.run(get_predictions(v),feed_dict = {X:X_batch, y:y_batch})
    accuracy = sum(y_pred ==  y_batch)/len(y_pred)
    #y_pred = sess.run(v,feed_dict={X:X_baaatch, y:y_batch})

In [None]:
preds = np.hstack([preds,y_pred])

In [None]:
preds.shape

In [None]:
sess = tf.InteractiveSession()

# Create example y_hat.
#sess.run(tf.global_variables_initializer())
#y_pred_run = sess.run(y_pred, feed_dict ={X:X_train})
#print(sess.run(t_k[0],feed_dict = {X:X_train, y : y_train}))
sess.run(tf.global_variables_initializer())
#y_sess = sess.run(y_pred, feed_dict={X:X_train, y:y_train})
tk_sess = sess.run(t_k, feed_dict={X:X_train, y:y_train})


sess.close()

In [None]:


loss = 0
for i in range(batch_size):
    for k in range(n_outputs):
        if y_pred[i]
        loss += 
    
    break

In [None]:
y_pred

In [None]:
Y = tf.one_hot(y_train, depth=10, axis=1, dtype=tf.float32)


In [None]:
masked_v = tf.multiply(tf.squeeze(v), tf.reshape(Y, (-1, 10, 1)))
v_length = tf.sqrt(tf.reduce_sum(tf.square(v), axis=2, keep_dims=True) + 0.01)

In [None]:
masked_v.shape

In [None]:
x =tf.boolean_mask(tf.reshape(v,[batch_size,10,16]),pd.get_dummies(y_train).values)

In [None]:
x.shape

In [None]:
sess = tf.InteractiveSession()

# Create example y_hat.
sess.run(tf.global_variables_initializer())
Y_run = sess.run(Y)

sess.close()

In [None]:
lower_triangular_ones = tf.constant(np.tril(np.ones([30,30])),dtype=tf.float32)


In [None]:
L.shape

In [None]:
import tensorflow as tf
import numpy as np

shape = [2, 2, 2, 10] 
L = np.arange(np.prod(shape))
L = np.reshape(L, shape)

indices = [0, 2, 3, 8]
axis = -1 # last dimension

def gather_axis(params, indices, axis=0):
    return tf.stack(tf.unstack(tf.gather(tf.unstack(params, axis=axis), indices)), axis=axis)


with tf.Session() as sess:
    partL = sess.run(gather_axis(L, indices, axis))
    print(partL.shape)
    #test = sess.run(tf.gather(tf.unstack(L, axis=axis),indices))

In [None]:
v_test = tf.reshape(tf.range(5000*10*16),[5000,10,16])

In [None]:

sess = tf.InteractiveSession()
v_run = sess.run(v_test)
v_mask = sess.run(tf.boolean_mask(v_test,mask=y_train,axis=0))
#v_masked = sess.run(gather_axis(v_test,y_train,0))
print(v_mask.shape)
sess.close()

In [None]:
list_v =[]
for i in range(v_run.shape[0]):
    list_v.append(tf.reshape(v_test[i,y_train[i],:],[1,16]))


In [None]:
list_v[0]

In [None]:
def routing(u_hat, b):
    v = []
    #b_shape = b.get_shape().as_list()
    u_hat_stopped = tf.stop_gradient(u_hat, name='u_hat_stopped')
    for j in range(10):   
        size_splits = [j, 1, b_shape[2] - j - 1]
        
        for r_iter in range(MAX_ITER):
            c = tf.nn.softmax(b,axis=2)

            assert c.get_shape() == [1, 1152, 10, 1]
            
            c = tf.tile(c,[5000,1,1,1])
#             b_il, b_ij, b_ir = tf.split(b, size_splits, axis=2)
#             c_il, c_ij, c_ir = tf.split(c, size_splits, axis=2)
        
            # Calculating c_ij
            #c_ij = c[:,:,j,:]
            #new_shape_c_ij = c_ij.get_shape().as_list() + [1]
            #c_ij = tf.reshape(c_ij, new_shape_c_ij)
            
            #assert c_ij.get_shape() == [1, 1152, 1, 1]
            
            # Computing s_j
            if j == MAX_ITER-1:
                s_j = tf.reduce_sum(tf.multiply(c,u_hat))
                
                
                
                
                s_j = tf.reduce_sum(tf.multiply(c_ij, u_hat),axis=1, keepdims=True)
            assert s_j.get_shape() == [5000, 1, 16, 1]
            
            # computing v_j
            v_j = squash(s_j)
            # updating b_ij:
            
            v_j_tiled = tf.tile(v_j, [1,1152,1,1])
            a_ij = tf.reduce_sum(tf.matmul(u_hat,v_j_tiled, transpose_a=True),axis = 0, keepdims=True)
            #print(v_j.shape)
            #print(a_ij.shape)
            
            b_ij += a_ij
            #print('b_ij shape: ',b_ij.shape)
            b = tf.concat([b_il, b_ij, b_ir] ,axis =2)
        v.append(v_j)
    v = tf.concat(v,axis=1)
    
    print('c_ij shape: ',c_ij.shape)
    print('s_j shape: ',s_j.shape)
    print('v_j shape: ',v_j.shape)
    print('v_j_tiled shape: ',v_j_tiled.shape)
    print('a_ij shape: ',a_ij.shape)
    print('v shape: ',v.shape)

In [None]:
a_test

In [None]:
with tf.name_scope('digit_caps'):
    W = tf.Variable( tf.random_uniform((8,16),seed = 1), name="theta",dtype=tf.float32)
    u_ji = tf.reshape(tf.matmul(tf.reshape(u_i,[5000*1152,8]),W),[5000,1152,16])
    v_j = routing(u_ji, num_iter)
    
    
    
    
    
    

In [None]:
# Testing:
sess = tf.InteractiveSession()

# We can just use 'c.eval()' without passing 'sess'
sess.run(tf.global_variables_initializer())

#print(a.eval(feed_dict=X_test))
print(c.eval())
sess.close()

In [None]:
# Testing class Caps Layer:

In [None]:
class CapsLayer(object):
    def __init__(self, input_tensor, num_capsules, digit_layer = False):
        self.input = input_tensor
        self.num_capsules = num_capsules
        
    def __call__(self, num_filters, kernel_size, strides):
        # In case it is a primary layer:
        if digit_layer == False:
            conv = tf.layers.conv2d(inputs=self.input, filters=num_filters, \
                                    kernel_size=kernel_size, activation=tf.nn.relu, \
                                    strides=strides, padding = 'SAME' )
            caps = self.squash(conv)
            return caps
        # In case it is a final layer:
        else:
            fcaps = self.routing(self.input)
            return fcaps
            
        
    def routing(self,capsule):
    
        
    
    def create_list_capsules(num_capsules):
        for i in range(num_capsules)
            if i == 0:
                self.list_capsules.append(tf.layers.conv2d(input_tensor, \
                                                           filters=num_filters, \
                                                           kernel_size=kernel_size, \
                                                           strides=[2,2], \
                                                           padding='VALID'))
            else:
                self.list_capsules.append(create_capsule)
        
    def create_capsule(input_tensor, kernel_size, strides):
        return tf.layers.conv2d(input_capsule, filters=num_filters, kernel_size=kernel_size, strides=[2,2], padding='VALID')
    
    def squashing(vector):
        return tf.norm(vector)/(tf.norm(vector)+1)*(tf.norm(vector)/tf.norm(vector))
    