## Connect Google Drive to Collab

In [0]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


## Generate Sort-of-Clevr Dataset (taken from https://github.com/kimhc6028/relational-networks)

In [0]:
import cv2
import os
import numpy as np
import random
#import cPickle as pickle
import pickle

train_size = 9800
test_size = 200
img_size = 75
size = 5
question_size = 11 ##6 for one-hot vector of color, 2 for question type, 3 for question subtype
"""Answer : [yes, no, rectangle, circle, r, g, b, o, k, y]"""

nb_questions = 10
dirs = './data'

colors = [
    (0,0,255),##r
    (0,255,0),##g
    (255,0,0),##b
    (0,156,255),##o
    (128,128,128),##k
    (0,255,255)##y
]


try:
    os.makedirs(dirs)
except:
    print('directory {} already exists'.format(dirs))

def center_generate(objects):
    while True:
        pas = True
        center = np.random.randint(0+size, img_size - size, 2)        
        if len(objects) > 0:
            for name,c,shape in objects:
                if ((center - c) ** 2).sum() < ((size * 2) ** 2):
                    pas = False
        if pas:
            return center



def build_dataset():
    objects = []
    img = np.ones((img_size,img_size,3)) * 255
    for color_id,color in enumerate(colors):  
        center = center_generate(objects)
        if random.random()<0.5:
            start = (center[0]-size, center[1]-size)
            end = (center[0]+size, center[1]+size)
            cv2.rectangle(img, start, end, color, -1)
            objects.append((color_id,center,'r'))
        else:
            center_ = (center[0], center[1])
            cv2.circle(img, center_, size, color, -1)
            objects.append((color_id,center,'c'))


    rel_questions = []
    norel_questions = []
    rel_answers = []
    norel_answers = []
    """Non-relational questions"""
    for _ in range(nb_questions):
        question = np.zeros((question_size))
        color = random.randint(0,5)
        question[color] = 1
        question[6] = 1
        subtype = random.randint(0,2)
        question[subtype+8] = 1
        norel_questions.append(question)
        """Answer : [yes, no, rectangle, circle, r, g, b, o, k, y]"""
        if subtype == 0:
            """query shape->rectangle/circle"""
            if objects[color][2] == 'r':
                answer = 2
            else:
                answer = 3

        elif subtype == 1:
            """query horizontal position->yes/no"""
            if objects[color][1][0] < img_size / 2:
                answer = 0
            else:
                answer = 1

        elif subtype == 2:
            """query vertical position->yes/no"""
            if objects[color][1][1] < img_size / 2:
                answer = 0
            else:
                answer = 1
        norel_answers.append(answer)
    
    """Relational questions"""
    for i in range(nb_questions):
        question = np.zeros((question_size))
        color = random.randint(0,5)
        question[color] = 1
        question[7] = 1
        subtype = random.randint(0,2)
        question[subtype+8] = 1
        rel_questions.append(question)

        if subtype == 0:
            """closest-to->rectangle/circle"""
            my_obj = objects[color][1]
            dist_list = [((my_obj - obj[1]) ** 2).sum() for obj in objects]
            dist_list[dist_list.index(0)] = 999
            closest = dist_list.index(min(dist_list))
            if objects[closest][2] == 'r':
                answer = 2
            else:
                answer = 3
                
        elif subtype == 1:
            """furthest-from->rectangle/circle"""
            my_obj = objects[color][1]
            dist_list = [((my_obj - obj[1]) ** 2).sum() for obj in objects]
            furthest = dist_list.index(max(dist_list))
            if objects[furthest][2] == 'r':
                answer = 2
            else:
                answer = 3

        elif subtype == 2:
            """count->1~6"""
            my_obj = objects[color][2]
            count = -1
            for obj in objects:
                if obj[2] == my_obj:
                    count +=1 
            answer = count+4

        rel_answers.append(answer)

    relations = (rel_questions, rel_answers)
    norelations = (norel_questions, norel_answers)
    
    img = img/255.
    dataset = (img, relations, norelations)
    return dataset


print('building test datasets...')
test_datasets = [build_dataset() for _ in range(test_size)]
print('building train datasets...')
train_datasets = [build_dataset() for _ in range(train_size)]


#img_count = 0
#cv2.imwrite(os.path.join(dirs,'{}.png'.format(img_count)), cv2.resize(train_datasets[0][0]*255, (512,512)))


print('saving datasets...')
filename = os.path.join(dirs,'sort-of-clevr.pickle')
with  open(filename, 'wb') as f:
    pickle.dump((train_datasets, test_datasets), f)
print('datasets saved at {}'.format(filename))

building test datasets...
building train datasets...
saving datasets...
datasets saved at ./data/sort-of-clevr.pickle


## Define functions for neural network + data processing

In [0]:
import keras
import tensorflow as tf
import numpy as np
import pickle
import os
import gc
import os
from google.colab import files
from PIL import Image
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from tensorflow.keras.layers import LSTM
from tensorflow.examples.tutorials.mnist import input_data

#mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
'''TO DO: Create LSTM for processing questions
   - Load and test with sort-of-clevr database
   
   - IDEA: Instead of taking every object pair, instead use random object pairs and instead of reduce mean,
   concatenate the object pairs together after feeding it through the object pair neural net (g(x))

- Issue probably stems from the information lost from reduce mean 
- Majority ram used in initial loading and processing data in memory
- Doesnt work for minibatch training
'''

'''LSTM Setup: Words are given unique integers (one hot encoding) - questions are fed in one word at a time'''
'''Or For simplicity - just add question vector directly to RN vector ie 
Questions are encoded into a vector of size of 11 : 6 for one-hot vector 
for certain color among 6 colors, 2 for one-hot vector of 
relational/non-relational questions. 3 for one-hot vector of 3 subtypes.'''

def weight_variable(shape,namez):
  initial = tf.truncated_normal(shape, stddev=0.1,name=namez)
  return tf.Variable(initial)

def bias_variable(shape,namez):
  initial = tf.constant(0.1, shape=shape,name=namez)
  return tf.Variable(initial)

def lstm_cell(lstm_size):
  return tf.keras.layers.LSTM(lstm_size)

def conv2d(x, W, namez):
  return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME',name=namez)

def max_pool_2x2(x):
  return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='SAME')

def getarray(imgw):
	im = Image.open(imgw)
	npim = np.array(im)
	npim = npim.reshape(-1,80,80,4)
	return npim

def getarraystandard(imgw):
	npim = getarray(imgw)

	scalar = StandardScaler()

	shape = npim.shape
	npim = npim.reshape(1,-1)

	npim = scalar.fit_transform(npim)

	npim = npim.reshape(shape)

	return npim

def loadclvr():
	print('loading data...')
	dirs = './data'
	filename = os.path.join(dirs,'sort-of-clevr.pickle')
	with open(filename,'rb') as f:
		traindata, testdata = pickle.load(f)
	reltrain = []
	reltest = []
	noreltrain = []
	noreltest = []
	print('processing data...')

	for img, relations, norelations in traindata:
		img = np.swapaxes(img,0,2)
		for qst,ans in zip(relations[0], relations[1]):
			reltrain.append((img,qst,ans))
		for qst,ans in zip(norelations[0],norelations[1]):
			noreltrain.append((img,qst,ans))

	for img, relations, norelations in testdata:
		img = np.swapaxes(img,0,2)
		for qst,ans in zip(relations[0], relations[1]):
			reltest.append((img,qst,ans))
		for qst,ans in zip(norelations[0], norelations[1]):
			noreltest.append((img,qst,ans))

	return (reltrain, reltest, noreltrain, noreltest)

def cvt_data_axis(data):
	img = [e[0] for e in data]
	qst = [e[1] for e in data]
	ans = [e[2] for e in data]
	return (img,qst,ans)

def processclvr(data):
	imgs = np.asarray(data[0])
	questions = np.asarray(data[1])

	answers = np.asarray([data[2]])
	answers = answers.reshape(98000,1)
	onehot = OneHotEncoder(categories=[np.array([0,1,2,3,4,5,6,7,8,9])])
	answers = onehot.fit_transform(answers)

	return (imgs,questions,answers)




Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


## Load and Process Dataset

In [0]:
#files.download( "data/sort-of-clevr.pickle" ) 
reltrain,reltest,noreltrain,noreltest = loadclvr()
del(reltest)
del(noreltrain)
del(noreltest)
gc.collect()

a = cvt_data_axis(reltrain)

reltrain = processclvr(a)
qsts = reltrain[1][0].reshape(1,-1)
print(qsts.shape)

loading data...
processing data...
(1, 11)


## Initialize Model

In [0]:

#npim = getarraystandard('test.tiff')
#npim2 = getarraystandard('test2.tiff')

#finnpim = np.concatenate((npim,npim2),axis=0)

'''LSTM Word Processor'''
#xq = tf.placeholder(tf.float32, [None, n1,n2],name='xq')
#split input question into individual word vectors
#xq = tf.split(xq,n1,1)

#w1 = lstm_cell(n2)
#outputs, states = 

'''CNN Archetecture: 4 Conv layers to k feature maps of size nxn
k is the number ofk ernels of the final conv layer'''
#Image Size 80x80 with 5 pixel shapes
x = tf.placeholder(tf.float32, [None, 16875],name='x')
y_ = tf.placeholder(tf.float32, [None, 10], name='y_')

'''Conv Network'''
#First Convolutional Layer
W_conv1 = weight_variable([3, 3, 3, 32],'W_conv1')
b_conv1 = bias_variable([32],'b_conv1')

x_image = tf.reshape(x, [-1,75,75,3])

h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1,'h_conv1') + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

#Second Convolutional Layer
W_conv2 = weight_variable([3, 3, 32, 32],'W_conv2')
b_conv2 = bias_variable([32],'b_conv2')

h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2,'h_conv2') + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

#Third Convolutional Layer
W_conv3 = weight_variable([3, 3, 32, 32],'W_conv3')
b_conv3 = bias_variable([32],'b_conv3')

blah = tf.nn.relu(conv2d(h_pool2, W_conv3,'blah') + b_conv3)
h_conv3 = max_pool_2x2(blah)
"h_conv3 size: [None , 4 ,4 , 64] "


#Fourth Convolutional Layer
W_conv4 = weight_variable([3, 3, 32, 32],'W_conv4')
b_conv4 = bias_variable([32],'b_conv4')

h_conv4 = tf.nn.relu(conv2d(h_conv3, W_conv4,'h_conv4') + b_conv4)
h_pool4 = max_pool_2x2(h_conv4)
#print(h_pool4)


with tf.Session() as sess:
	sess.run(tf.global_variables_initializer())

#Output (Batch, New COl, New Rows, Filters)
#Get Objects (there are col * rows objects)
	objects = []
	'''REMEMBER TO CHANGE OUTPUT WHEN SWTICHING BETWEEN MNIST AND ACTUAL DATA'''
	batchx = (np.swapaxes(reltrain[0][0],0,2))
	output = sess.run(h_pool4, feed_dict={x: batchx.reshape(1,-1)})
	#output = sess.run(h_pool4, feed_dict={x: npim})
	#print(h_pool3)
	
	'''Output (h_pool4) shape: (-1,3,3,32)'''
	print(output.shape)
	
	for i in range(0,len(output[0])):
		for j in range(0,len(output[0,0])):
			objects += [tf.concat([h_pool4[:,i,j,:], [[((5*i + j) / 25)]]], axis=1)]
	objects = np.asarray(objects)
	print(objects[0].shape)
#Compare each object pair
	qst_ = tf.placeholder(tf.float32, [None,11] ,name='qst_')
	#Stack object pairs into batch
	check = -1
	for k in range(0,len(objects)):
		first = objects[k]
		for g in range(k+1,len(objects)):
			second = objects[g]
			if check == -1:
				input2 = tf.concat((first,second,qst_), axis=1)

				check = 0
			else:
				tempinput2 = tf.concat((first,second,qst_), axis=1)
				input2 = tf.concat((input2,tempinput2),axis=0)
				#print(input2)
	'''Input 2 (Stacked Object Pairs) shape: (300, 77)'''
	print('INPUT 2 BElOW')
	print(input2)
	'''First MLP (For object pairs)'''
	#x2 = tf.placeholder(tf.float32, [None,128],name='x2')

	W_21 = weight_variable([77,66],'W_21')
	b_21 = bias_variable([66],'b_21')

	h21 = tf.nn.relu(tf.matmul(input2,W_21) + b_21)

	W_22 = weight_variable([66,66],'W_22')
	b_22 = bias_variable([66],'b_22')

	h22 = tf.nn.relu(tf.matmul(h21,W_22) + b_22)

	W_23 = weight_variable([66,66],'W_23')
	b_23 = bias_variable([66],'b_23')

	h23 = tf.nn.relu(tf.matmul(h22,W_23) + b_23)

	W_24 = weight_variable([66,128],'W_24')
	b_24 = bias_variable([128],'b_24')

	drop = tf.placeholder(tf.float32)

	h244 = tf.nn.relu(tf.matmul(h23,W_24) + b_24)
	h24 = tf.nn.dropout(h244, keep_prob = drop)

	'''h24 shape: (300, 128)'''
	#Feed batch of object pairs into First Relational MLP
	#output2 = sess.run(h24, feed_dict={x2: sess.run(input2,feed_dict={x: npim})})
	#print(output2.shape)

	#Elementwise sum of all object pair outputs
	input3old = tf.reduce_mean(h24,axis=0)	
	input3 = tf.reshape(input3old,[-1,128])
	'''Second MLP'''
	#x3 = tf.placeholder(tf.float32,[None,128],name='x3')

	W_31 = weight_variable([128,64],'W_31')
	b_31 = bias_variable([64],'W_31')
	testsimple = tf.layers.flatten(h_conv3)
	h31 = tf.nn.relu(tf.matmul(input3,W_31) + b_31)

	#Output Layer size equal to answer vocabulary length (one-shot encoding)
	

	E_31 = weight_variable([64,64],'E_31')
	be_31 = bias_variable([64],'be_31')
	h322 = tf.nn.relu(tf.matmul(h31,E_31) + be_31)
	h32 = tf.nn.dropout(h322, keep_prob = drop)

	W_32 = weight_variable([64,10],'W_32')
	b_32 = bias_variable([10],'b_32')

	finaloutt = tf.matmul(h32,W_32) + b_32
	finalout = tf.nn.dropout(finaloutt, keep_prob = drop)

	#Feed into second relational MLP
	
	#print(sess.run(finalout, feed_dict={x: finnpim}))

	#Training
	
	error = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=finalout))
	train_step = tf.train.AdamOptimizer(1e-4).minimize(error)
	correct_prediction = tf.equal(tf.argmax(finalout,1), tf.argmax(y_,1))
	accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
	sess.run(tf.global_variables_initializer())
	saver = tf.train.Saver()
	#save_path = saver.save(sess, "/content/drive/My Drive/RelationalNN/dm.ckpt")
	#print('done saving at',save_path)
	#for file in os.listdir('cpt'):
		#path = os.path.join('cpt/',file)
		#files.download(path)

(1, 5, 5, 32)
(1, 33)
INPUT 2 BElOW
Tensor("concat_3119:0", shape=(300, 77), dtype=float32)


## Restore saved model if available and start training Relational CNN

In [0]:
import os
import keras
import tensorflow as tf
import numpy as np
import pickle
import os
import gc
import os
from google.colab import files
from PIL import Image
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from tensorflow.keras.layers import LSTM
from tensorflow.examples.tutorials.mnist import input_data
from google.colab import files

with tf.Session() as sess: 
	#saver = tf.train.Saver()
	sess.run(tf.global_variables_initializer())
	print(sess.run(b_32))
	saver.restore(sess,'/content/drive/My Drive/RelationalNN/dm1.ckpt')
	print(sess.run(b_32))
	for k in range(1,130):
		print('EPOCH %d' % (k))
		#idx = np.random.permutation(len(reltrain[2].toarray()))
		labelz,qstz,dataz = reltrain[2], reltrain[1], reltrain[0]
		if k % 5 == 0:
			sess.run(tf.global_variables_initializer())
			saver = tf.train.Saver()
			save_path = saver.save(sess, "/content/drive/My Drive/RelationalNN/dm%d.ckpt" % (k))
			print('done saving at',save_path)
		accsum = 0
		for i in range(0,75000):
			#print(type(reltrain[2][i].reshape(1,-1)))
			ys = labelz[i].toarray()
			qsts = qstz[i].reshape(1,-1)
			batchx = (np.swapaxes(dataz[i],0,2))
			#print(ys)
			#print(qsts)
			
			train_step.run(feed_dict={x: batchx.reshape(1,-1), y_: ys, qst_: qsts,drop: 1})
			'''
			if grads = -1:
				grads = optimizer.compute_gradients(error, feed_dict={x: batchx.reshape(1,-1), y_: ys, qst_: qsts,drop: 0.5})
			else:
				temp = optimizer.compute_gradients(error, feed_dict={x: batchx.reshape(1,-1), y_: ys, qst_: qsts,drop: 0.5})
				for i in range(0,len(temp)):
					grads[i][1] += temp[i][1] / 30
				if i % 30 == 0:
					optimizer.apply_gradients(grads)
					grads = -1
			'''


			if i % 10 == 0:
				print(sess.run(b_32))
				#batch = mnist.train.next_batch(100)
				train_accuracy = accuracy.eval(feed_dict={
					x:batchx.reshape(1,-1), y_: ys, qst_: qsts,drop:1})
				#print("step %d, training accuracy %g"%(i, train_accuracy))
				accsum += train_accuracy
				if i % 1000 == 0:
					print("steps %d to %d, training accuracy %g"%(i-1000,i, accsum / 100))
					accsum = 0
		accsum = 0
		for j in range(75001,80001):
			ys = labelz[j].toarray()
			qsts = qstz[j].reshape(1,-1)
			batchx = (np.swapaxes(dataz[j],0,2))
			if j % 10 == 0:
				#batch = mnist.train.next_batch(100)
				train_accuracy = accuracy.eval(feed_dict={
					x:batchx.reshape(1,-1), y_: ys, qst_: qsts,drop:1})
				#print("step %d, training accuracy %g"%(i, train_accuracy))
				accsum += train_accuracy
				if j % 5000 == 0:
					print("Validation Accuracy %g"%( accsum / 500))
					accsum = 0

	for u in range(80001,85001):
			ys = reltrain[2][u].toarray()
			qsts = reltrain[1][u].reshape(1,-1)
			batchx = (np.swapaxes(reltrain[0][u],0,2))
			if u % 10 == 0:
				#batch = mnist.train.next_batch(100)
				train_accuracy = accuracy.eval(feed_dict={
					x:batchx.reshape(1,-1), y_: ys, qst_: qsts,drop:1})
				#print("step %d, training accuracy %g"%(i, train_accuracy))
				accsum += train_accuracy
				if u % 5000 == 0:
					print("Test Ac %g"%( accsum / 500))
					accsum = 0

[0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1]
INFO:tensorflow:Restoring parameters from /content/drive/My Drive/RelationalNN/dm1.ckpt


NotFoundError: ignored