In [1]:
import os

import numpy as np
import tensorflow as tf

In [2]:
# Location of the prepared training data (.npy). The default is the original
# author's local path; set the TRAIN_DATA_FILE environment variable to run the
# notebook on another machine without editing this cell.
train_data_file = os.environ.get(
    "TRAIN_DATA_FILE",
    "D:/UCSD/F17/CSE293/data_prep_scripts/train_data.npy",
)

# Consumed by model.train() as one entry per language pair; each entry is
# indexed there as [example, 0 = source / 1 = target, feature].
train_data = np.load(train_data_file)

In [4]:
class model(object):
    """Linear multilingual word-mapping model with learned language vectors.

    A source word vector is projected into a shared space by ``encoder``; the
    (source language, target language) pair is projected into the same space
    by ``lang_encoder``; the sum of the two is mapped to the predicted target
    word vector by ``decoder``. Training minimises the summed squared error
    between predicted and given target vectors with Adam.

    Parameters
    ----------
    ckpt_path : str
        Prefix (normally a directory ending in a separator) under which
        checkpoints and the learned language vectors are written.
    lr : float
        Learning rate passed to the Adam optimizer.
    epochs : int
        Stored on the instance only; ``train`` uses its own ``num_epochs``.
    dim_lang : int
        Size of each per-language representation vector.
    model_name : str
        Base name used for the checkpoint files.
    dim_word : int
        Size of the word vectors (and of the shared space). Defaults to 300.
    """

    # Languages that receive a randomly initialised representation vector.
    LANGUAGES = ("en", "pt", "es")

    def __init__(self, ckpt_path, lr, epochs=1000, dim_lang=5, model_name="MLPM", dim_word=300):
        self.epochs = epochs
        self.ckpt_path = ckpt_path
        self.model_name = model_name
        self.lr = lr
        self.dim_lang = dim_lang
        self.dim_word = dim_word

        # Per-language vectors live in numpy between batches; the graph only
        # holds the vector pair of the language pair currently being trained.
        self.all_lang_rep = dict()
        for lang in self.LANGUAGES:
            self.all_lang_rep[lang] = np.random.normal(
                loc=0.0, scale=1/np.sqrt(float(dim_lang)), size=(dim_lang))

        print('start building graph')
        self._build_graph()
        print('graph built')

    def _build_graph(self):
        """Create the TensorFlow graph, placeholders, variables and train op."""
        tf.reset_default_graph()
        self.graph = tf.Graph()
        with self.graph.as_default():
            dim_word = self.dim_word
            pair_dim = 2 * self.dim_lang
            # Plain Python float so it converts cleanly to a float32 tensor.
            init_stddev = float(dim_word) ** -0.5

            # source and target vector representation
            self.source_words = tf.placeholder(tf.float32, shape=[None, dim_word])
            self.target_words = tf.placeholder(tf.float32, shape=[None, dim_word])

            # parameter matrices
            self.encoder = tf.Variable(
                tf.truncated_normal([dim_word, dim_word], stddev=init_stddev), name='encoder')
            self.decoder = tf.Variable(
                tf.truncated_normal([dim_word, dim_word], stddev=init_stddev), name='decoder')
            self.lang_encoder = tf.Variable(
                tf.truncated_normal([pair_dim, dim_word], stddev=init_stddev), name='lang_encoder')

            # language representation: a trainable variable that is overwritten
            # from the placeholder before each batch (see train()).
            self.lang_rep_placeholder = tf.placeholder(tf.float32, shape=[1, pair_dim])
            self.lang_rep = tf.get_variable("lang_rep_in", shape=[1, pair_dim], dtype=tf.float32)
            self.assign_lang_rep_op = self.lang_rep.assign(self.lang_rep_placeholder).op

            # model equation
            self.target_pred = self.get_model(self.encoder, self.decoder, self.source_words,
                                              self.lang_rep, self.lang_encoder)

            # squared loss
            self.loss = tf.reduce_sum(tf.square(self.target_words - self.target_pred))

            self.train_step = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(self.loss)
            self.init = tf.global_variables_initializer()
            self.saver = tf.train.Saver()

    # encoder, decoder: dim_word x dim_word   source_words: None x dim_word
    # lang_rep: 1 x 2*dim_lang                lang_encoder: 2*dim_lang x dim_word
    # Output: None x dim_word
    def get_model(self, encoder, decoder, source_words, lang_rep, lang_encoder):
        """Return the predicted target word vectors for ``source_words``.

        The encoded source words and the encoded language-pair vector are
        summed in the shared space and then decoded. The language term has a
        single row and is broadcast over the batch dimension.
        """
        shared_source_words = tf.matmul(source_words, encoder)
        shared_lang_rep = tf.matmul(lang_rep, lang_encoder)

        # Previously the language term was added to a tiled copy of itself, so
        # the encoded source words never reached the decoder.
        shared_embedding_vector = shared_source_words + shared_lang_rep

        return tf.matmul(shared_embedding_vector, decoder)

    def get_feed(self, X, Y, src_lang, dest_lang):
        """Build the feed dict for one batch.

        ``src_lang`` and ``dest_lang`` are accepted for interface symmetry but
        are not used: the language pair is loaded into the graph separately
        through ``assign_lang_rep_op``.
        """
        feed_dict = {self.source_words: X, self.target_words: Y}
        return feed_dict

    @staticmethod
    def _pack_lang_rep(src_rep, dest_rep):
        """Interleave two language vectors into one row: [s0, d0, s1, d1, ...]."""
        return np.stack([src_rep, dest_rep], axis=1).reshape(1, -1)

    @staticmethod
    def _unpack_lang_rep(packed):
        """Inverse of ``_pack_lang_rep``: return ``(src_rep, dest_rep)`` copies."""
        flat = np.asarray(packed).reshape(-1)
        return flat[0::2].copy(), flat[1::2].copy()

    def _save_lang_reps(self):
        """Write the learned language vectors, one row per language, as text.

        They are kept in numpy rather than in graph variables, so the
        TensorFlow checkpoint alone would not preserve them.
        """
        langs = sorted(self.all_lang_rep)
        out_path = self.ckpt_path + self.model_name + "_lang_rep.txt"
        np.savetxt(out_path,
                   np.vstack([self.all_lang_rep[lang] for lang in langs]),
                   header=" ".join(langs))

    #train -> train[en_pt]
    def train(self, train, train_lang_pairs, batch_size, validation, num_epochs=10, sess=None, log_file="./log_file.txt"):
        """Train on several language pairs, alternating one batch per pair.

        Parameters
        ----------
        train : sequence
            One entry per language pair; each entry is indexed as
            ``[example, 0 = source / 1 = target, feature]``.
        train_lang_pairs : sequence of str
            ``"src-dest"`` language codes, aligned with ``train``.
        batch_size : int
            Number of examples per batch (the last slice of a pair may be
            shorter).
        validation :
            Currently unused.
        num_epochs : int
            Number of passes; a checkpoint is saved every 10th epoch and once
            more at the end.
        sess : tf.Session or None
            Session to train in. When None, a session is created, initialised
            and closed again by this method.
        log_file : str
            Path of the tab-separated per-batch loss log (overwritten).
        """
        owns_session = sess is None
        if owns_session:
            sess = tf.Session(graph=self.graph)
            sess.run(self.init)

        try:
            with sess.as_default():
                print(self.lang_rep.eval())

            # Convert each language pair to an array once, not on every batch.
            pair_data = [np.array(lang_pair_data) for lang_pair_data in train]

            # max data for a language pair/batch size
            max_data_size = max([len(x) for x in pair_data]) // batch_size

            with open(log_file, 'w') as log:
                log.write("Batch \t lang_pair \t loss\n")

                for epoch in range(num_epochs):
                    batch_index = np.zeros(shape=(len(pair_data)), dtype=int)
                    for batch_number in range(max_data_size):
                        for i, lang_pair_data in enumerate(pair_data):
                            # wrap around on language pairs with fewer examples
                            if batch_index[i] >= len(lang_pair_data):
                                batch_index[i] = 0

                            start = batch_index[i]
                            cur_batch = lang_pair_data[start:start + batch_size, :, :]
                            batch_index[i] += batch_size

                            X = cur_batch[:, 0, :]
                            Y = cur_batch[:, 1, :]

                            lang = train_lang_pairs[i].split('-')
                            src_lang, dest_lang = lang[0], lang[1]

                            # Load this pair's current vectors into the graph.
                            init = self._pack_lang_rep(self.all_lang_rep[src_lang],
                                                       self.all_lang_rep[dest_lang])
                            sess.run(self.assign_lang_rep_op,
                                     feed_dict={self.lang_rep_placeholder: init})

                            _, train_loss = sess.run([self.train_step, self.loss],
                                                     self.get_feed(X, Y, src_lang, dest_lang))
                            # Read the variable in a separate run: fetched together
                            # with the train op it may be the pre-update value.
                            lang_rep = sess.run(self.lang_rep)

                            log.write(str(batch_number) + " \t " + train_lang_pairs[i]
                                      + " \t " + str(train_loss) + "\n")

                            # Split with the same interleaved layout used to pack.
                            self.all_lang_rep[src_lang], self.all_lang_rep[dest_lang] = \
                                self._unpack_lang_rep(lang_rep)

                            print("Batch:" + str(batch_number))

                    #save epoch
                    if epoch and epoch % 10 == 0:
                        self.saver.save(sess, self.ckpt_path+self.model_name+".ckpt", global_step=epoch)

            self.saver.save(sess, self.ckpt_path+self.model_name+".ckpt", global_step=num_epochs+1)
            self._save_lang_reps()
        finally:
            # Only close a session this method created; a caller-supplied
            # session stays open for the caller.
            if owns_session:
                sess.close()



In [5]:
# Build the model graph. Checkpoints are saved with the "./ckpt/" prefix.
# `epochs` is only stored on the instance; train() takes its own `num_epochs`.
mlpm = model(
    ckpt_path="./ckpt/",
    lr=0.01,
    epochs=1,
)

start building graph
graph built


In [6]:
# Train on three language pairs; train_data[i] is paired with the i-th entry
# of train_lang_pairs inside train().
mlpm.train(
    train_data,
    train_lang_pairs=["pt-es", "pt-en", "en-es"],
    batch_size=40,
    validation=[],
)

[[ 0.58051044 -0.03814501 -0.1902228  -0.41598475  0.24314302  0.56493098
   0.5704307   0.24253303 -0.53691238  0.06418538]]
(40, 2, 300)
(40, 300)
(40, 300)
Batch:0
(40, 2, 300)
(40, 300)
(40, 300)
Batch:0
(40, 2, 300)
(40, 300)
(40, 300)
Batch:0
(40, 2, 300)
(40, 300)
(40, 300)
Batch:1
(40, 2, 300)
(40, 300)
(40, 300)
Batch:1
(40, 2, 300)
(40, 300)
(40, 300)
Batch:1
(40, 2, 300)
(40, 300)
(40, 300)
Batch:2
(40, 2, 300)
(40, 300)
(40, 300)
Batch:2
(40, 2, 300)
(40, 300)
(40, 300)
Batch:2
(40, 2, 300)
(40, 300)
(40, 300)
Batch:3
(40, 2, 300)
(40, 300)
(40, 300)
Batch:3
(40, 2, 300)
(40, 300)
(40, 300)
Batch:3
(40, 2, 300)
(40, 300)
(40, 300)
Batch:4
(40, 2, 300)
(40, 300)
(40, 300)
Batch:4
(40, 2, 300)
(40, 300)
(40, 300)
Batch:4
(40, 2, 300)
(40, 300)
(40, 300)
Batch:5
(40, 2, 300)
(40, 300)
(40, 300)
Batch:5
(40, 2, 300)
(40, 300)
(40, 300)
Batch:5
(40, 2, 300)
(40, 300)
(40, 300)
Batch:6
(40, 2, 300)
(40, 300)
(40, 300)
Batch:6
(40, 2, 300)
(40, 300)
(40, 300)
Batch:6
(23, 2, 300)


KeyboardInterrupt: 