In [1]:
import tensorflow as tf
import numpy as np

In [186]:
class model(object):
    
    def __init__(self, ckpt_path, lr, epochs=1000, dim_lang=5, model_name="MLPM"):
        self.epochs = epochs
        self.ckpt_path = ckpt_path
        self.model_name = model_name
        self.lr = lr
        self.dim_lang = dim_lang
        
        self.all_lang_rep = dict()
        
        for lang in ["en", "pt", "es"]:
            self.all_lang_rep[lang] = tf.Variable(tf.truncated_normal([1,dim_lang],\
                                               stddev=1/tf.sqrt(float(dim_lang))), name='lang_encoder'+lang)
        
        def __graph__():
            tf.reset_default_graph()
            self.graph = tf.Graph()
            with self.graph.as_default():
                
                # source and target vector representation
                self.source_words = tf.placeholder(tf.float32, shape=[None, 300])
                self.target_words = tf.placeholder(tf.float32, shape=[None, 300])
                
                # parameter matrices
                self.encoder = tf.Variable(tf.truncated_normal([300, 300],\
                                           stddev=1/tf.sqrt(300.0)), name='encoder')
                
                self.decoder = tf.Variable(tf.truncated_normal([300, 300],\
                                           stddev=1/tf.sqrt(300.0)), name='decoder')
                
                self.lang_encoder = tf.Variable(tf.truncated_normal([2*dim_lang,300],\
                                               stddev=1/tf.sqrt(300.0)), name='lang_encoder')

                # language representation
                self.lang_rep = tf.Variable(tf.truncated_normal([1,2*self.dim_lang],\
                                            stddev=1/tf.sqrt(float(self.dim_lang))), name='lang_rep')
                
                # model equation
                self.target_pred = self.get_model(self.encoder, self.decoder, self.source_words,\
                                                  self.lang_rep, self.lang_encoder)
                
                #squared loss
                self.loss = tf.reduce_sum(tf.square(self.target_words-self.target_pred))
                
                self.train_step = tf.train.GradientDescentOptimizer(self.lr).minimize(self.loss)
                
                self.init = tf.global_variables_initializer()
                self.saver = tf.train.Saver()
        
        print('start building graph')
        __graph__()
        print('graph built')
        
    # encoder, decoder: 300x300   source_words: Nonex300    lang_rep: 1x2*dim_lang    lang_encoder: 2*dim_langx300
    # Output: Nonex300
    def get_model(self, encoder, decoder, source_words, lang_rep, lang_encoder):
        
        shared_source_words = tf.matmul(source_words, encoder)
        shared_lang_rep = tf.matmul(lang_rep, lang_encoder)
        
        num_examples = tf.shape(shared_source_words)[0]
        #(shared_source_words.get_shape())[0]
        tile = tf.tile(shared_lang_rep, tf.convert_to_tensor([1, num_examples]))
        shared_lang_aux = tf.reshape(tile, [num_examples, 300])
        shared_embedding_vector = shared_lang_rep+shared_lang_aux
        
        ret = tf.matmul(shared_embedding_vector, decoder)
        return ret 
    
    def get_feed(self, X, Y, src_lang, dest_lang):
        feed_dict = {self.source_words: X, self.target_words: Y}
        self.lang_rep = tf.concat([self.all_lang_rep[src_lang], self.all_lang_rep[dest_lang]], axis=1)
        return feed_dict
    
    #train -> train[en_pt]
    def train(self, train, train_lang_pairs, batch_size, validation, num_epochs=10, sess=None):
        if sess == None:
            sess = tf.Session(graph = self.graph)
            sess.run(self.init)
        
        with sess.as_default():
            print (self.lang_rep.eval()) 
        #max data for a language pair/batch size
        max_data_size = int(max([len(x) for x in train])/batch_size)
    
        for epoch in range(num_epochs):
            batch_index = np.zeros(shape=(len(train)), dtype=int)
            for batch_number in range(max_data_size):
                #make batches
                for lang_pair_data_enum in enumerate(train):
                    i, lang_pair_data = lang_pair_data_enum
                    lang_pair_data = np.array(lang_pair_data)
                    print(np.shape(lang_pair_data))
                    print(batch_index[i])
                    cur_batch = lang_pair_data[batch_index[i]:(batch_index[i]+batch_size),:,:]
                    batch_index[i] += batch_size
                    print(np.shape(cur_batch))
                    X = cur_batch[:,0,:]
                    Y = cur_batch[:,1,:]
                    print(np.shape(X))
                    print(np.shape(Y))
                    lang = train_lang_pairs[i].split('-')

                    #batch index more that lang pair data length not handled :P
                    
                    _, train_loss = sess.run([self.train_step, self.loss], self.get_feed(X, Y, lang[0], lang[1]))
                    
                  
                    print(self.lang_rep.get_shape())
                    self.all_lang_rep[lang[0]], self.all_lang_rep[lang[1]] = tf.split(self.lang_rep, 2, axis=1)
                    
                    print("Batch:" + str(batch_number))
                    print("Loss:" + str(train_loss))
                    print(lang)
                    print("------------------------------")
            
            #save epoch
            if epoch and epoch%10==0:
                self.saver.save(sess, self.ckpt_path+self.model_name+".ckpt", global_step=epoch)
                
                #print losses #todo: format for train data with lang
        self.saver.save(sess, self.ckpt_path+self.model_name+".ckpt", global_step=num_epochs+1)
        np.savetxt()



In [187]:
mlpm = model(ckpt_path="./ckpt/",lr=0.01, epochs=1)

start building graph
graph built


In [188]:
mlpm.train(train_data,train_lang_pairs= ["pt-es", "pt-en", "en-es"], batch_size=40, validation=[])

[[-0.84037918 -0.01348792  0.58147603 -0.13038452 -0.62116134 -0.18478361
  -0.24986792 -0.22951819  0.48660985  0.26475525]]
(303, 2, 300)
0
(40, 2, 300)
(40, 300)
(40, 300)
(1, 10)
Batch:0
Loss:783.995
['pt', 'es']
------------------------------
(415, 2, 300)
0
(40, 2, 300)
(40, 300)
(40, 300)
(1, 10)
Batch:0
Loss:6222.53
['pt', 'en']
------------------------------
(151632, 2, 300)
0
(40, 2, 300)
(40, 300)
(40, 300)
(1, 10)
Batch:0
Loss:2.57839e+12
['en', 'es']
------------------------------
(303, 2, 300)
40
(40, 2, 300)
(40, 300)
(40, 300)
(1, 10)
Batch:1
Loss:inf
['pt', 'es']
------------------------------
(415, 2, 300)
40
(40, 2, 300)
(40, 300)
(40, 300)
(1, 10)
Batch:1
Loss:nan
['pt', 'en']
------------------------------
(151632, 2, 300)
40
(40, 2, 300)
(40, 300)
(40, 300)
(1, 10)
Batch:1
Loss:nan
['en', 'es']
------------------------------
(303, 2, 300)
80
(40, 2, 300)
(40, 300)
(40, 300)
(1, 10)
Batch:2
Loss:nan
['pt', 'es']
------------------------------
(415, 2, 300)
80
(40, 

KeyboardInterrupt: 

In [100]:
mlpm = model(ckpt_path="./ckpt/",lr=0.01, epochs=1)

start building graph
graph built


In [158]:
mlpm.all_lang_rep['en']

<tf.Tensor 'split_724:1' shape=(1, 5) dtype=float32>

In [85]:
train_data_file = "D:/UCSD/F17/CSE293/data_prep_scripts/train_data.npy"
train_data = np.load(train_data_file)

In [167]:
with 
    mlpm.all_lang_rep['en'].eval()

NameError: name 'sess' is not defined

In [None]:
mlpm