In [1]:
import tensorflow as tf
import numpy as np
import utils.utils as utils

In [2]:
# Load the raw caption / hashtag text from the JSON dump.
captions, tags = utils.load_data('data/insta-hashtag-test1.json')

# Separate vocabularies for the caption side and the tag side.
c_word2idx, c_idx2word, c_vocab_size = utils.word_idx_mappings(captions)
t_word2idx, t_idx2word, t_vocab_size = utils.word_idx_mappings(tags)

# Convert text to index sequences; the helper also returns per-example lengths.
captions, c_lengths = utils.text2idx(doc=captions,word2idx=c_word2idx)
tags, t_lengths = utils.text2idx(tags,t_word2idx)

# Only the captions are padded (presumably to one common length -- verify in utils.pad_data).
# NOTE: tags are left unpadded here; a padding call for them was previously commented out.
captions = utils.pad_data(captions,c_lengths)

# Train / test split over captions, tags and caption lengths together.
(
    captions_tr,
    tags_tr,
    lengths_tr,
    captions_tst,
    tags_tst,
    lengths_tst,
) = utils.generate_train_test_split(
    np.array(captions),
    np.array(tags),
    np.array(c_lengths),
)

# Length of one caption row of the training split; used as the model's input width.
input_dims = len(captions_tr[0])

In [3]:
# Sanity check: number of tag examples, then the length of one training caption row.
print(len(tags), len(captions_tr[0]), sep='\n')

43842
407


In [4]:
class MultiLayerLSTM:
    """Stacked-LSTM classifier that maps an index-encoded caption to a tag id.

    The graph (TensorFlow 1.x API) is: embedding lookup -> ``num_layers`` LSTM
    cells with input dropout -> linear layer over the final hidden state.
    Training minimises an NCE loss; evaluation uses the full softmax.

    Typical use: construct, call :meth:`build`, then :meth:`train`, then
    :meth:`predict`.
    """

    def __init__(self, name, c_word2idx, c_idx2word, c_vocab_size,
                 t_word2idx, t_idx2word, t_vocab_size, input_dims, learning_rate=1e-2,
                 batch_size=256, embedding_dims=64, num_hidden=128, num_layers=2,
                 keep_prob=0.5, num_neg_samples=10, epochs=2):
        """Store hyper-parameters and vocabulary mappings; no graph is built here.

        Args:
            name: variable-scope name used by :meth:`build`.
            c_word2idx, c_idx2word, c_vocab_size: caption vocabulary mappings and size.
            t_word2idx, t_idx2word, t_vocab_size: tag vocabulary mappings and size.
            input_dims: width of one (padded) caption row fed to the model.
            learning_rate: Adam learning rate.
            batch_size: mini-batch size for training and batched evaluation.
            embedding_dims: width of the caption embedding vectors.
            num_hidden: number of units in each LSTM cell.
            num_layers: number of stacked LSTM cells.
            keep_prob: input keep-probability for dropout, applied on training steps only.
            num_neg_samples: number of negative classes sampled by the NCE loss.
            epochs: number of passes over the training data.
        """
        self.name = name
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.embedding_dims = embedding_dims
        self.num_hidden = num_hidden
        self.num_layers = num_layers
        self.keep_prob = keep_prob
        self.input_dims = input_dims
        self.epochs = epochs
        self.num_sampled = num_neg_samples
        self.word2idx = {'captions': c_word2idx, 'tags': t_word2idx}
        self.idx2word = {'captions': c_idx2word, 'tags': t_idx2word}
        self.vocab_size = {'captions': c_vocab_size, 'tags': t_vocab_size}
        # Populated by train(); predict() checks `session` to give a clear error.
        self.session = None
        self.train_history = None

    def build(self):
        """Create placeholders, the embedding/LSTM stack, losses, accuracy and the optimizer step."""
        with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
            self.inputs = tf.placeholder(dtype=tf.int32, shape=[None, self.input_dims], name='inputs')
            self.targets = tf.placeholder(dtype=tf.int32, shape=[None, 1], name='targets')
            # Distinct name: this placeholder used to reuse the name 'inputs'.
            self.seq_lengths = tf.placeholder(dtype=tf.int32, shape=[None], name='seq_lengths')
            # Defaults to 1.0 (no dropout) so evaluation and prediction are
            # deterministic; train() feeds self.keep_prob on optimisation steps.
            self.dropout_keep_prob = tf.placeholder_with_default(
                tf.constant(1.0, dtype=tf.float32), shape=[], name='dropout_keep_prob')

            with tf.name_scope('Embeddings'):
                # Width is embedding_dims (previously num_hidden, which left the
                # embedding_dims hyper-parameter unused).
                self.embedding_matrix = tf.Variable(
                    dtype=tf.float32,
                    initial_value=tf.random_uniform(
                        [self.vocab_size['captions'], self.embedding_dims], -1.0, 1.0))
                self.em_lookup = tf.nn.embedding_lookup(self.embedding_matrix, self.inputs)

            with tf.name_scope('LSTMs'):
                def make_cell():
                    cell = tf.contrib.rnn.LSTMCell(self.num_hidden)
                    return tf.contrib.rnn.DropoutWrapper(
                        cell, input_keep_prob=self.dropout_keep_prob)

                self.stacked_cells = tf.contrib.rnn.MultiRNNCell(
                    [make_cell() for _ in range(self.num_layers)])

                self.outputs, self.states = tf.nn.dynamic_rnn(
                    self.stacked_cells, self.em_lookup,
                    sequence_length=self.seq_lengths, dtype=tf.float32)

                # With sequence_length set, dynamic_rnn emits zeros past each
                # sequence's end, so outputs[:, -1, :] is all-zero for every
                # caption shorter than input_dims. The top layer's final hidden
                # state is the output at the last *valid* time step.
                self.last_output = self.states[-1].h

            with tf.name_scope('loss_accuracy'):
                self.Wl = tf.Variable(
                    dtype=tf.float32,
                    initial_value=tf.random_normal(shape=[self.vocab_size['tags'], self.num_hidden]),
                    name='Wl')
                self.bl = tf.Variable(
                    dtype=tf.float32,
                    initial_value=tf.random_normal(shape=[self.vocab_size['tags']]),
                    name='bl')

                self.train_loss = tf.reduce_mean(tf.nn.nce_loss(
                    weights=self.Wl, biases=self.bl, labels=self.targets,
                    inputs=self.last_output, num_sampled=self.num_sampled,
                    num_classes=self.vocab_size['tags']))

                self.logits = tf.matmul(self.last_output, tf.transpose(self.Wl)) + self.bl
                self.softmax = tf.nn.softmax(self.logits)

                # targets holds one class id per row ([batch, 1]); the dense
                # cross-entropy op would treat it as a label distribution, so
                # use the sparse variant on the squeezed ids. Mean (not sum)
                # keeps losses comparable across different batch sizes.
                target_ids = tf.squeeze(self.targets, axis=1)
                self.eval_loss = tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        labels=target_ids, logits=self.logits))

                # argmax over the size-1 axis of targets is always 0; compare
                # the actual target ids with the predicted ids instead.
                predicted_ids = tf.cast(tf.argmax(self.logits, 1), tf.int32)
                self.correct_prediction = tf.equal(target_ids, predicted_ids)
                self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32)) * 100
                self.opt_step = tf.train.AdamOptimizer(self.learning_rate).minimize(self.train_loss)

    def _evaluate(self, inputs, targets, lengths):
        """Return (mean loss, mean accuracy %) over a dataset, evaluated in batch-sized slices.

        Slicing keeps the full-softmax logits for the whole dataset from being
        materialised in a single run. Returns (nan, nan) for an empty dataset.
        """
        n_total = len(inputs)
        if n_total == 0:
            return float('nan'), float('nan')
        loss_sum = 0.0
        acc_sum = 0.0
        for start in range(0, n_total, self.batch_size):
            stop = start + self.batch_size
            chunk = inputs[start:stop]
            loss, acc = self.session.run(
                [self.eval_loss, self.accuracy],
                feed_dict={self.inputs: chunk,
                           self.targets: targets[start:stop],
                           self.seq_lengths: lengths[start:stop]})
            # Weight by slice size so a short final slice does not skew the mean.
            loss_sum += loss * len(chunk)
            acc_sum += acc * len(chunk)
        return loss_sum / n_total, acc_sum / n_total

    def train(self, tr_inputs, tr_targets, tr_lengths, val_inputs, val_targets, val_lengths,
              log_every=4000):
        """Train the model and record loss/accuracy snapshots in ``self.train_history``.

        Args:
            tr_inputs, tr_targets, tr_lengths: training captions, tag ids and caption
                lengths; indexed with the batch indices from ``utils.get_batch_idx``.
            val_inputs, val_targets, val_lengths: validation counterparts.
            log_every: evaluate and print metrics every this many steps within an
                epoch (the step counter restarts at 0 each epoch).
        """
        # Release the session of a previous train() call instead of leaking it.
        if self.session is not None:
            self.session.close()
        self.session = tf.Session()
        sess = self.session
        sess.run(tf.global_variables_initializer())

        tr_losses, tr_accs, val_losses, val_accs = [], [], [], []

        for e in range(self.epochs):
            print(f'EPOCH {e+1}:')
            for step, batch_idx in enumerate(utils.get_batch_idx(tr_inputs, self.batch_size)):
                eval_feed = {self.inputs: tr_inputs[batch_idx],
                             self.targets: tr_targets[batch_idx],
                             self.seq_lengths: tr_lengths[batch_idx]}
                # Dropout is enabled only for the optimisation step.
                train_feed = dict(eval_feed)
                train_feed[self.dropout_keep_prob] = self.keep_prob
                sess.run(self.opt_step, feed_dict=train_feed)

                if step % log_every == 0:
                    tr_loss, tr_acc = sess.run([self.eval_loss, self.accuracy], feed_dict=eval_feed)
                    val_loss, val_acc = self._evaluate(val_inputs, val_targets, val_lengths)

                    tr_losses.append(tr_loss)
                    tr_accs.append(tr_acc)
                    val_losses.append(val_loss)
                    val_accs.append(val_acc)

                    print(f'Iteration {step}:')
                    print(f'TRAIN_LOSS:{tr_loss:.3f}, TRAIN_ACC:{tr_acc:.3f}')
                    print(f'VAL_LOSS  :{val_loss:.3f}, VAL_ACC  :{val_acc:.3f}')

        self.train_history = {
            'train_losses': tr_losses,
            'train_accs': tr_accs,
            'val_losses': val_losses,
            'val_accs': val_accs
        }

    def predict(self, caption, num_tags):
        """Return the ``num_tags`` most probable tags for one caption.

        Args:
            caption: sequence of caption tokens; each must be a key of the caption
                vocabulary (an unknown token raises ``KeyError``). Tokens beyond
                ``input_dims`` are dropped so the row fits the input placeholder.
            num_tags: number of (tag, probability) pairs to return.

        Returns:
            List of ``(tag, probability)`` tuples, most probable first, with the
            probability rounded to 3 decimals.

        Raises:
            RuntimeError: if called before :meth:`train` has created a session.
        """
        if getattr(self, 'session', None) is None:
            raise RuntimeError('predict() requires a trained model; call train() first')

        token_ids = [self.word2idx['captions'][word] for word in caption][:self.input_dims]
        c_len = len(token_ids)
        # Right-pad with 0 to the fixed input width, as in the original code.
        padded = token_ids + [0] * (self.input_dims - c_len)

        # The placeholders are batched, so feed a batch of exactly one row.
        smx = self.session.run(self.softmax,
                               feed_dict={self.inputs: [padded], self.seq_lengths: [c_len]})
        probs = np.asarray(smx)[0]

        # Rank the single row's probabilities in descending order.
        tag_ids = np.argsort(probs)[::-1][:num_tags]
        return [(self.idx2word['tags'][int(i)], round(float(probs[i]), 3)) for i in tag_ids]
            
                
                
            

In [5]:
# Instantiate the model with both vocabularies; all other hyper-parameters keep their defaults.
rnn = MultiLayerLSTM(
    name='hashtagram',
    c_word2idx=c_word2idx,
    c_idx2word=c_idx2word,
    c_vocab_size=c_vocab_size,
    t_word2idx=t_word2idx,
    t_idx2word=t_idx2word,
    t_vocab_size=t_vocab_size,
    input_dims=input_dims,
)

In [6]:
# Construct the TensorFlow graph: placeholders, embedding lookup, stacked LSTM,
# loss / accuracy ops and the Adam optimisation step. Must run before rnn.train().
rnn.build()

In [7]:
# Fit on the training split; the test split is passed as the validation data.
rnn.train(
    tr_inputs=captions_tr,
    tr_targets=tags_tr,
    tr_lengths=lengths_tr,
    val_inputs=captions_tst,
    val_targets=tags_tst,
    val_lengths=lengths_tst,
)

EPOCH 1:


KeyboardInterrupt: 