In [15]:
import os
import pickle
import random
from time import strftime, gmtime, time

import numpy as np
import pandas as pd
import tensorflow as tf
from keras import backend as K
from keras.engine import Input
from keras.layers import merge, Embedding, Dropout, Conv1D, Lambda, LSTM, Dense, concatenate, TimeDistributed
from keras.models import Model

In [3]:
def load_embeddings(checkpoint_path, meta_filename='linear_D1024.ckpt-4.meta'):
    """Load the trained ``embedding`` variable from a TensorFlow checkpoint.

    The checkpoint's graph is imported into a private ``tf.Graph`` so that
    repeated calls do not add duplicate nodes to the global default graph
    (which Keras also builds its layers in).

    Args:
        checkpoint_path: Directory holding the checkpoint files. A falsy
            value (``None`` or ``''``) skips loading.
        meta_filename: Name of the ``.meta`` graph file inside
            ``checkpoint_path``. Defaults to the file name this notebook
            was originally written against.

    Returns:
        The value of the tensor named ``embedding:0`` as produced by
        ``session.run``, or ``None`` when ``checkpoint_path`` is falsy.

    Raises:
        ValueError: If ``checkpoint_path`` contains no recorded checkpoint.
    """
    if not checkpoint_path:
        # Nothing to load; keep the historical "return None" contract explicit.
        return None

    latest_checkpoint = tf.train.latest_checkpoint(checkpoint_path)
    if latest_checkpoint is None:
        raise ValueError('No checkpoint found in %r' % (checkpoint_path,))

    graph = tf.Graph()
    with graph.as_default(), tf.Session(graph=graph) as session:
        saver = tf.train.import_meta_graph(os.path.join(checkpoint_path, meta_filename))
        saver.restore(session, latest_checkpoint)
        return session.run('embedding:0')

In [4]:
# Both inputs are fixed-length int32 index sequences of the same length; the
# length is kept in one place so the two tensors cannot drift apart.
MAX_SEQUENCE_LENGTH = 140

question = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32', name='question_base')
answer_good = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32', name='answer_good_base')


In [5]:
# Read the pre-trained embedding matrix out of the checkpoint directory.
embeddings_trained = load_embeddings(checkpoint_path='./Chordvec_embedding/linear')

INFO:tensorflow:Restoring parameters from ./Chordvec_embedding/linear\linear_D1024.ckpt-4


In [6]:
# Render the embedding matrix as a table for a quick visual sanity check.
pd.DataFrame(data=embeddings_trained)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,490,491,492,493,494,495,496,497,498,499
0,-1.382011,0.212952,-0.498862,-0.124998,-0.078452,0.212936,-1.217609,0.101249,-1.832078,0.872667,...,-0.550311,0.277064,-0.96225,0.375589,0.613075,-0.348948,-1.2437,0.996251,0.228086,0.410111
1,-0.005686,-0.458751,0.555541,-0.769725,-1.199231,-0.036278,1.595526,0.21123,-0.121884,-0.328777,...,0.257218,-0.826217,1.58144,-0.814446,0.433896,-0.186913,0.861702,-0.503112,1.463822,-0.253068
2,0.62267,-0.494212,0.705162,-0.493607,1.094337,-0.000823,0.79533,0.837211,0.902521,-0.137431,...,-1.710201,0.765003,-0.87276,0.090711,0.248969,-0.389046,0.925657,-0.012596,0.460024,1.026716
3,-0.674498,-1.569543,-0.719535,0.461658,-0.5568,0.041605,0.679605,-0.215023,-1.95392,0.567445,...,-0.886914,-0.378714,-0.127823,0.302005,-1.228078,0.857896,0.751297,-0.259543,-0.427015,-0.72806
4,-0.613742,0.120577,-1.351617,1.421042,0.710564,0.069726,-1.173964,-0.158702,0.583506,-0.841672,...,-0.467082,-0.25429,0.63021,-0.675362,1.173323,-0.127391,-0.318963,-0.327146,-0.766639,0.502223
5,1.756455,-1.175987,-0.532102,-0.479507,-0.427816,-0.642574,-0.222436,1.14846,0.328418,0.949487,...,0.969283,0.326319,-0.279008,-0.553797,-2.597933,-1.010742,0.365218,-2.053852,-0.227254,1.462331
6,-0.812254,1.684766,-0.607061,-0.868833,-1.45835,-0.891079,0.981542,-0.341061,1.595081,-0.12575,...,-1.178865,-0.774956,-0.212968,-0.810108,0.79345,0.908247,0.497718,-0.611292,1.379162,-1.604181
7,1.942836,-1.228565,-0.586395,0.075541,-0.261883,-1.002631,0.724411,-1.263883,1.685924,-0.014594,...,1.258448,-0.09374,0.798441,-0.543914,-0.031557,-1.349459,-0.002636,0.286579,1.92754,-0.48746
8,0.137452,-0.487044,0.907271,-1.966146,0.775478,0.009253,0.596811,0.953427,-0.216776,0.733678,...,-1.4043,1.306269,0.383728,0.602131,-0.959873,0.016373,0.147977,1.387019,-1.0231,-0.966267
9,0.604017,-0.759088,0.57161,-1.121203,-0.301767,0.184576,-0.533162,0.324927,-0.2393,-2.494421,...,0.113485,0.037857,-1.517274,2.110826,0.461232,0.233829,0.695734,-1.473071,0.659407,-2.085116


In [7]:
# Add the embedding layer, initialised from the pre-trained matrix.
# `embeddings_trained` was already loaded by the earlier load_embeddings(...)
# cell, so the checkpoint is not read a second time here.
vocab_size, embedding_dim = embeddings_trained.shape
embedding = Embedding(input_dim=vocab_size,   # derived from the matrix so it always matches the weights
                      output_dim=embedding_dim,
                      weights=[embeddings_trained])

# The same layer instance is applied to both inputs, so they share weights.
question_embedding = embedding(question)
answer_embedding = embedding(answer_good)


INFO:tensorflow:Restoring parameters from ./Chordvec_embedding/linear\linear_D1024.ckpt-4


In [8]:

# Shared per-timestep projection: one Dense(200, tanh) wrapped in
# TimeDistributed and applied to both embedded sequences.
hidden_layer = TimeDistributed(Dense(200, activation='tanh'))
question_hl = hidden_layer(question_embedding)
answer_hl = hidden_layer(answer_embedding)


def _build_conv(width):
    """Create a 'same'-padded tanh Conv1D with 1000 filters and kernel size `width`."""
    return Conv1D(filters=1000, kernel_size=width, padding='same', activation='tanh')


def _conv_features(sequence):
    """Apply every layer in `cnns` to `sequence` and concatenate on the last axis."""
    return concatenate([conv(sequence) for conv in cnns], axis=-1)


# cnn: one convolution per kernel width; the same layer objects are reused for
# the question and the answer branch. (Earlier variants that fed the raw
# embeddings through the deprecated `merge(..., mode='concat')` are disabled.)
cnns = [_build_conv(width) for width in (2, 3, 5, 7)]
question_cnn = _conv_features(question_hl)
answer_cnn = _conv_features(answer_hl)


def _max_over_axis1(x):
    """Take the maximum along axis 1 and drop that axis."""
    return K.max(x, axis=1, keepdims=False)


def _max_over_axis1_shape(input_shape):
    """Shape after removing axis 1: (input_shape[0], input_shape[2])."""
    return (input_shape[0], input_shape[2])


# maxpooling: a single Lambda layer shared by both branches. (An optional
# Dense(100, tanh) encoder on top of the pooled vectors is currently disabled.)
maxpool = Lambda(_max_over_axis1, output_shape=_max_over_axis1_shape)
maxpool.supports_masking = True
question_pool = maxpool(question_cnn)
answer_pool = maxpool(answer_cnn)

In [9]:
# Wrap the graph in a Model: the two index-sequence inputs go in, the two
# max-pooled encodings come out.
model = Model(
    inputs=[question, answer_good],
    outputs=[question_pool, answer_pool],
)

In [10]:
def save_epoch(epoch, model):
    """Write the model's weights for one epoch to ``models/weights_epoch_<epoch>.h5``.

    Args:
        epoch: Integer epoch number; it is formatted into the file name.
        model: Object exposing ``save_weights(path, overwrite=...)``.
    """
    # exist_ok=True creates the directory when missing and is a no-op when it
    # already exists, avoiding the check-then-create race.
    os.makedirs('models/', exist_ok=True)
    model.save_weights('models/weights_epoch_%d.h5' % epoch, overwrite=True)
def get_time():
    """Return the current UTC time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    utc_now = gmtime()
    return strftime('%Y-%m-%d %H:%M:%S', utc_now)

In [11]:
# Best validation loss seen so far and the epoch it occurred in; starts at
# loss 1.0 / epoch 0 and is replaced whenever an epoch does better.
val_loss = dict(loss=1., epoch=0)


In [12]:
def _load_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    # NOTE(review): pickle.load can execute arbitrary code while unpickling;
    # only use this on files that come from a trusted source.
    with open(path, 'rb') as pickle_file:
        return pickle.load(pickle_file)


# Questions, their matching good answers, and the pool of all answers.
Q = _load_pickle('./QA_data/questions0501')
good_Ans = _load_pickle('./QA_data/good_answers0501')
all_answers = _load_pickle('./QA_data/all_answers0501')
print('ok')

In [None]:
nb_epoch = 100
for i in range(1, nb_epoch + 1):
    # Draw a fresh set of negative ("bad") answers every epoch, as many as
    # there are good answers. (The code this was adapted from alternated
    # between random sampling and a get_bad_samples(...) strategy.)
    # random.sample expects a sequence as its first argument; the adapted code
    # passed the values of a dict instead.
    # assumes all_answers is a list with at least len(good_Ans) items — TODO confirm
    bad_answers = random.sample(all_answers, len(good_Ans))

    print('Fitting epoch %d' % i)
    # NOTE(review): `model` as built in this notebook has two inputs
    # (question, answer_good), is never compiled and has no loss, so this
    # three-input fit call cannot run yet. A training model with a bad-answer
    # input and a ranking loss still has to be defined.
    hist = model.fit([Q, good_Ans, bad_answers], epochs=1, batch_size=100,
                     validation_split=0.1, verbose=1)

    epoch_loss = hist.history['loss'][0]
    epoch_val_loss = hist.history['val_loss'][0]

    # Track the best validation loss seen so far and the epoch it came from.
    if epoch_val_loss < val_loss['loss']:
        val_loss = {'loss': epoch_val_loss, 'epoch': i}
    print('%s -- Epoch %d ' % (get_time(), i) +
          'Loss = %.4f, Validation Loss = %.4f ' % (epoch_loss, epoch_val_loss) +
          '(Best: Loss = %.4f, Epoch = %d)' % (val_loss['loss'], val_loss['epoch']))

    # Persist the weights after every epoch so any epoch can be restored later.
    save_epoch(i, model)
best_loss = val_loss