<a href="https://colab.research.google.com/github/jiayiwang5/Google-Colab/blob/master/chatbot_training_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from keras import backend as K
from keras.engine.topology import Layer
from keras import initializers, regularizers, constraints


class AttentionL(Layer):
    """Attention pooling over the time axis of a 3-D tensor.

    Given an input of shape ``(batch, step_dim, features_dim)`` the layer

    1. scores every time step with a learned vector ``W`` of length
       ``features_dim`` (plus an optional per-step bias ``b``),
    2. squashes the scores with ``tanh`` and exponentiates them,
    3. zeroes the scores of masked steps (when a mask is supplied),
    4. normalises the scores over the time axis, and
    5. returns the score-weighted sum of the time steps, i.e. a tensor of
       shape ``(batch, features_dim)``.

    Args:
        step_dim: Number of time steps of the input (used to reshape the
            scores back to ``(batch, step_dim)``).
        W_regularizer: Optional regularizer for ``W``.
        b_regularizer: Optional regularizer for ``b``.
        W_constraint: Optional constraint for ``W``.
        b_constraint: Optional constraint for ``b``.
        bias: Whether to add the per-step bias ``b`` to the scores.
        **kwargs: Forwarded to ``Layer.__init__``.
    """

    def __init__(self, step_dim,
                 W_regularizer=None, b_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        # Initialise the base class first. NOTE(review): in Keras 2.x the base
        # initializer assigns its own default to `supports_masking`, so the
        # flag has to be set afterwards to take effect -- confirm against the
        # installed Keras version.
        super(AttentionL, self).__init__(**kwargs)
        self.supports_masking = True
        self.init = initializers.get('glorot_uniform')

        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        self.step_dim = step_dim
        self.features_dim = 0  # real value is filled in by build()

    def build(self, input_shape):
        """Create ``W`` (one weight per feature) and, optionally, ``b`` (one per step)."""
        assert len(input_shape) == 3

        # `shape` is passed by keyword: a positional shape together with
        # `name=` only works through a legacy argument-conversion shim.
        self.W = self.add_weight(shape=(input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        self.features_dim = input_shape[-1]

        if self.bias:
            self.b = self.add_weight(shape=(input_shape[1],),
                                     initializer='zero',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
        else:
            self.b = None

        self.built = True

    def compute_mask(self, input, input_mask=None):
        """The time axis is summed away, so no mask is propagated downstream."""
        return None

    def call(self, x, mask=None):
        """Return the attention-weighted sum of ``x`` over axis 1."""
        features_dim = self.features_dim
        step_dim = self.step_dim

        # (batch * steps, features) . (features, 1) -> reshaped to (batch, steps)
        eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)),
                              K.reshape(self.W, (features_dim, 1))),
                        (-1, step_dim))

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        # Masked time steps get a zero score before normalisation.
        if mask is not None:
            a *= K.cast(mask, K.floatx())

        # epsilon guards against division by zero when every step is masked.
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)

    def compute_output_shape(self, input_shape):
        """Output shape is ``(batch, features)``; taken from the input shape itself."""
        return input_shape[0], input_shape[-1]

    def get_config(self):
        """Return every constructor argument so the layer can be re-created."""
        config = {
            'step_dim': self.step_dim,
            'W_regularizer': regularizers.serialize(self.W_regularizer),
            'b_regularizer': regularizers.serialize(self.b_regularizer),
            'W_constraint': constraints.serialize(self.W_constraint),
            'b_constraint': constraints.serialize(self.b_constraint),
            'bias': self.bias,
        }
        base_config = super(AttentionL, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

Using TensorFlow backend.


In [0]:
import numpy as np
import pickle
import operator
import os

import tensorflow as tf
# # This address identifies the TPU we'll use when configuring TensorFlow.
# TPU_WORKER = 'grpc://' + os.environ['COLAB_TPU_ADDR']
# tf.logging.set_verbosity(tf.logging.INFO)

# data_dir = np.arange(0, 105000, 5000)
# Root folder on the mounted Google Drive that holds data, dictionaries and checkpoints.
main_path = '/content/drive/My Drive/Colab Notebooks/'

# Names of the data sections to train on: 0, 1000, 2000, ..., 100000.
data_dir = np.arange(101) * 1000

# Position in data_dir to start from; 0 means "start with the first section".
last_model_index = 0

def get_file_list(file_path):
    """List the entries of ``file_path`` ordered by modification time.

    The oldest entry comes first and the most recently modified one last.
    Returns ``None`` (not an empty list) when the directory has no entries.
    """
    entries = os.listdir(file_path)
    if len(entries) == 0:
        return None

    def modified_at(entry):
        # os.path.getmtime() returns the time of the last modification
        # (os.path.getctime() would return the creation time instead).
        return os.path.getmtime(os.path.join(file_path, entry))

    entries.sort(key=modified_at)
    return entries

# Resume support: when checkpoints already exist, continue with the data
# section named in the most recently modified checkpoint file
# (file names look like "weights-<section>-<epoch>-<loss>-bigger.hdf5").
if os.listdir(main_path + 'modles/'):
    last_model_list = get_file_list(main_path + 'modles/')
    last_model_name = str(last_model_list[-1])
    last_model_number = last_model_name.split('-')[1]
    # Position of that section inside data_dir.
    last_model_index = np.flatnonzero(data_dir == int(last_model_number))[0]
    print(last_model_number, last_model_index)

  
# Imports used by the training loop. They were previously re-executed on
# every loop iteration; importing once up front is equivalent.
from keras import backend as K
from keras.preprocessing import sequence
from keras.layers import Embedding
from keras.layers import Input, Dense, LSTM, TimeDistributed, Bidirectional, Concatenate, Dropout, Activation, Dot, RepeatVector
from keras.models import Model
from keras.utils import plot_model
from keras.callbacks import ModelCheckpoint

maxLen = 20  # maximum number of tokens kept per sentence
data_path = main_path + 'data_sections_small/'
model_dir = main_path + 'modles/'  # (sic) same spelling as used by the other cells


def _shift_and_truncate(sequences, max_len):
    """Add 1 to every token id in place and cut each sequence to ``max_len`` tokens."""
    for row, seq in enumerate(sequences):
        for k, token in enumerate(seq):
            seq[k] = token + 1
        if len(seq) > max_len:
            sequences[row] = seq[:max_len]
    return sequences


# ---------------------------------------------------------------------------
# Section-independent data: loaded once instead of once per data section.
# NOTE(review): pickle.load can execute arbitrary code -- only load files that
# you created yourself.
# ---------------------------------------------------------------------------
with open(main_path + 'middle_data/dictionary.pkl', 'rb') as f:
    word_to_index = pickle.load(f)

# The stored word indexes start at 0, but 0 is also the value used to pad
# sequences later on. Shift every index by one so that 0 is reserved for
# padding only.
for word in word_to_index:
    word_to_index[word] += 1

index_to_word = {index: word for word, index in word_to_index.items()}

with open(main_path + 'middle_data/words.pkl', 'rb') as f:
    words = pickle.load(f)

# Indexes start at 1, so the vocabulary size is the number of words plus one
# (slot 0 is the padding index).
vocab_size = len(word_to_index) + 1
print('word_to_vec_map: ', len(list(words)))
print('vocab_size: ', vocab_size)

with open(main_path + 'middle_data/embedding_matrix.pkl', 'rb') as f:
    embedding_matrix = pickle.load(f)

print(embedding_matrix.shape)


for A_pos, A_name in enumerate(data_dir[last_model_index:]):
    A_name = str(A_name)

    print('Begin Loading from File... ' + data_path + A_name)
    # Rows are truncated individually below, i.e. the arrays hold
    # variable-length sequences. NOTE(review): such object arrays need
    # allow_pickle=True on NumPy >= 1.16.3 (it was the default before);
    # only load files you trust.
    context = np.load(data_path + A_name + '/context_indexes.npy',
                      allow_pickle=True)
    final_target = np.load(data_path + A_name + '/target_indexes.npy',
                           allow_pickle=True)

    # Apply the same +1 index shift as for the dictionary and cap the length.
    final_target_ = _shift_and_truncate(final_target, maxLen)
    context_ = _shift_and_truncate(context, maxLen)
    print(context_.shape)

    # `outs` is final_target_ shifted left by one position, one-hot encoded:
    # (number of samples, max sentence length, vocabulary size).
    # (The former `pos_ > 20` debug print was unreachable after truncation
    # to maxLen and has been dropped.)
    outs = np.zeros([context_.shape[0], maxLen, vocab_size], dtype='float32')
    for pos, seq in enumerate(final_target_):
        for step, token in enumerate(seq):
            if step > 0:
                outs[pos, step - 1, token] = 1  # one-hot
        if pos % 1000 == 0:
            print('{} entries completed'.format(pos))
    print(outs.shape)

    # Pad at the end of each sequence (index 0 is the padding value).
    final_target_ = sequence.pad_sequences(final_target_, maxlen=maxLen,
                                           dtype='int32', padding='post',
                                           truncating='post')
    context_ = sequence.pad_sequences(context_, maxlen=maxLen,
                                      dtype='int32', padding='post',
                                      truncating='post')

    # A fresh model is built for every data section; drop the previous
    # section's graph first so backend memory does not grow per iteration.
    K.clear_session()

    embed_layer = Embedding(input_dim=vocab_size, output_dim=50, trainable=True)
    embed_layer.build((None,))
    embed_layer.set_weights([embedding_matrix])

    LSTM_cell = Bidirectional(LSTM(512, return_sequences=True, return_state=True))
    # 1024 = 2 * 512 so the concatenated encoder states fit the decoder.
    LSTM_decoder = LSTM(1024, return_sequences=True, return_state=True)

    dense = TimeDistributed(Dense(vocab_size, activation='softmax'))

    # Encoder input and decoder input.
    input_context = Input(shape=(maxLen, ), dtype='int32', name='input_context')
    input_target = Input(shape=(maxLen, ), dtype='int32', name='input_target')

    input_context_embed = embed_layer(input_context)
    input_target_embed = embed_layer(input_target)

    encoder_out, forward_h, forward_c, backward_h, backward_c = LSTM_cell(input_context_embed)
    context_h = Concatenate()([forward_h, backward_h])
    context_c = Concatenate()([forward_c, backward_c])
    decoder_lstm, _, _ = LSTM_decoder(input_target_embed,
                                      initial_state=[context_h, context_c])

    # One attention summary of the encoder output, repeated for every decoder
    # step and concatenated with the decoder output.
    concatenator = Concatenate(axis=-1)
    attention = AttentionL(maxLen)(encoder_out)
    attention = RepeatVector(maxLen)(attention)
    merge = concatenator([attention, decoder_lstm])

    output = dense(merge)

    model = Model([input_context, input_target], output)

    model.compile(optimizer='rmsprop', loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()

    filepath = model_dir + "weights-" + A_name + "-{epoch:03d}-{loss:.4f}-bigger.hdf5"
    checkpoint = ModelCheckpoint(filepath,
                                 monitor='loss',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='min',
                                 period=15,
                                 save_weights_only=True)
    callbacks_list = [checkpoint]

    initial_epoch = 0
    # Oldest -> newest by modification time; None when no checkpoint exists.
    epoch_list = get_file_list(model_dir)
    if epoch_list:
        epoch_last = epoch_list[-1]
        model.load_weights(model_dir + epoch_last)
        # Keep only the two most recent checkpoints. The slice is taken from
        # the mtime-sorted list: os.listdir() order is arbitrary, so slicing
        # it could delete the newest checkpoint.
        for file_name in epoch_list[:-2]:
            os.remove(model_dir + file_name)
            print('Removed Successful! -- ', file_name)
        print("checkpoint_loaded: ", epoch_last)
        # File names look like weights-<section>-<epoch>-<loss>-bigger.hdf5.
        # With period=15 and epochs=30 a checkpoint of the current section is
        # from epoch 15 or 30; continue from there.
        name_parts = epoch_last.split('-')
        if name_parts[1] == A_name and name_parts[2] in ('015', '030'):
            initial_epoch = int(name_parts[2])
        print('Begin from epoch: ', str(initial_epoch))

    # A TPU variant based on tf.contrib.tpu.keras_to_tpu_model was tried
    # previously and is disabled; training runs on the default device.
    model.fit([context_, final_target_],
              outs,
              epochs=30,
              batch_size=10,
              validation_split=0.1,
              callbacks=callbacks_list,
              initial_epoch=initial_epoch,
              verbose=2
              )


In [0]:
# Mount Google Drive at /content/drive.
# NOTE(review): the training cell reads and writes under
# '/content/drive/My Drive/Colab Notebooks/', so on a fresh runtime this cell
# has to be run before it, even though it appears later in the notebook.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
# Show the current working directory and list the contents of the
# directory the training cell writes its checkpoints to.
!pwd
!ls "/content/drive/My Drive/Colab Notebooks/modles/"

/content
