In [1]:
from __future__ import print_function

from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers import Embedding
from keras.layers import LSTM
from keras.layers import Conv1D, MaxPooling1D
from keras.datasets import imdb
import tensorflow as tf
import numpy as np

Using TensorFlow backend.


In [2]:
#Allocation of memory for the GPU process
gpus = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_virtual_device_configuration(gpus[0],
      [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=3072)])
logical = tf.config.experimental.list_logical_devices('GPU')
print(logical[0])

LogicalDevice(name='/device:GPU:0', device_type='GPU')


In [13]:
# Embedding
max_features = 20000
maxlen = 100
embedding_size = 128

# Convolution
kernel_size = 5
filters = 64
pool_size = 4

# LSTM
lstm_output_size = 70

# Training
batch_size = 30
epochs = 4 #modify as required, should be even

print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')


print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)

#Data split for unsupervised training
x_train1 = x_train[0:2000]
x_train2 = x_train[2001:24999]
y_train1 = y_train[0:2000]
y_train2 = y_train[2001:24999]


Loading data...
25000 train sequences
25000 test sequences
Pad sequences (samples x time)


In [14]:
print('Build models...')

class Network(Sequential):
  def __init__(self):
    super().__init__()
    self.add(Embedding(max_features, embedding_size, input_length=maxlen))
    self.add(Dropout(0.25))
    self.add(Conv1D(filters, kernel_size, padding='valid', activation='relu', strides=1))
    self.add(MaxPooling1D(pool_size=pool_size))

    self.add(LSTM(lstm_output_size))
    self.add(Dense(1))
    self.add(Activation('sigmoid'))

#Default for starting weights
model = Network()
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model_benchmark = Network()
model_small_set = Network()
model_iterative_training = Network()

#Benchmark
model_benchmark.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model_benchmark.set_weights(model.get_weights())

print('Train Benchmark')
model_benchmark.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(x_test, y_test),verbose=2)
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print('Benchmark test score:', score)
print('Benchmark test accuracy:', acc)
model_benchmark.save('Mdl_Bench.h5')

#Smaller training set
print('Train on smaller set')
model_small_set.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model_small_set.set_weights(model.get_weights())
model_small_set.fit(x_train1, y_train1,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(x_test, y_test),verbose=2)
score, acc = model_small_set.evaluate(x_test, y_test, batch_size=batch_size)
print('Small model test score:', score)
print('Small model test accuracy:', acc)
model_small_set.save('Mdl_Small.h5')

#Iterative_train
print('Train iter model')
model_iterative_training.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model_iterative_training.set_weights(model.get_weights())

model_iterative_training.fit(x_train1, y_train1,
          batch_size=batch_size,
          epochs=int(epochs/2),
          validation_data=(x_test, y_test),verbose=2)

a = model_iterative_training.predict(x_train2)

i = 0
k = np.array([1])
for x in np.nditer(a):
     if x>=0.5:
        x_train1 = np.append(x_train1, np.array([x_train2[i]]), axis = 0)
        y_train1 = np.append(y_train1, k)
        i += 1
     else:
        i += 1


model_iterative_training.fit(x_train1, y_train1,
          batch_size=batch_size,
          epochs=int(epochs/2),
          validation_data=(x_test, y_test),verbose=2)

score, acc = model_iterative_training.evaluate(x_test, y_test, batch_size=batch_size, verbose =2)
print('Iterative test score:', score)
print('Iterative test accuracy:', acc)
model_iterative_training.save('Mdl_iter.h5')


Build models...
Train Benchmark
Train on 25000 samples, validate on 25000 samples
Epoch 1/4
 - 45s - loss: 0.3863 - accuracy: 0.8194 - val_loss: 0.3234 - val_accuracy: 0.8583
Epoch 2/4
 - 43s - loss: 0.1957 - accuracy: 0.9244 - val_loss: 0.3376 - val_accuracy: 0.8567
Epoch 3/4
 - 43s - loss: 0.0916 - accuracy: 0.9677 - val_loss: 0.5042 - val_accuracy: 0.8435
Epoch 4/4
 - 45s - loss: 0.0405 - accuracy: 0.9875 - val_loss: 0.5013 - val_accuracy: 0.8404
Benchmark test score: 0.6932262471914291
Benchmark test accuracy: 0.5003600120544434
Train on smaller set
Train on 2000 samples, validate on 25000 samples
Epoch 1/4
 - 11s - loss: 0.6740 - accuracy: 0.5710 - val_loss: 0.6226 - val_accuracy: 0.7008
Epoch 2/4
 - 10s - loss: 0.3728 - accuracy: 0.8430 - val_loss: 0.4354 - val_accuracy: 0.7946
Epoch 3/4
 - 10s - loss: 0.0867 - accuracy: 0.9725 - val_loss: 0.5201 - val_accuracy: 0.7906
Epoch 4/4
 - 10s - loss: 0.0167 - accuracy: 0.9975 - val_loss: 0.6723 - val_accuracy: 0.7860
Small model test sc