In [None]:
import os 
import pandas as pd
import numpy as np 
import time 

import tensorflow as tf 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Dense
from tensorflow.keras.layers import Embedding, Dropout, Flatten
from tensorflow.keras.layers import GlobalMaxPooling1D, SpatialDropout1D
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

from tensorflow.keras.datasets import imdb

In [None]:
# Ask TensorFlow which GPU devices it can see and report whether any exist
gpu_devices = tf.config.experimental.list_physical_devices("GPU")
gpu_status = "available" if gpu_devices else "NOT AVAILABLE"
print("GPU is", gpu_status)

GPU is available


In [None]:
# Load the IMDB review dataset, restricted to the 10000 most frequent words.
# Each split comes back as a (features, labels) pair.
train_split, test_split = imdb.load_data(num_words=10000)
X_train, y_train = train_split
X_test, y_test = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [None]:
# Hyper-parameters shared by the preprocessing and model cells

vocab_size = 10_000           # size of the vocabulary fed to the tokenizer / embedding
max_sequence_length = 1_000   # every sequence is padded or truncated to this length
embedding_dim = 200           # width of each embedding vector

In [None]:
# Tokenizer and Sequences
#
# imdb.load_data() already returns every review as a list of integer word
# ids, so the reviews must NOT be passed through Tokenizer.texts_to_matrix():
# that call turns each review into a (vocab_size,) binary bag-of-words row,
# and padding/truncating that row to max_sequence_length would hand the
# Embedding layer nothing but 0/1 indicator values with all word order lost.
# The integer sequences are padded directly instead.

# The tokenizer is still fitted on the raw (un-padded) id sequences because a
# later cell reads tokenizer.word_index; it is not used to transform the data.
# lower=False is required since the "tokens" here are ints, not strings.
tokenizer = Tokenizer(num_words=vocab_size, lower=False)
tokenizer.fit_on_texts(X_train)

# Pad short reviews with trailing zeros and cut long ones after
# max_sequence_length tokens, giving arrays of shape
# (n_reviews, max_sequence_length) holding integer word ids.
X_train = pad_sequences(X_train, maxlen=max_sequence_length, padding="post", truncating="post")
X_test = pad_sequences(X_test, maxlen=max_sequence_length, padding="post", truncating="post")

In [None]:
# Fetch the token -> index mapping built while fitting the tokenizer
word_index = tokenizer.word_index
vocabulary_count = len(word_index)
print("Vocabulary size: ", vocabulary_count)

Vocabulary size:  9998


<br>
<hr>

### Conv1D 

`Conv1D` expects input of shape (batch size, sequence length, number of channels). Here each review is a sequence of `max_sequence_length` embedding vectors, each of size `embedding_dim`.


<br>

In [None]:
# 1D convolutional network for text: embedding -> two conv/pool stages ->
# dense head ending in a single sigmoid unit.
model = Sequential([
    # Turn every integer token id into a dense vector of size embedding_dim
    Embedding(input_dim=vocab_size,
              output_dim=embedding_dim,
              input_length=max_sequence_length),
    # First convolution / pooling stage
    Conv1D(filters=16, kernel_size=1, strides=1, padding="valid", activation="relu"),
    MaxPooling1D(pool_size=2, strides=1, padding="valid"),
    # Second convolution / pooling stage
    Conv1D(filters=32, kernel_size=1, strides=1, padding="valid", activation="relu"),
    MaxPooling1D(pool_size=2, strides=1, padding="valid"),
    # Collapse the (steps, filters) feature map into one vector per sample
    Flatten(),
    Dense(512, activation="relu"),
    Dropout(0.25),
    # Single sigmoid output for the binary label
    Dense(1, activation="sigmoid"),
])


model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 1000, 200)         2000000   
_________________________________________________________________
conv1d (Conv1D)              (None, 1000, 16)          3216      
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 999, 16)           0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 999, 32)           544       
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 998, 32)           0         
_________________________________________________________________
flatten (Flatten)            (None, 31936)             0         
_________________________________________________________________
dense (Dense)                (None, 512)               1

In [None]:
# Training configuration: Adam optimizer, binary cross-entropy loss,
# accuracy reported as the metric.
compile_options = {
    "optimizer": "adam",
    "loss": "binary_crossentropy",
    "metrics": ["accuracy"],
}
model.compile(**compile_options)


# Train for 10 epochs in batches of 128, evaluating on the test split after
# every epoch. The call is left as the last expression so the returned
# History object is displayed by the cell.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=128,
    epochs=10,
    validation_data=(X_test, y_test),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f0d93a19e10>

<hr>
<br>