In [13]:
#We are going to import all necessary libraries
import numpy as np
from tensorflow import keras#tensorflow instead of pytorch
from tensorflow.keras import layers
# Vocabulary size: only the top 20k words are considered.
max_features = 20_000
# Length, in tokens, that every movie review is padded or truncated to.
maxlen = 200

In [14]:
# Model input: batches of integer sequences whose length is not fixed.
inputs = keras.Input(shape=(None,), dtype="int32")

# Layers are created in the order they are applied, so the automatically
# generated layer names match the order shown by model.summary().
embedding_layer = layers.Embedding(input_dim=max_features, output_dim=128)
sequence_bilstm = layers.Bidirectional(layers.LSTM(units=64, return_sequences=True))
summary_bilstm = layers.Bidirectional(layers.LSTM(units=64))
classifier = layers.Dense(units=1, activation="sigmoid")

# Map each integer to a 128-dimensional vector.
x = embedding_layer(inputs)
# First bidirectional LSTM returns the full sequence so a second one can be stacked on it.
x = sequence_bilstm(x)
# Second bidirectional LSTM returns a single vector per input sequence.
x = summary_bilstm(x)
# Sigmoid squashes the single output unit into the range (0, 1).
outputs = classifier(x)

model = keras.Model(inputs=inputs, outputs=outputs)
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None)]            0         
                                                                 
 embedding (Embedding)       (None, None, 128)         2560000   
                                                                 
 bidirectional (Bidirectiona  (None, None, 128)        98816     
 l)                                                              
                                                                 
 bidirectional_1 (Bidirectio  (None, 128)              98816     
 nal)                                                            
                                                                 
 dense (Dense)               (None, 1)                 129       
                                                                 
Total params: 2,757,761
Trainable params: 2,757,761
Non-train

In [15]:
# Load the IMDB dataset, restricted to the top `max_features` words.
train_split, val_split = keras.datasets.imdb.load_data(num_words=max_features)
x_train, y_train = train_split
x_val, y_val = val_split

print(len(x_train), "Training sequences")
print(len(x_val), "Validation sequences")

# Bring every sequence to exactly `maxlen` entries. With pad_sequences' default
# settings both padding and truncation are applied at the start of a sequence,
# so reviews longer than `maxlen` keep their last `maxlen` tokens.
pad_sequences = keras.preprocessing.sequence.pad_sequences
x_train = pad_sequences(x_train, maxlen=maxlen)
x_val = pad_sequences(x_val, maxlen=maxlen)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
25000 Training sequences
25000 Validation sequences


In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric.
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)
# This takes about 7 minutes to run and the accuracy I got was 89%.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=2,
    validation_data=(x_val, y_val),
)
#This example led me to dive deeper into LSTM (long short-term memory) models. They are a form of recurrent neural network that feeds previous information back in along with new information for more accurate learning.
#This type of neural network is a good fit for IMDB movie review classification, as it can learn to recognize sentiment as it goes through more and more movie reviews. This was something we did not do in class. I also did a little
#more research and found out that LSTM networks are actually very good for NLP, and I plan on trying another example in my free time that links the two.