# In [None]:

###################################
##### Preparation Stage ###########
###################################

# Loading the IMDB dataset
#******************************
from tensorflow.keras.datasets import imdb
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(
    num_words=10000
)

# Quick look at the first training sample and its label.
train_data[0]
train_labels[0]

# Largest word index that occurs anywhere in the training sequences.
max(max(sequence) for sequence in train_data)


# Decoding reviews back to text - just to be able to see the original text
# ************************************************************************
word_index = imdb.get_word_index()
# Invert the word -> index mapping so indices can be looked up as words.
reverse_word_index = {index: word for word, index in word_index.items()}
# Each stored index is shifted down by 3 before the lookup; indices without
# a matching word are rendered as "?".
# NOTE(review): presumably the offset skips Keras' reserved indices - confirm.
decoded_first_review = " ".join(
    reverse_word_index.get(token - 3, "?") for token in train_data[0]
)
decoded_first_review

#Preparing the data
#Encoding the integer sequences via multi-hot encoding
#*****************************************************
import numpy as np
def vectorize_sequences(sequences, dimension=10000):
    """Multi-hot encode integer sequences into a 2-D float array.

    Args:
        sequences: Sized iterable of integer-index sequences. Every index
            must be a valid column index for an axis of length ``dimension``.
        dimension: Number of columns in the returned array.

    Returns:
        A ``numpy.ndarray`` of shape ``(len(sequences), dimension)`` that is
        0.0 everywhere except ``results[i, j] == 1.0`` for each ``j`` that
        occurs in ``sequences[i]``.

    Raises:
        IndexError: If a sequence contains an index that is out of bounds
            for ``dimension``.
    """
    results = np.zeros((len(sequences), dimension))
    for row, sequence in enumerate(sequences):
        # Set all columns of this row in one integer-array assignment instead
        # of a Python-level loop over every token. list() keeps a tuple
        # sequence from being interpreted as a multi-axis index.
        results[row, list(sequence)] = 1.0
    return results
# Multi-hot encode the training split first, then the test split.
x_train, x_test = map(vectorize_sequences, (train_data, test_data))

x_train[0]  # the first review after multi-hot encoding

# Labels as float32 arrays.
y_train = np.array(train_labels, dtype="float32")
y_test = np.array(test_labels, dtype="float32")

# Building your model
# Model definition
#******************************
from tensorflow import keras
from tensorflow.keras import layers


###############################################
##### Hyper Parameters tuning phase ###########
###############################################

# Hidden-layer widths to experiment with during tuning.
first_layer_size = 4
second_layer_size = 4

# Two ReLU hidden layers followed by a single sigmoid output unit.
model = keras.Sequential([
    layers.Dense(first_layer_size, activation="relu"),
    layers.Dense(second_layer_size, activation="relu"),
    layers.Dense(1, activation="sigmoid"),
])

# Show the structure of the weights,
# this is not a mandatory stage
# **********************************
# Build with an unspecified batch dimension: passing x_train.shape would pin
# the batch axis to the full training-set size, while the model is later
# trained on 512-sample batches. Only the feature dimension is needed here.
model.build((None, x_train.shape[1]))
model.summary()


# Compiling the model
# ******************************
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])

# Validating your approach
# Setting aside a validation set
#*******************************
# Hold out the first 10000 samples for validation; train on the remainder.
x_val, partial_x_train = x_train[:10000], x_train[10000:]
y_val, partial_y_train = y_train[:10000], y_train[10000:]


# Training your model
#*******************************
# Fit on the reduced training set and evaluate on the held-out validation
# samples as training proceeds.
history = model.fit(
    x=partial_x_train,
    y=partial_y_train,
    batch_size=512,
    epochs=20,
    validation_data=(x_val, y_val),
)

# Metric name -> recorded values, as collected by fit().
history_dict = history.history
history_dict.keys()

# Plotting the training and validation loss
#******************************************
import matplotlib.pyplot as plt
history_dict = history.history
loss_values, val_loss_values = history_dict["loss"], history_dict["val_loss"]
# x-axis: 1-based epoch numbers, one per recorded loss value.
epochs = range(1, 1 + len(loss_values))

# Training curve uses the "bo" format, validation curve uses "b".
plt.plot(epochs, loss_values, "bo", label="Training loss")
plt.plot(epochs, val_loss_values, "b", label="Validation loss")
plt.ylabel("Loss")
plt.xlabel("Epochs")
plt.title("Training and validation loss")
plt.legend()
plt.show()


# Plotting the training and validation accuracy
#**********************************************
plt.clf()  # clear the current figure before drawing the accuracy curves
acc, val_acc = history_dict["accuracy"], history_dict["val_accuracy"]

# Same x-axis and line formats as the loss plot.
plt.plot(epochs, acc, "bo", label="Training acc")
plt.plot(epochs, val_acc, "b", label="Validation acc")
plt.ylabel("Accuracy")
plt.xlabel("Epochs")
plt.title("Training and validation accuracy")
plt.legend()
plt.show()


#################################################
##### Test the result on Test DataSet ###########
#################################################

# Replace the hyper parameters :
#   number of neurons in each layer
#   number of layers
#   activation function
#   number of epochs

# Retraining a model from scratch to the optimal point before overfitting
#********************************
# Fresh, untrained model with the same architecture and compile settings.
model = keras.Sequential([
    layers.Dense(first_layer_size, activation="relu"),
    layers.Dense(second_layer_size, activation="relu"),
    layers.Dense(1, activation="sigmoid"),
])
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
# Train on the complete training set (no validation hold-out) for 4 epochs,
# then score the model on the test set.
model.fit(x_train, y_train, epochs=4, batch_size=512)
results = model.evaluate(x_test, y_test)

print("***test results***")
# Print explicitly: a bare `results` expression is discarded when this runs
# as a script (or anywhere but the last line of a notebook cell), which left
# the header above without any values under it.
print(results)

# Using a trained model to generate predictions on new data
#**********************************************************
print("**** test prediction *****")
# Print explicitly so the predictions appear under the header even when the
# file is run as a script, where a bare expression's value is discarded.
print(model.predict(x_test))


