### Preparing the test dataset

In [1]:
import tensorflow as tf
from tensorflow import keras
import os
import string
import re
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization

In [2]:
# Build the path to the dataset's "train" folder under the current working
# directory, then list its entries as the cell output.
current_path = os.getcwd()
path1 = os.path.join(current_path, "aclImdb")
train_folder = os.path.join(path1, "train")
os.listdir(path=train_folder)

['labeledBow.feat',
 'neg',
 'pos',
 'unsupBow.feat',
 'urls_neg.txt',
 'urls_pos.txt',
 'urls_unsup.txt']

In [3]:
# Load the test split as a tf.data dataset of (text, label) elements; the cell
# output reports how many files and classes were found.
test_data = tf.keras.preprocessing.text_dataset_from_directory(
    directory='aclImdb/test'
)

Found 25000 files belonging to 2 classes.


In [4]:
def clean_text(input_data):
    """Standardize raw review text before it is matched against the vocabulary.

    Three steps are applied in order:
      1. lower-case the text (the original author notes that 'imdb.vocab' is
         lower case as well),
      2. replace every literal ``<br />`` tag with a single space,
      3. delete every character listed in ``string.punctuation``.

    Args:
        input_data: string tensor holding the raw text; this function is passed
            to ``TextVectorization`` as its ``standardize`` callable.

    Returns:
        A string tensor with the cleaned text.
    """
    # Character class matching any single ASCII punctuation character.
    punctuation_pattern = '[%s]' % re.escape(string.punctuation)

    lowered = tf.strings.lower(input_data)
    without_breaks = tf.strings.regex_replace(lowered, "<br />", " ")
    return tf.strings.regex_replace(without_breaks, punctuation_pattern, '')

# Vectorization layer: cleans text with clean_text, looks tokens up in the
# vocabulary file, and emits integer sequences of length 500 (the same sequence
# length that appears in the loaded model's summary output).
numerical_text_layer = TextVectorization(
    standardize=clean_text,
    vocabulary='aclImdb/imdb.vocab',
    output_mode='int',
    output_sequence_length=500,
)

def print_numerical_text(text, label):
    """Convert one (text, label) dataset element into (token ids, label).

    Despite its name, this function prints nothing; it is meant to be used as
    the mapping function of ``Dataset.map``.

    Args:
        text: string tensor of review text. NOTE(review): presumably a batch of
            shape (batch,), since the dataset is built with default settings;
            confirm against the loader.
        label: label tensor for ``text``; returned unchanged.

    Returns:
        A ``(numerical_text, label)`` tuple, where ``numerical_text`` is the
        output of ``numerical_text_layer`` for ``text``.
    """
    # Append a trailing axis so the vectorization layer receives one string per
    # row rather than a flat vector of strings.
    text_column = tf.expand_dims(text, axis=-1)
    token_ids = numerical_text_layer(text_column)
    return token_ids, label

# Vectorize every element of the test dataset; labels pass through unchanged.
numerical_test_data = test_data.map(map_func=print_numerical_text)

### Loading the saved model from the `models` folder

In [5]:
# Restore the trained model from its HDF5 file and print its layer summary.
testing_model = keras.models.load_model(
    filepath='models/20912881_NLP_model.h5'
)
testing_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 500, 16)           1432464   
_________________________________________________________________
dropout (Dropout)            (None, 500, 16)           0         
_________________________________________________________________
global_average_pooling1d_1 ( (None, 16)                0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 16)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 16)                272       
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 17        
Total params: 1,432,753
Trainable params: 1,432,753
Non-trainable params: 0
____________________________________________

#### Calculating the loss and accuracy

In [6]:
# Evaluate on the vectorized test set. The two-value unpacking assumes the
# model was compiled with exactly one metric besides the loss.
loss, accuracy = testing_model.evaluate(x=numerical_test_data)

print("For Test dataset")
print("Accuracy:", accuracy)
print("Loss:", loss)

For Test dataset
Accuracy: 0.8812400102615356
Loss: 0.33158448338508606
