In [54]:
from tensorflow.keras.layers import Embedding
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Dense, Dropout, Conv1D, MaxPooling1D, GlobalMaxPooling1D
from tensorflow.keras.initializers import Constant
from keras.layers import TextVectorization
import numpy as np
import pickle
import json

In [55]:
X = np.load('data/X.npy', allow_pickle=True)
y = np.load('data/y.npy')
embedding_matrix = np.load('data/embedding_matrix.npy')
num_tokens = np.load('data/num_tokens.npy').item()
embedding_dim = np.load('data/embedding_dim.npy').item()
max_tokens = np.load('data/max_tokens.npy').item()
max_len = np.load('data/max_len.npy').item()

In [56]:
vectorizer = TextVectorization(max_tokens=max_tokens,
                               standardize=None,
                               ngrams=(1, 2),
                               output_sequence_length=max_len)
vectorizer.adapt(X)

2022-02-02 14:26:56.871920: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


In [57]:
y = y.reshape((-1,1))

In [58]:
print(X.shape)
print(y.shape)

(7613,)
(7613, 1)


In [59]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [60]:
np.save('data/x_train.npy', x_train)
np.save('data/y_train.npy', y_train)
np.save('data/x_test.npy', x_test)
np.save('data/y_test.npy', y_test)

In [61]:
sequences_input = Input(shape=(1), dtype='string')

vectorizer_sequence = vectorizer(sequences_input)

embedding_layer = Embedding(num_tokens,
                            embedding_dim,
                            embeddings_initializer=Constant(embedding_matrix),
                            trainable=False)
embedded_sequences = embedding_layer(vectorizer_sequence)

x = Dense(100, activation="relu")(embedded_sequences)
x = Dropout(0.2)(x)
x = Dense(50, activation="relu")(embedded_sequences)
x = Dropout(0.2)(x)
x = Dense(20, activation="relu")(x)
x = Dropout(0.2)(x)
x = Dense(5, activation="relu")(x)
x = Dropout(0.2)(x)

preds = Dense(units=1,
              kernel_initializer='uniform',
              activation='sigmoid')(x)
model = Model(sequences_input, preds)

model.compile(optimizer='adam',
                      loss='binary_crossentropy',
                      metrics=['accuracy'])

model.summary()

Model: "model_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_7 (InputLayer)        [(None, 1)]               0         
                                                                 
 text_vectorization_6 (TextV  (None, 68)               0         
 ectorization)                                                   
                                                                 
 embedding_6 (Embedding)     (None, 68, 100)           5000200   
                                                                 
 dense_27 (Dense)            (None, 68, 50)            5050      
                                                                 
 dropout_21 (Dropout)        (None, 68, 50)            0         
                                                                 
 dense_28 (Dense)            (None, 68, 20)            1020      
                                                           

In [62]:
n_samples = x_train.shape[0]

history = model.fit(x_train,
                    y_train,
                    validation_split=0.2,
                    batch_size=int(n_samples*0.1),
                    epochs=700)

model.save('models/model3.tf')

with open('models/history3.json', 'w') as f:
    json.dump(history.history, f)

Epoch 1/700


2022-02-02 14:26:58.410960: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.




2022-02-02 14:26:59.268505: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/700
Epoch 3/700
Epoch 4/700
Epoch 5/700
Epoch 6/700
Epoch 7/700
Epoch 8/700
Epoch 9/700
Epoch 10/700
Epoch 11/700
Epoch 12/700
Epoch 13/700
Epoch 14/700
Epoch 15/700
Epoch 16/700
Epoch 17/700
Epoch 18/700
Epoch 19/700
Epoch 20/700
Epoch 21/700
Epoch 22/700
Epoch 23/700
Epoch 24/700
Epoch 25/700
Epoch 26/700
Epoch 27/700
Epoch 28/700
Epoch 29/700
Epoch 30/700
Epoch 31/700
Epoch 32/700
Epoch 33/700
Epoch 34/700
Epoch 35/700
Epoch 36/700
Epoch 37/700
Epoch 38/700
Epoch 39/700
Epoch 40/700
Epoch 41/700
Epoch 42/700
Epoch 43/700
Epoch 44/700
Epoch 45/700
Epoch 46/700
Epoch 47/700
Epoch 48/700
Epoch 49/700
Epoch 50/700
Epoch 51/700
Epoch 52/700
Epoch 53/700
Epoch 54/700
Epoch 55/700
Epoch 56/700
Epoch 57/700
Epoch 58/700
Epoch 59/700
Epoch 60/700
Epoch 61/700
Epoch 62/700
Epoch 63/700
Epoch 64/700
Epoch 65/700
Epoch 66/700
Epoch 67/700
Epoch 68/700
Epoch 69/700
Epoch 70/700
Epoch 71/700
Epoch 72/700
Epoch 73/700
Epoch 74/700
Epoch 75/700
Epoch 76/700
Epoch 77/700
Epoch 78/700
Epoch 7

2022-02-02 14:31:52.118249: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


INFO:tensorflow:Assets written to: models/model3.tf/assets
