In [1]:
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, MaxPooling1D, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras import mixed_precision

In [2]:
# Load the feature array, the label array and the pickled tokenizer from Drive.
# The directory is spelled once so that relocating the artifacts is a one-line change.
DATA_DIR = '/content/drive/MyDrive/Colab Notebooks/Sentiment Analysis'
x = np.load(f'{DATA_DIR}/features.np.npy')
y = np.load(f'{DATA_DIR}/labels.np.npy')
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load a tokenizer.pkl that was produced by a trusted run.
with open(f'{DATA_DIR}/tokenizer.pkl', 'rb') as f:
  tokenizer = pickle.load(f)
# Fail early, with a clear message, if features and labels are misaligned.
assert x.shape[0] == y.shape[0], (
    f"features/labels row mismatch: {x.shape[0]} vs {y.shape[0]}"
)
print(x.shape, y.shape)

(1517962, 50) (1517962,)


In [3]:
# Hold out 20% of the samples as a test set; the fixed seed keeps the split repeatable.
split_options = {'test_size': 0.2, 'random_state': 42}
x_train, x_test, y_train, y_test = train_test_split(x, y, **split_options)

In [4]:
# Hyperparameters derived from the loaded artifacts.
# The +1 presumably reserves index 0 for padding (Keras-style tokenizers number
# words from 1) -- TODO confirm against how tokenizer.pkl was produced.
vocab_size = len(tokenizer.word_index)+1
embedding_dim = 128
max_length = x.shape[1]  # tokens per sample = second axis of the feature array

# Embedding -> Conv1D block -> stacked LSTMs -> single sigmoid unit.
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim),

    # CNN layers
    Conv1D(filters=64, kernel_size=3, activation='relu', padding='same'),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),

    # LSTM layers: the first returns the full sequence so the second can consume it
    LSTM(64, return_sequences=True),
    LSTM(32),
    Dropout(0.3),

    # Output layer
    Dense(1, activation='sigmoid'),
])

# `Embedding(input_length=...)` is deprecated in Keras 3 (it only triggers a
# warning), so the input shape is supplied here instead. Building explicitly
# also lets `summary()` report real output shapes and parameter counts.
model.build(input_shape=(None, max_length))

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)
model.summary()



In [9]:
# Stop when the monitored metric (Keras default: val_loss) has not improved for
# 5 epochs, then roll the model back to its best weights.
early_stopping = EarlyStopping(patience=5, restore_best_weights=True)
# Write a checkpoint to Drive only when the monitored metric improves.
best_checkpoint = ModelCheckpoint(
    '/content/drive/MyDrive/Colab Notebooks/Sentiment Analysis/best_model.h5',
    save_best_only=True,
)
callbacks = [early_stopping, best_checkpoint]

# validation_split makes Keras hold out 20% of the training data for validation.
fit_options = {
    'validation_split': 0.2,
    'epochs': 15,
    'batch_size': 64,
    'callbacks': callbacks,
}
history = model.fit(x_train, y_train, **fit_options)

# Score the trained model on the held-out test split.
test_metrics = model.evaluate(x_test, y_test, verbose=1)
loss, accuracy = test_metrics
print(f"Test Accuracy: {accuracy * 100:.2f}%")

Epoch 1/15
[1m15180/15180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - accuracy: 0.7858 - loss: 0.4602



[1m15180/15180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m573s[0m 38ms/step - accuracy: 0.7858 - loss: 0.4602 - val_accuracy: 0.7980 - val_loss: 0.4349
Epoch 2/15
[1m15179/15180[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 36ms/step - accuracy: 0.8108 - loss: 0.4162



[1m15180/15180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m578s[0m 38ms/step - accuracy: 0.8108 - loss: 0.4162 - val_accuracy: 0.8027 - val_loss: 0.4289
Epoch 3/15
[1m15180/15180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m569s[0m 37ms/step - accuracy: 0.8223 - loss: 0.3943 - val_accuracy: 0.8022 - val_loss: 0.4294
Epoch 4/15
[1m15180/15180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m624s[0m 38ms/step - accuracy: 0.8318 - loss: 0.3780 - val_accuracy: 0.8015 - val_loss: 0.4327
Epoch 5/15
[1m15180/15180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m570s[0m 38ms/step - accuracy: 0.8400 - loss: 0.3622 - val_accuracy: 0.8003 - val_loss: 0.4437
Epoch 6/15
[1m15180/15180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m568s[0m 37ms/step - accuracy: 0.8479 - loss: 0.3481 - val_accuracy: 0.7963 - val_loss: 0.4551
Epoch 7/15
[1m15180/15180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m620s[0m 37m

In [10]:
# Persist the trained model to Drive in the native .keras format.
final_model_path = '/content/drive/MyDrive/Colab Notebooks/Sentiment Analysis/final_model.keras'
model.save(final_model_path)