In [3]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
#from tensorflow.keras.preprocessing.text import texts_to_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten

In [4]:
# Toy sentiment corpus: every sentence is stored next to its class label
# (1 = positive, 0 = negative) so the pairing is visible at a glance.
labelled_sentences = [
    ("I love natural language processing.", 1),
    ("Natural language processing is fascinating.", 1),
    ("NLP is a subfield of artificial intelligence.", 1),
    ("I dislike bugs in my code.", 0),
    ("Sentiment analysis is not always easy.", 0),
]

# Split the pairs back into the two parallel lists used by the later cells.
texts = [sentence for sentence, _ in labelled_sentences]
labels = [label for _, label in labelled_sentences]

In [6]:
# Tokenize the corpus and encode it as a binary bag-of-words matrix.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)

# texts_to_matrix returns one row per text and one column per vocabulary index.
# With mode='binary' a cell is 1.0 when that word occurs in the text and 0.0
# otherwise. Column 0 stays empty because Keras reserves index 0 and never
# assigns it to a word. This is a simpler representation than word embeddings.
#
# Every row already has the same width (vocabulary size + 1), so no padding
# step is needed: pad_sequences is meant for variable-length lists of word
# indices, not for this fixed-width matrix.
X = tokenizer.texts_to_matrix(texts, mode='binary')

# Sanity check: one row per text, one column per word index plus the reserved 0.
assert X.shape == (len(texts), len(tokenizer.word_index) + 1)
print(X)

# Convert the integer labels to one-hot vectors ([1, 0] = negative, [0, 1] = positive)
y = tf.keras.utils.to_categorical(labels, num_classes=2)


[[0. 0. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 1. 1. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1.]]


In [7]:
# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are as repeatable as possible across "Restart & Run All".
tf.keras.utils.set_random_seed(42)

# Build a simple feed-forward classifier:
# bag-of-words vector -> 64 ReLU units -> 2-way softmax.
# The input width is declared with an explicit Input object rather than the
# legacy `input_dim=` keyword on the first Dense layer.
model = Sequential([
    tf.keras.Input(shape=(X.shape[1],)),
    Dense(64, activation='relu'),
    Dense(2, activation='softmax'),
])

# categorical_crossentropy matches the one-hot encoded targets in `y`.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X, y, epochs=10, batch_size=32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7ff2d9517d30>

In [9]:
# Save the model. The target folder must exist before saving, so create it
# first; makedirs succeeds silently when the folder is already there.
MODEL_DIR = 'models'
MODEL_PATH = f'{MODEL_DIR}/smpl_senti_2_cls.keras'
tf.io.gfile.makedirs(MODEL_DIR)
model.save(MODEL_PATH)

# Load the model back from the same path so the later cells exercise the
# saved artefact rather than the in-memory model.
loaded_model = tf.keras.models.load_model(MODEL_PATH)

In [11]:
# Use the loaded model for predictions on new data
new_texts = ["I enjoy working with natural language processing.", "This movie is terrible."]

# Encode the new texts with the SAME fitted tokenizer so the columns line up
# with the features the model was trained on. The tokenizer was created without
# an out-of-vocabulary token, so words it never saw during fitting are dropped
# and contribute nothing to the prediction.
# The resulting matrix already has the training width, so no padding is needed.
new_X = tokenizer.texts_to_matrix(new_texts, mode='binary')

# Make predictions: one row of two softmax probabilities per text
predictions = loaded_model.predict(new_X)

# Column 1 is the probability of the positive class; threshold it at 0.5
# to get hard labels (0 for negative, 1 for positive).
predicted_labels = (predictions[:, 1] > 0.5).astype(int)

# Display the results
for text, label in zip(new_texts, predicted_labels):
    sentiment = "Positive" if label == 1 else "Negative"
    print(f"Text: {text} | Sentiment: {sentiment}")

Text: I enjoy working with natural language processing. | Sentiment: Positive
Text: This movie is terrible. | Sentiment: Negative
