In [1]:
# ---- Imports ----
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D, Dense

# ---- Dataset ----
# Small in-memory corpus (no CSV): each entry pairs a sentence with its
# binary class label.
_labelled_samples = [
    ('This is the first document.', 0),
    ('This document is the second document.', 0),
    ('And this is the third document.', 1),
    ('Is this the first document?', 1),
    ('Deep learning for text classification.', 1),
    ('Another example of a text sample.', 0),
    ('Machine learning and neural networks.', 1),
    ('This sample belongs to class one.', 0),
]

# Column-oriented view of the same samples, used to build the DataFrame.
data = {
    'text': [sentence for sentence, _ in _labelled_samples],
    'label': [target for _, target in _labelled_samples],
}
df = pd.DataFrame(data)

# Plain Python lists pulled back out of the frame for the later steps.
texts = df['text'].tolist()
labels = df['label'].tolist()

# ---- Tokenize & pad ----
# Upper bound on the vocabulary handed to the tokenizer via num_words.
max_words = 1000
# Use an explicit "<OOV>" placeholder for words not seen while fitting; an
# empty string would be registered in word_index as the '' entry.
tokenizer = Tokenizer(num_words=max_words, oov_token="<OOV>")
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# Bring every sequence to the same length; padding and truncation are both
# applied at the end of the sequence ('post').
max_sequence_length = 20
padded_sequences = pad_sequences(
    sequences,
    maxlen=max_sequence_length,
    padding='post',
    truncating='post',
)

# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    padded_sequences,
    labels,
    test_size=0.2,
    random_state=42,
)

# ---- Program / Model ----
# The embedding table needs one row per index the tokenizer can emit: the +1
# accounts for index 0 (used for padding), and num_words caps the largest
# index, so the table never needs more than max_words rows.
vocab_size = min(max_words, len(tokenizer.word_index) + 1)
embedding_dim = 16

# Embedding -> average over the sequence axis -> small dense head with a
# single sigmoid unit for binary classification.
model = Sequential([
    # `input_length` is deprecated in current Keras releases; the sequence
    # length is taken from the input data when the model is first called.
    Embedding(input_dim=vocab_size, output_dim=embedding_dim),
    GlobalAveragePooling1D(),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# ---- Compile ----
# Binary classification setup: Adam optimizer, binary cross-entropy loss,
# accuracy reported as the metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# ---- Train ----
# The split produced label lists; convert them to NumPy arrays before
# passing them to fit/evaluate.
y_train, y_test = np.array(y_train), np.array(y_test)

# Train for 10 epochs, reporting metrics on the held-out split after each
# epoch (verbose=2 prints one line per epoch).
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    verbose=2,
)

# ---- Evaluation ----
# Score the trained model on the held-out split; the result unpacks into the
# loss followed by the accuracy metric configured at compile time.
loss, accuracy = model.evaluate(x=X_test, y=y_test, verbose=0)
print(
    f"Test Loss: {loss:.4f}",
    f"Test Accuracy: {accuracy:.4f}",
    sep="\n",
)

# ---- Summary ----
# Print the layer-by-layer overview of the model.
model.summary()

Epoch 1/10




1/1 - 2s - 2s/step - accuracy: 0.3333 - loss: 0.6981 - val_accuracy: 1.0000 - val_loss: 0.6878
Epoch 2/10
1/1 - 0s - 245ms/step - accuracy: 0.3333 - loss: 0.6959 - val_accuracy: 0.5000 - val_loss: 0.6926
Epoch 3/10
1/1 - 0s - 301ms/step - accuracy: 0.1667 - loss: 0.6938 - val_accuracy: 0.0000e+00 - val_loss: 0.6973
Epoch 4/10
1/1 - 0s - 138ms/step - accuracy: 0.6667 - loss: 0.6920 - val_accuracy: 0.0000e+00 - val_loss: 0.7008
Epoch 5/10
1/1 - 0s - 145ms/step - accuracy: 0.6667 - loss: 0.6903 - val_accuracy: 0.0000e+00 - val_loss: 0.7043
Epoch 6/10
1/1 - 0s - 118ms/step - accuracy: 0.6667 - loss: 0.6890 - val_accuracy: 0.0000e+00 - val_loss: 0.7078
Epoch 7/10
1/1 - 0s - 115ms/step - accuracy: 0.6667 - loss: 0.6876 - val_accuracy: 0.0000e+00 - val_loss: 0.7115
Epoch 8/10
1/1 - 0s - 127ms/step - accuracy: 0.6667 - loss: 0.6863 - val_accuracy: 0.0000e+00 - val_loss: 0.7152
Epoch 9/10
1/1 - 0s - 121ms/step - accuracy: 0.6667 - loss: 0.6849 - val_accuracy: 0.0000e+00 - val_loss: 0.7191
Epoch