In [13]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
import numpy as np

In [14]:
# Toy corpus for binary text classification.
# Each entry pairs a sentence with its 0/1 label; the pairs are unzipped into
# the parallel lists `sentences` and `labels` that the rest of the notebook uses.
sentences, labels = map(list, zip(
    ("I love machine learning", 1),
    ("Deep learning models are powerful", 1),
    ("Natural language processing with LSTM", 1),
    ("I enjoy learning new things", 0),
    ("This is an example of text classification", 0),
    ("Understanding deep learning is important", 1),
    ("Natural language processing is a fascinating field", 1),
    ("LSTM networks are great for sequence data", 1),
    ("Machine learning provides valuable insights", 0),
    ("Deep learning techniques are evolving", 1),
))

In [15]:
# Tokenization and padding: turn the raw text into padded integer sequences.
#   1. fit a tokenizer on the corpus to build the word -> integer index,
#   2. map every sentence to its sequence of word indices,
#   3. pad the sequences so they all share one length.
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(sentences)
sequences = tokenizer.texts_to_sequences(sentences)
padded_sequences = tf.keras.preprocessing.sequence.pad_sequences(
    sequences,
    padding="pre",  # the Keras default, spelled out: padding goes in front
)

In [16]:
# Embedding hyperparameters: size of each word vector and size of the vocabulary.
embed_dim = 8
# One extra slot on top of the tokenizer's vocabulary is reserved for padding.
vocab_size = 1 + len(tokenizer.word_index)

In [17]:
# Build the LSTM binary classifier, one layer at a time.
model = Sequential()
# Embedding: turns each integer word index into a dense vector of size `embed_dim`.
# Fix: this line used to reference `embedding_dim`, a name the notebook never
# defines (the dimension is defined as `embed_dim`), so the cell raised a
# NameError when run on a fresh kernel.
model.add(Embedding(input_dim=vocab_size, output_dim=embed_dim, input_length=padded_sequences.shape[1]))
# LSTM: processes the embedded sequence and reduces it to a single 64-unit vector.
model.add(LSTM(units=64))
# Dense + sigmoid: one output unit for the binary classification.
model.add(Dense(units=1, activation='sigmoid'))

In [18]:
# Configure training: binary cross-entropy loss, Adam optimizer,
# and accuracy as the reported metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [19]:
# Print a per-layer summary of the model: layer types, output shapes and parameter counts.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 7, 8)              312       
                                                                 
 lstm_1 (LSTM)               (None, 64)                18688     
                                                                 
 dense_1 (Dense)             (None, 1)                 65        
                                                                 
Total params: 19,065
Trainable params: 19,065
Non-trainable params: 0
_________________________________________________________________


In [20]:
# Train the model for 10 epochs on the whole toy dataset, logging each epoch.
model.fit(
    x=padded_sequences,
    y=np.array(labels),
    epochs=10,
    verbose=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2281db25de0>

In [21]:
# Evaluate the model against the original labels.
# Note: these are the same sentences and labels passed to `model.fit`, so the
# score reflects fit to the training data, not performance on unseen text.
loss, accuracy = model.evaluate(
    padded_sequences,
    np.array(labels),
    verbose=1,
)
print(f'Accuracy: {accuracy}')

Accuracy: 0.699999988079071
