In [3]:
# Core Libraries
import pandas as pd
import numpy as np

# TensorFlow / Keras Layers
import tensorflow as tf
from tensorflow.keras.models import Sequential as kerasSeq
from tensorflow.keras.layers import (
    Input,
    TextVectorization,
    Embedding,
    Dropout,
    SimpleRNN,
    Dense
)

# Label preprocessing
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

## RNN

### Load Data

In [4]:
df = pd.read_csv('../dataset/train.csv', sep=',')

In [5]:
x_train = np.array(df['text'])
y_train = np.array(df['label'])
x_train[:5]

array(['Nikmati cicilan 0% hingga 12 bulan untuk pemesanan tiket pesawat air asia dengan kartu kredit bni!',
       'Kue-kue yang disajikan bikin saya bernostalgia. Semuanya tipikal kue zaman dulu, baik dari penampilan maupun rasa. Kuenya enak dan harganya juga murah.',
       'Ibu pernah bekerja di grab indonesia',
       'Paling suka banget makan siang di sini ayam sama sambalnya enak banget harganya luar biasa hemat, rasa ayamnya meresap sampai ketulangnya, es lidah buayanya juga segar bikin adem perut setelah makan sambal yang pedas, pelayannya sigap dan ramah, yang aku suka di tempat kasir ada tulisan 10% disumbangkan untuk beramal, buat makan jadi lebih enak ke perut',
       'Pelayanan bus DAMRI sangat baik'], dtype=object)

### Keras Model

In [6]:
# Parameters
max_tokens = 1000     # vocab size
output_sequence_length = 100  # max length of padded sequences
vectorizer = TextVectorization(
    max_tokens=max_tokens,
    output_mode='int',
    output_sequence_length=output_sequence_length
)
vectorizer.adapt(x_train)

2025-05-28 21:06:40.821283: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


In [54]:
model = kerasSeq([
    Input(shape=(1,), dtype=tf.string), 
    vectorizer,                          
    Embedding(input_dim=max_tokens, output_dim=64),
    Dropout(0.3),  
    SimpleRNN(64),                      
    Dense(3, activation='softmax')       
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

In [55]:
model2 = kerasSeq([
    Input(shape=(1,), dtype=tf.string),          
    vectorizer,                                  
    Embedding(input_dim=max_tokens, output_dim=64),
    Dropout(0.3),
    SimpleRNN(64, return_sequences=True),        
    SimpleRNN(64),                               
    Dense(3, activation='softmax')               
])

model2.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model2.summary()

In [56]:
model3 = kerasSeq([
    Input(shape=(1,), dtype=tf.string),          
    vectorizer,                                  
    Embedding(input_dim=max_tokens, output_dim=64),
    Dropout(0.3),
    SimpleRNN(64, return_sequences=True),        
    SimpleRNN(64, return_sequences=True),
    SimpleRNN(64),                               
    Dense(3, activation='softmax')               
])

model3.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model3.summary()

In [57]:
# encode target
encoder = LabelEncoder()
y_train_int = encoder.fit_transform(y_train)
y_train_cat = to_categorical(y_train_int, num_classes=3)
tf.constant(x_train)

<tf.Tensor: shape=(500,), dtype=string, numpy=
array([b'Nikmati cicilan 0% hingga 12 bulan untuk pemesanan tiket pesawat air asia dengan kartu kredit bni!',
       b'Kue-kue yang disajikan bikin saya bernostalgia. Semuanya tipikal kue zaman dulu, baik dari penampilan maupun rasa. Kuenya enak dan harganya juga murah.',
       b'Ibu pernah bekerja di grab indonesia',
       b'Paling suka banget makan siang di sini ayam sama sambalnya enak banget harganya luar biasa hemat, rasa ayamnya meresap sampai ketulangnya, es lidah buayanya juga segar bikin adem perut setelah makan sambal yang pedas, pelayannya sigap dan ramah, yang aku suka di tempat kasir ada tulisan 10% disumbangkan untuk beramal, buat makan jadi lebih enak ke perut',
       b'Pelayanan bus DAMRI sangat baik',
       b'Mau bikin postingan yang isinya mengedukasi customers gojek.',
       b'Ratusan rumah di medan terendam banjir',
       b'Barangnya lumayan, cuma yang saya heran xiaomi redmi note 2 ini tombol onnya memang agak ru

### Obtain Weight

In [58]:
model.fit(tf.constant(x_train), y_train_cat, epochs=10, batch_size=4)


Epoch 1/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 16ms/step - accuracy: 0.3373 - loss: 1.1234
Epoch 2/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - accuracy: 0.3659 - loss: 1.1083
Epoch 3/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - accuracy: 0.3618 - loss: 1.0944
Epoch 4/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.3551 - loss: 1.1113
Epoch 5/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.3330 - loss: 1.1028
Epoch 6/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.3862 - loss: 1.0964
Epoch 7/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.4058 - loss: 1.0930
Epoch 8/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.4035 - loss: 1.0921
Epoch 9/10
[1m125/125[0m [32m━━━━━

<keras.src.callbacks.history.History at 0x78fc5466ce60>

In [59]:
for layer in model.layers:
    print(f"Layer: {layer.name}")
    weights = layer.get_weights()
    for i, w in enumerate(weights):
        print(f"  Weight {i}: shape={w.shape}")
RNN_weights =model.layers[3].get_weights()
print(RNN_weights)

Layer: text_vectorization_2
Layer: embedding_7
  Weight 0: shape=(1000, 64)
Layer: dropout_6
Layer: simple_rnn_12
  Weight 0: shape=(64, 64)
  Weight 1: shape=(64, 64)
  Weight 2: shape=(64,)
Layer: dense_6
  Weight 0: shape=(64, 3)
  Weight 1: shape=(3,)
[array([[ 0.06142019,  0.05974184,  0.0345372 , ..., -0.06316666,
         0.18629572,  0.09448725],
       [-0.17556876,  0.20732793, -0.02594172, ...,  0.00458009,
         0.05992655,  0.02417792],
       [ 0.03675007, -0.06119879, -0.04788189, ...,  0.19027299,
         0.15875663, -0.06513658],
       ...,
       [ 0.1731799 , -0.22558449, -0.16964409, ..., -0.13741863,
        -0.14166792,  0.04210154],
       [-0.02310336, -0.04304823, -0.09023409, ...,  0.15053214,
        -0.22047892, -0.01942348],
       [ 0.19640382,  0.20081545,  0.05697681, ..., -0.14966345,
         0.0308894 , -0.17635949]], dtype=float32), array([[ 0.11680669,  0.03212444, -0.05182948, ..., -0.09043783,
        -0.0008277 ,  0.02528042],
       [ 0.026

In [60]:
model.save("RNN_1_64.h5")



In [61]:
model2.fit(tf.constant(x_train), y_train_cat, epochs=10, batch_size=4)

Epoch 1/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 28ms/step - accuracy: 0.3751 - loss: 1.1499
Epoch 2/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 18ms/step - accuracy: 0.3585 - loss: 1.1179
Epoch 3/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.3421 - loss: 1.1389
Epoch 4/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - accuracy: 0.3739 - loss: 1.1038
Epoch 5/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.3606 - loss: 1.0966
Epoch 6/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - accuracy: 0.3302 - loss: 1.1066
Epoch 7/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.3277 - loss: 1.1126
Epoch 8/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - accuracy: 0.3823 - loss: 1.0946
Epoch 9/10
[1m125/125[0m [32m

<keras.src.callbacks.history.History at 0x78fd0074a270>

In [64]:
for layer in model2.layers:
    print(f"Layer: {layer.name}")
    weights = layer.get_weights()
    for i, w in enumerate(weights):
        print(f"  Weight {i}: shape={w.shape}")

Layer: text_vectorization_2
Layer: embedding_8
  Weight 0: shape=(1000, 64)
Layer: dropout_7
Layer: simple_rnn_13
  Weight 0: shape=(64, 64)
  Weight 1: shape=(64, 64)
  Weight 2: shape=(64,)
Layer: simple_rnn_14
  Weight 0: shape=(64, 64)
  Weight 1: shape=(64, 64)
  Weight 2: shape=(64,)
Layer: dense_7
  Weight 0: shape=(64, 3)
  Weight 1: shape=(3,)


In [65]:
model2.save("RNN_2_64.h5")



In [66]:
model3.fit(tf.constant(x_train), y_train_cat, epochs=10, batch_size=4)

Epoch 1/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 27ms/step - accuracy: 0.3085 - loss: 1.1559
Epoch 2/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - accuracy: 0.4150 - loss: 1.1074
Epoch 3/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - accuracy: 0.3264 - loss: 1.1305
Epoch 4/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - accuracy: 0.3411 - loss: 1.1034
Epoch 5/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - accuracy: 0.4400 - loss: 1.0773
Epoch 6/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - accuracy: 0.4037 - loss: 1.0857
Epoch 7/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - accuracy: 0.3284 - loss: 1.1010
Epoch 8/10
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 20ms/step - accuracy: 0.4023 - loss: 1.0690
Epoch 9/10
[1m125/125[0m [32m

<keras.src.callbacks.history.History at 0x78fc686cc170>

In [67]:
for layer in model3.layers:
    print(f"Layer: {layer.name}")
    weights = layer.get_weights()
    for i, w in enumerate(weights):
        print(f"  Weight {i}: shape={w.shape}")

Layer: text_vectorization_2
Layer: embedding_9
  Weight 0: shape=(1000, 64)
Layer: dropout_8
Layer: simple_rnn_15
  Weight 0: shape=(64, 64)
  Weight 1: shape=(64, 64)
  Weight 2: shape=(64,)
Layer: simple_rnn_16
  Weight 0: shape=(64, 64)
  Weight 1: shape=(64, 64)
  Weight 2: shape=(64,)
Layer: simple_rnn_17
  Weight 0: shape=(64, 64)
  Weight 1: shape=(64, 64)
  Weight 2: shape=(64,)
Layer: dense_8
  Weight 0: shape=(64, 3)
  Weight 1: shape=(3,)


In [68]:
model3.save("RNN_3_64.h5")



In [10]:
dftest = pd.read_csv('../dataset/test.csv', sep=',')
x_test = np.array(dftest['text'])

In [70]:
print(model.predict(x_test))

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step
[[0.31023914 0.25134856 0.43841228]
 [0.31023923 0.25134844 0.43841246]
 [0.31023914 0.25134853 0.43841246]
 ...
 [0.31023943 0.25134712 0.4384135 ]
 [0.31023914 0.2513485  0.43841246]
 [0.31023917 0.2513485  0.43841246]]


In [72]:
print(model2.predict(x_test))

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[[0.4234361  0.16675611 0.4098078 ]
 [0.42353132 0.16676573 0.40970296]
 [0.42347407 0.1667763  0.40974966]
 ...
 [0.42327884 0.16680185 0.4099193 ]
 [0.4235398  0.1667231  0.40973705]
 [0.42356166 0.16675247 0.40968588]]


## Nyoba2

In [7]:
import sequential
print(sequential.__file__)

/home/sean-nugroho/Documents/Semester6/MachineLearning/Tugas-Machine-Learning-2/src/sequential.py


In [8]:
from sequential import Sequential
from layers.text_vectorization import TextVectorizationWrapper
from layers.embedding import EmbeddingWrapper
from layers.dense import Dense
from layers.dropout import Dropout
from layers.simple_rnn import SimpleRNN

rnn_1_64 = Sequential([
    TextVectorizationWrapper(TextVectorizer= vectorizer),
    EmbeddingWrapper(input_dim=1000, output_dim=64),
    Dropout(),
    SimpleRNN(units=64,activation="tanh"),
    Dense(3,activation="softmax")
])

rnn_1_64.load_weights("RNN_1_64.h5")

HDF5 layer keys: ['dense_6', 'dropout_6', 'embedding_7', 'simple_rnn_12', 'text_vectorization_2', 'top_level_model_weights']
Custom layer 0 (type: TextVectorizationWrapper) has no 'key' attribute, skipping.
Loading 1 weight array(s) into layer 1 (type: EmbeddingWrapper, key: embedding_7)
Loading 0 weight array(s) into layer 2 (type: Dropout, key: dropout_6)
Dropout has no trainable weights — skipping
Loading 3 weight array(s) into layer 3 (type: SimpleRNN, key: simple_rnn_12)
Weight successfuly loaded
Loading 2 weight array(s) into layer 4 (type: Dense, key: dense_6)


In [11]:
output = rnn_1_64.predict(x_test)
print("Output shape:", output.shape)
print("Output:\n", output)

Output shape: (400, 3)
Output:
 [[0.3102391  0.2513485  0.43841241]
 [0.31023916 0.25134839 0.43841245]
 [0.3102391  0.25134849 0.43841241]
 ...
 [0.31023939 0.25134718 0.43841343]
 [0.3102391  0.25134849 0.43841241]
 [0.31023911 0.25134846 0.43841243]]


In [12]:
rnn_2_64 = Sequential([
    TextVectorizationWrapper(TextVectorizer= vectorizer),
    EmbeddingWrapper(input_dim=1000, output_dim=64),
    Dropout(),
    SimpleRNN(units=64,activation="tanh",return_sequences=True),
    SimpleRNN(units=64,activation="tanh"),
    Dense(3,activation="softmax")
])
rnn_2_64.load_weights("RNN_2_64.h5")

HDF5 layer keys: ['dense_7', 'dropout_7', 'embedding_8', 'simple_rnn_13', 'simple_rnn_14', 'text_vectorization_2', 'top_level_model_weights']
Custom layer 0 (type: TextVectorizationWrapper) has no 'key' attribute, skipping.
Loading 1 weight array(s) into layer 1 (type: EmbeddingWrapper, key: embedding_8)
Loading 0 weight array(s) into layer 2 (type: Dropout, key: dropout_7)
Dropout has no trainable weights — skipping
Loading 3 weight array(s) into layer 3 (type: SimpleRNN, key: simple_rnn_13)
Weight successfuly loaded
Loading 3 weight array(s) into layer 4 (type: SimpleRNN, key: simple_rnn_14)
Weight successfuly loaded
Loading 2 weight array(s) into layer 5 (type: Dense, key: dense_7)


In [13]:
output = rnn_2_64.predict(x_test)
print("Output shape:", output.shape)
print("Output:\n", output)

Output shape: (400, 3)
Output:
 [[0.42343614 0.16675607 0.40980779]
 [0.42353142 0.16676579 0.4097028 ]
 [0.42347402 0.16677636 0.40974962]
 ...
 [0.42327882 0.16680186 0.40991932]
 [0.42353987 0.16672312 0.40973701]
 [0.42356163 0.16675245 0.40968591]]


In [15]:
rnn_3_64 = Sequential([
    TextVectorizationWrapper(TextVectorizer= vectorizer),
    EmbeddingWrapper(input_dim=1000, output_dim=64),
    Dropout(),
    SimpleRNN(units=64,activation="tanh",return_sequences=True),
    SimpleRNN(units=64,activation="tanh",return_sequences=True),
    SimpleRNN(64),
    Dense(3,activation="softmax")
])
rnn_3_64.load_weights("RNN_3_64.h5")

HDF5 layer keys: ['dense_8', 'dropout_8', 'embedding_9', 'simple_rnn_15', 'simple_rnn_16', 'simple_rnn_17', 'text_vectorization_2', 'top_level_model_weights']
Custom layer 0 (type: TextVectorizationWrapper) has no 'key' attribute, skipping.
Loading 1 weight array(s) into layer 1 (type: EmbeddingWrapper, key: embedding_9)
Loading 0 weight array(s) into layer 2 (type: Dropout, key: dropout_8)
Dropout has no trainable weights — skipping
Loading 3 weight array(s) into layer 3 (type: SimpleRNN, key: simple_rnn_15)
Weight successfuly loaded
Loading 3 weight array(s) into layer 4 (type: SimpleRNN, key: simple_rnn_16)
Weight successfuly loaded
Loading 3 weight array(s) into layer 5 (type: SimpleRNN, key: simple_rnn_17)
Weight successfuly loaded
Loading 2 weight array(s) into layer 6 (type: Dense, key: dense_8)


In [16]:
output = rnn_3_64.predict(x_test)
print("Output shape:", output.shape)
print("Output:\n", output)

Output shape: (400, 3)
Output:
 [[0.31278955 0.30902423 0.37818623]
 [0.31321605 0.3016501  0.38513384]
 [0.30946401 0.30050914 0.39002685]
 ...
 [0.30062091 0.31140323 0.38797586]
 [0.31370698 0.29849796 0.38779506]
 [0.31160333 0.31856062 0.36983604]]
