In [13]:
import tensorflow as tf
from transformers import AutoTokenizer, TFBertForSequenceClassification, AdamW
from datasets import load_dataset, Dataset
import import_ipynb         # Untuk Import file ipynb
import preprocess

In [None]:
name = "Farid Hasfindra"

# Training hyperparameters, gathered in one place so they are easy to tune.
BATCH_SIZE = 16
LEARNING_RATE = 2e-5
WEIGHT_DECAY = 0.01
EPOCHS = 3

# Step 3: Load the pretrained BERT model for sentiment classification.
# The checkpoint name, tokenized datasets and tokenizer are reused from the
# imported `preprocess` module so that both stay in sync.
model_name = preprocess.model_name
tokenized_datasets = preprocess.tokenized_datasets
tokenizer = preprocess.tokenizer

# num_labels=1 requests a single output logit per example; it is paired below
# with binary cross-entropy computed on logits.
model = TFBertForSequenceClassification.from_pretrained(model_name, num_labels=1)

# Step 4: Convert the datasets to TensorFlow format.
# NOTE(review): no collate_fn is passed, so this presumably relies on the
# examples already being padded to a common length in `preprocess` - confirm.
train_dataset = tokenized_datasets['train'].to_tf_dataset(
    columns=["input_ids", "attention_mask"],
    label_cols=["label"],
    batch_size=BATCH_SIZE,
    shuffle=True,       # Shuffle the training examples
)

val_dataset = tokenized_datasets['test'].to_tf_dataset(
    columns=["input_ids", "attention_mask"],
    label_cols=["label"],
    batch_size=BATCH_SIZE,
)

# Step 5: Set up the optimizer, loss function and metrics.
optimizer = tf.keras.optimizers.AdamW(
    learning_rate=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
)
loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)
# The model outputs raw logits, not probabilities. BinaryAccuracy compares the
# prediction against `threshold`, so the default of 0.5 would be applied to the
# logit itself. A logit of 0.0 corresponds to a probability of 0.5.
metrics = [tf.keras.metrics.BinaryAccuracy(threshold=0.0)]

# Compile the model
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

# Step 6: Train the model
model.fit(train_dataset, validation_data=val_dataset, epochs=EPOCHS)

In [None]:
# Step 7: Evaluate the model on the validation split.
# `results` is indexed as [loss, accuracy], matching the loss and the single
# metric passed to model.compile().
results = model.evaluate(val_dataset)
validation_report = (
    f"Validation loss: {results[0]}, Validation accuracy: {results[1]}"
)
print(validation_report)

In [None]:
# Step 8: Prediction
kalimat = "I love this movie!"
inputs = tokenizer(kalimat, return_tensors="tf")
logits = model(inputs).logits

# The model is created with num_labels=1, so `logits` has a single value per
# example. argmax over that one-element axis is always 0, which would label
# every sentence "Negative". Instead, turn the logit into a probability with a
# sigmoid and threshold it at 0.5.
probability = tf.sigmoid(tf.squeeze(logits, axis=-1))
predicted_class = tf.cast(probability > 0.5, tf.int64)

# NOTE(review): assumes label 1 means positive sentiment in the training data - confirm.
print(f"Predicted sentiment: {'Positive' if int(predicted_class[0]) == 1 else 'Negative'}")

In [None]:
# Persist the trained model to the "classification_sentiment_model" directory
# using the "tf" save format.
model.save(
    "classification_sentiment_model",
    save_format="tf",
)