In [None]:
#!pip install transformers

In [None]:
import numpy as np
from transformers import BertTokenizer, TFBertForSequenceClassification
import tensorflow as tf
from sklearn.metrics import accuracy_score

# Toy dataset: four short English reviews with binary sentiment labels.
texts = ["I love this movie!", "This movie is terrible.", "Great acting and story.", "Very boring film."]
labels = [1, 0, 1, 0]  # 1 = positive, 0 = negative

# NumPy copy of the ground truth for scikit-learn. It is taken here because
# `labels` is rebound to a TensorFlow tensor in step 2, and the metric should
# not depend on implicit tensor -> NumPy coercion.
y_true = np.asarray(labels)

# Settings shared by training and inference, named once so the two tokenizer
# calls and the two `from_pretrained` calls cannot drift apart.
MODEL_NAME = 'bert-base-uncased'
MAX_LENGTH = 128

# Step 1: load the tokenizer and the model (two output classes)
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
model = TFBertForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)


def _tokenize(sentences):
    """Tokenize a list of strings with the module-level ``tokenizer``.

    Args:
        sentences: List of raw text strings.

    Returns:
        The tokenizer output as TensorFlow tensors, with padding and
        truncation enabled and ``max_length`` set to ``MAX_LENGTH``.
    """
    return tokenizer(sentences, padding=True, truncation=True, return_tensors='tf', max_length=MAX_LENGTH)


def _model_features(encoding):
    """Pick the two tokenizer outputs that this script feeds to the model."""
    return {'input_ids': encoding['input_ids'], 'attention_mask': encoding['attention_mask']}


def _predict_classes(encoding):
    """Run the module-level ``model`` on an encoding.

    Args:
        encoding: Tokenizer output as returned by ``_tokenize``.

    Returns:
        A ``(logits, class_ids)`` pair, where ``class_ids`` is the argmax of
        the logits along axis 1.
    """
    # Element 0 of the prediction output is used as the logits.
    batch_logits = model.predict(_model_features(encoding))[0]
    return batch_logits, np.argmax(batch_logits, axis=1)


# Step 2: tokenize the data
inputs = _tokenize(texts)
input_ids = inputs['input_ids']
attention_mask = inputs['attention_mask']
labels = tf.convert_to_tensor(labels)

# Step 3: compile the model
optimizer = tf.keras.optimizers.Adam(learning_rate=2e-5)
model.compile(optimizer=optimizer, loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['accuracy'])

# Step 4: fine-tune the model
model.fit(_model_features(inputs), labels, epochs=3, batch_size=2, verbose=1)

# Step 5: predict
# NOTE: these are the same sentences the model was just trained on, so the
# accuracy below is a training accuracy, not a held-out estimate.
logits, y_pred = _predict_classes(inputs)
accuracy = accuracy_score(y_true, y_pred)

# Step 6: predict on a new sentence
new_text = ["This film is amazing!"]
new_inputs = _tokenize(new_text)
new_logits, new_pred = _predict_classes(new_inputs)

# Print the results (the output strings are intentionally left as-is)
print("\n=== Hasil Evaluasi ===")
print("Prediksi:", y_pred)
print("Akurasi:", accuracy)
print("\n=== Prediksi Kalimat Baru ===")
print("Kalimat:", new_text[0])
print("Sentimen:", "Positif" if new_pred[0] == 1 else "Negatif")