<a href="https://colab.research.google.com/github/bosiqbal/fileujian/blob/main/deteksi_berita.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# === [1] Import Library ===
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Input

# === [2] News dataset (20 samples) ===
# Every headline is written next to its own label so a text and its class
# cannot drift out of alignment; the column-oriented dict used to build the
# DataFrame is derived from these pairs.
_news_samples = [
    ("Presiden umumkan libur nasional", 'asli'),
    ("Artis terkenal hidup kembali dari kubur", 'hoaks'),
    ("BMKG: Gempa besar akan terjadi besok", 'hoaks'),
    ("Mendikbud umumkan kurikulum baru", 'asli'),
    ("Kucing bisa berbicara mulai tahun depan", 'hoaks'),
    ("Polisi tangkap pelaku korupsi triliunan", 'asli'),
    ("Ilmuwan temukan planet mirip bumi", 'asli'),
    ("Sate bisa sembuhkan semua penyakit", 'hoaks'),
    ("Menlu kirim bantuan ke Palestina", 'asli'),
    ("NASA ungkap alien mendarat di Bandung", 'hoaks'),
    ("Vaksin baru 100% efektif", 'asli'),
    ("Orang hilang muncul di dunia paralel", 'hoaks'),
    ("Program bantuan PKH tahap 2 sudah cair", 'asli'),
    ("Bayi lahir bisa bicara dan prediksi masa depan", 'hoaks'),
    ("Pemerintah umumkan subsidi BBM naik", 'asli'),
    ("Gunung Semeru akan meledak dalam 2 hari", 'hoaks'),
    ("Mahasiswa temukan energi dari air", 'asli'),
    ("Minum kopi bisa bikin hidup abadi", 'hoaks'),
    ("Indonesia jadi tuan rumah Olimpiade 2032", 'asli'),
    ("Telur bisa menyembuhkan kanker stadium akhir", 'hoaks'),
]

# Two parallel columns: the raw headline text and its 'asli'/'hoaks' label.
data = {
    'text': [headline for headline, _ in _news_samples],
    'label': [kind for _, kind in _news_samples],
}

df = pd.DataFrame(data)

# === [3] Tokenization & padding ===
# Fit the word index on the headlines; words that were not seen during
# fitting are represented by the "<OOV>" token.
corpus = df['text']
tokenizer = Tokenizer(num_words=1000, oov_token="<OOV>")
tokenizer.fit_on_texts(corpus)

# Turn each headline into a list of word indices, then bring every list to
# a fixed length of 20, padding at the end ('post').
sequences = tokenizer.texts_to_sequences(corpus)
padded = pad_sequences(
    sequences,
    maxlen=20,
    padding='post',
)

# === [4] Label Encode: hoaks = 1, asli = 0 ===
le = LabelEncoder()
labels = le.fit_transform(df['label'])  # 'hoaks' → 1, 'asli' → 0

# === [5] Train/test split ===
# 80/20 split with a fixed seed for reproducibility.
# stratify=labels keeps the hoaks/asli ratio the same in both partitions:
# the test set holds only 4 samples, so an unstratified draw can end up
# dominated by one class and make the reported accuracy meaningless.
X_train, X_test, y_train, y_test = train_test_split(
    padded,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)

# === [6] CNN model ===
# Text classifier: embedding -> 1D convolution -> global max pooling ->
# small dense head with a single sigmoid output (binary hoaks/asli).
#
# The input length is declared with an explicit Input layer rather than the
# Embedding `input_length` argument, which newer Keras releases deprecate.
# Declaring the input shape up front also keeps the model built, so
# model.summary() can report output shapes and parameter counts.
model = Sequential([
    Input(shape=(20,)),  # must match maxlen used in pad_sequences
    Embedding(input_dim=1000, output_dim=16),  # input_dim matches Tokenizer num_words
    Conv1D(32, 3, activation='relu'),
    GlobalMaxPooling1D(),
    Dense(10, activation='relu'),
    Dense(1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

# === [7] Train the model ===
# The held-out split is passed as validation data so its loss/accuracy are
# reported after every epoch.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    validation_data=(X_test, y_test),
)

# === [8] Evaluate the model ===
# evaluate() yields the loss followed by the accuracy metric.
eval_result = model.evaluate(x=X_test, y=y_test)
loss, accuracy = eval_result
print(f"\n🎯 Akurasi model: {accuracy * 100:.2f}%")

# === [9] Manual prediction ===
# NOTE: this headline also appears in the dataset above, so this is a
# pipeline smoke test rather than a check on unseen text.
berita_baru = ["Menlu kirim bantuan ke Palestina"]

# New text must go through the same tokenizer and padding as the training data.
sequence = tokenizer.texts_to_sequences(berita_baru)
padded_baru = pad_sequences(sequence, maxlen=20, padding='post')
prediksi = model.predict(padded_baru)

# Threshold the sigmoid output into a class index and let the fitted encoder
# translate it back to its label, so the printed name always agrees with the
# encoding used for training instead of a separately hard-coded mapping.
kelas = int(prediksi[0][0] > 0.5)
hasil = le.inverse_transform([kelas])[0]
print(f"\n🧠 Berita: {berita_baru[0]}")
print(f"🔎 Prediksi: {hasil}")




Epoch 1/10
1/1 ━━━━━━━━━━━━━━━━━━━━ 2s 2s/step - accuracy: 0.5625 - loss: 0.6921 - val_accuracy: 0.2500 - val_loss: 0.7268
Epoch 2/10
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 79ms/step - accuracy: 0.5625 - loss: 0.6863 - val_accuracy: 0.2500 - val_loss: 0.7284
Epoch 3/10
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 138ms/step - accuracy: 0.5625 - loss: 0.6810 - val_accuracy: 0.2500 - val_loss: 0.7302
Epoch 4/10
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 84ms/step - accuracy: 0.5625 - loss: 0.6758 - val_accuracy: 0.2500 - val_loss: 0.7318
Epoch 5/10
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 79ms/step - accuracy: 0.5625 - loss: 0.6707 - val_accuracy: 0.2500 - val_loss: 0.7331
Epoch 6/10
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 125ms/step - accuracy: 0.5625 - loss: 0.6657 - val_accuracy: 0.2500 - val_loss: 0.7343
Epoch 7/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━