In [1]:
import pandas as pd #Untuk pembuatan dataframe
import numpy as np #Untuk melakukan komputasi numerik
import keras_tuner as kt #Untuk Import Hyperparameter tuner
from tensorflow import keras #Membuat model neural network
from matplotlib import pyplot as plt #visualisasi data
from sklearn.model_selection import train_test_split 
from sklearn.metrics import accuracy_score 
from keras.layers import TextVectorization #Untuk teks vektorisasi
from keras.models import Sequential 
from keras.layers import Embedding, Bidirectional, Input, LSTM, Dense, Dropout 
from keras.utils import to_categorical

In [15]:
df = pd.read_csv('news.csv', encoding="utf-8")
df.head()

Unnamed: 0.1,Unnamed: 0,title,text,label
0,8476,You Can Smell Hillary’s Fear,"Daniel Greenfield, a Shillman Journalism Fello...",FAKE
1,10294,Watch The Exact Moment Paul Ryan Committed Pol...,Google Pinterest Digg Linkedin Reddit Stumbleu...,FAKE
2,3608,Kerry to go to Paris in gesture of sympathy,U.S. Secretary of State John F. Kerry said Mon...,REAL
3,10142,Bernie supporters on Twitter erupt in anger ag...,"— Kaydee King (@KaydeeKing) November 9, 2016 T...",FAKE
4,875,The Battle of New York: Why This Primary Matters,It's primary day in New York and front-runners...,REAL


In [16]:
df.columns

Index(['Unnamed: 0', 'title', 'text', 'label'], dtype='object')

In [17]:
del df['Unnamed: 0']
del df['title']
# del df['ID']
# del df['tanggal']
# del df['judul']
# del df['nama file gambar']
df.columns

Index(['text', 'label'], dtype='object')

In [18]:
label_map = { 'REAL': 1, 'FAKE': 0 }

In [None]:
X = np.array(df['text'])
# X = np.array(df['narasi'])
X

In [20]:
y = to_categorical(np.array(df['label'].factorize()[0])).astype('int32') 
#Meklasifikasi data real dan fake menjadi benbentuk array 2 dimensi
y

array([[1, 0],
       [1, 0],
       [0, 1],
       ...,
       [1, 0],
       [0, 1],
       [0, 1]])

In [21]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
#Penentuan Parameter data testing 10% dari total data

In [22]:
print(X_train.shape,X_test.shape,y_train.shape,y_test.shape)
#mengetahui total data

(5701,) (634,) (5701, 2) (634, 2)


In [10]:
VOCAB_SIZE = 10000
MAX_SEQUENCE_LENGTH = 250

text_vectorizer = TextVectorization(
  standardize="lower_and_strip_punctuation",
  max_tokens=VOCAB_SIZE,
  output_mode='int',
  output_sequence_length=MAX_SEQUENCE_LENGTH)

In [11]:
text_vectorizer.adapt(X)

In [12]:
def build_model(hp):
    model = Sequential(name="fake_news")
    model.add(Input(shape=(1,), dtype="string"))
    model.add(text_vectorizer)
    lstm_units = hp.Choice(f"lstm_units", [128, 256, 512, 1024])
    model.add(Embedding(input_dim=len(text_vectorizer.get_vocabulary()) + 1,
                        output_dim=lstm_units*2,
                        embeddings_initializer="uniform",
                        mask_zero=True,
                        input_length=MAX_SEQUENCE_LENGTH))
    model.add(Bidirectional(LSTM(lstm_units, dropout=hp.Float("lstm_dropout_rate", min_value=0.1, max_value=0.5, step=0.1))))
    model.add(Dense(2, activation="softmax"))
    
    model.compile(
        loss="categorical_crossentropy",
        optimizer="adam",
        metrics=["categorical_accuracy"])

    return model

In [13]:
tuner = kt.Hyperband(
    build_model,
    objective='val_loss',
    max_epochs=50,
    factor=3,
    overwrite=True)

In [15]:
tuner.search(X_train, y_train, validation_split=0.1)

Trial 5 Complete [00h 05m 14s]
val_loss: 0.41962966322898865

Best val_loss So Far: 0.39534783363342285
Total elapsed time: 01h 10m 30s
INFO:tensorflow:Oracle triggered exit


In [16]:
tuner.results_summary(1)

Results summary
Results in .\untitled_project
Showing 1 best trials
<keras_tuner.engine.objective.Objective object at 0x000001DE4A348B38>
Trial summary
Hyperparameters:
lstm_units: 96
dense_units: 96
dropout: False
Score: 0.39534783363342285


In [17]:
best_hp = tuner.get_best_hyperparameters()[0]

In [18]:
best_hp.values

{'lstm_units': 96, 'dense_units': 96, 'dropout': False}

In [19]:
model = build_model(best_hp)
model.summary()

Model: "fake_news"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization (TextVec  (None, 250)              0         
 torization)                                                     
                                                                 
 embedding_1 (Embedding)     (None, 250, 192)          1920192   
                                                                 
 bidirectional_1 (Bidirectio  (None, 192)              221952    
 nal)                                                            
                                                                 
 dense_2 (Dense)             (None, 96)                18528     
                                                                 
 dense_3 (Dense)             (None, 2)                 194       
                                                                 
Total params: 2,160,866
Trainable params: 2,160,866
Non-t

In [21]:
history = model.fit(X_train, y_train, validation_split=0.1, epochs=50)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [22]:
model.save('uji11.tf')



INFO:tensorflow:Assets written to: uji6.tf\assets


INFO:tensorflow:Assets written to: uji6.tf\assets


In [6]:
def plot_graphs(history, metric):
  plt.plot(history.history[metric])
  plt.plot(history.history['val_'+metric], '')
  plt.xlabel("Epochs")
  plt.ylabel(metric)
  plt.legend([metric, 'val_'+metric])

In [None]:
plt.figure(figsize=(16, 8))
plt.subplot(1, 2, 1)
plot_graphs(history, 'categorical_accuracy')
plt.ylim(None, 1)
plt.subplot(1, 2, 2)
plot_graphs(history, 'loss')
plt.ylim(0, None)

In [23]:
from tensorflow import keras
model = keras.models.load_model('uji11.tf')



In [24]:
pred = model.predict(X_test)
pred



array([[1.2405412e-11, 1.0000000e+00],
       [4.1676441e-07, 9.9999964e-01],
       [2.7297294e-01, 7.2702706e-01],
       ...,
       [1.9708804e-11, 1.0000000e+00],
       [1.0000000e+00, 7.0521269e-11],
       [1.0000000e+00, 6.1737761e-11]], dtype=float32)

In [25]:
y_pred = np.argmax(pred, axis=1)
y_pred

array([1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0,
       1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0,
       1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0,
       1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0,
       0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0,
       1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1,
       0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1,
       1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1,
       1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1,
       0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0,

In [26]:
y_true = np.argmax(y_test, axis=1)
y_true


array([1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0,
       1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0,
       1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0,
       1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0,
       1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0,
       1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1,
       0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1,
       1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1,
       1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1,
       0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0,

In [27]:
accuracy_score(y_true, y_pred)

0.9826498422712934

In [28]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
cm = confusion_matrix(y_true, y_pred)
cmp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=["FAKE","REAL"])
fig, ax = plt.subplots(figsize=(8, 8))
cmp.plot(ax=ax)

ImportError: cannot import name 'ConfusionMatrixDisplay' from 'sklearn.metrics' (c:\Users\HP\Anaconda3\lib\site-packages\sklearn\metrics\__init__.py)