In [1]:
from google.colab import drive

# u/ dataframe
import pandas as pd
import re


# u/ split data
from sklearn.model_selection import train_test_split

# u/ preprocessing dan layer
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import LSTM,Dense,Embedding,Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

# u/ visualisasi plot
import matplotlib.pyplot as plt

In [2]:
drive.mount('/content/drive/')

Mounted at /content/drive/


In [3]:
df = pd.read_csv("/content/drive/My Drive/dataset/Fake.csv")
df.head()

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [None]:
df = df.drop(['title', 'date'], axis=1)
df.head()

Unnamed: 0,text,subject
0,Donald Trump just couldn t wish all Americans ...,News
1,House Intelligence Committee Chairman Devin Nu...,News
2,"On Friday, it was revealed that former Milwauk...",News
3,"On Christmas day, Donald Trump announced that ...",News
4,Pope Francis used his annual Christmas Day mes...,News


In [None]:
df['subject'].value_counts()


News               9050
politics           6841
left-news          4459
Government News    1570
US_News             783
Middle-east         778
Name: subject, dtype: int64

In [None]:
df['subject'].value_counts()

News               9050
politics           6841
left-news          4459
Government News    1570
US_News             783
Middle-east         778
Name: subject, dtype: int64

In [None]:
df = df[~df['subject'].isin(['News','politics','left-news','Government News'])]
df['subject'].value_counts()

US_News        783
Middle-east    778
Name: subject, dtype: int64

In [None]:
df['text'] = df['text'].map(lambda x: re.sub(r'\W+', ' ', x))
# drop kolom id dan text lama
df.head()

Unnamed: 0,text,subject
21920,Patrick Henningsen and Shawn Helton 21st Centu...,US_News
21921,Looking back on the 2016 Presidential election...,US_News
21922,It should also be said that the current US Adm...,US_News
21923,170619 N AA175 092 SAN DIEGO June 19 2017 Comm...,US_News
21924,Tune in to the Alternate Current Radio Network...,US_News


In [None]:
df.isnull().values.any()

False

In [None]:
subject = pd.get_dummies(df.subject)
df_subject = pd.concat([df, subject], axis=1)
df_subject = df_subject.drop(columns='subject')
df_subject.head()

Unnamed: 0,text,Middle-east,US_News
21920,Patrick Henningsen and Shawn Helton 21st Centu...,0,1
21921,Looking back on the 2016 Presidential election...,0,1
21922,It should also be said that the current US Adm...,0,1
21923,170619 N AA175 092 SAN DIEGO June 19 2017 Comm...,0,1
21924,Tune in to the Alternate Current Radio Network...,0,1


In [None]:
# Mengubah tipe data menjadi str dan numpy array 
text = df_subject['text'].astype(str)
label = df_subject[['US_News', 'Middle-east' ]].values

In [None]:
subject_train, subject_test, label_train, label_test = train_test_split(text, label, test_size = 0.2)

In [None]:
tokenizer = Tokenizer(num_words=5000, oov_token='x')
tokenizer.fit_on_texts(subject_train) 
tokenizer.fit_on_texts(subject_test)
 
sekuens_train = tokenizer.texts_to_sequences(subject_train)
sekuens_test = tokenizer.texts_to_sequences(subject_test)
 
padded_train = pad_sequences(sekuens_train) 
padded_test = pad_sequences(sekuens_test)

In [None]:
import tensorflow as tf
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=6000, output_dim=16),
    tf.keras.layers.LSTM(64),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(2, activation='softmax')
])
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

In [None]:
num_epochs = 15
history = model.fit(padded_train, label_train, epochs=num_epochs, validation_data=(padded_test, label_test), verbose=2)

Epoch 1/15
39/39 - 339s - loss: 0.6938 - accuracy: 0.4864 - val_loss: 0.6930 - val_accuracy: 0.5112 - 339s/epoch - 9s/step
Epoch 2/15
39/39 - 337s - loss: 0.6935 - accuracy: 0.4992 - val_loss: 0.6931 - val_accuracy: 0.5112 - 337s/epoch - 9s/step
Epoch 3/15
39/39 - 338s - loss: 0.6933 - accuracy: 0.4992 - val_loss: 0.6932 - val_accuracy: 0.4185 - 338s/epoch - 9s/step
Epoch 4/15
39/39 - 337s - loss: 0.6934 - accuracy: 0.5024 - val_loss: 0.6935 - val_accuracy: 0.4888 - 337s/epoch - 9s/step
Epoch 5/15
39/39 - 338s - loss: 0.6935 - accuracy: 0.4856 - val_loss: 0.6933 - val_accuracy: 0.4888 - 338s/epoch - 9s/step
Epoch 6/15
39/39 - 335s - loss: 0.6933 - accuracy: 0.5008 - val_loss: 0.6934 - val_accuracy: 0.4888 - 335s/epoch - 9s/step
Epoch 7/15
39/39 - 336s - loss: 0.6933 - accuracy: 0.4864 - val_loss: 0.6932 - val_accuracy: 0.5112 - 336s/epoch - 9s/step
Epoch 8/15
39/39 - 341s - loss: 0.6933 - accuracy: 0.5008 - val_loss: 0.6936 - val_accuracy: 0.4824 - 341s/epoch - 9s/step
Epoch 9/15
39/39

In [None]:
class myCallback(tf.keras.callbacks.Callback):
  def on_epoch_end(self, epoch, logs={}):
    if(logs.get('accuracy')>0.9 and logs.get('val_accuracy')>0.9):
      print("\nAkurasi train dan validasi didapat telah mencapai nilai > 90%!")
      self.model.stop_training = True
callbacks = myCallback()

In [None]:
num_epochs = 30
history = model.fit(padded_train, label_train, epochs=num_epochs, validation_data=(padded_test, label_test), verbose=2, callbacks=[callbacks])

Epoch 1/30
39/39 - 359s - loss: 0.6930 - accuracy: 0.5184 - val_loss: 0.7083 - val_accuracy: 0.3578 - 359s/epoch - 9s/step
Epoch 2/30
39/39 - 353s - loss: 0.6938 - accuracy: 0.4912 - val_loss: 0.6958 - val_accuracy: 0.4856 - 353s/epoch - 9s/step
Epoch 3/30
39/39 - 351s - loss: 0.6927 - accuracy: 0.5200 - val_loss: 0.7120 - val_accuracy: 0.3834 - 351s/epoch - 9s/step
Epoch 4/30
39/39 - 352s - loss: 0.6963 - accuracy: 0.5216 - val_loss: 0.7034 - val_accuracy: 0.4824 - 352s/epoch - 9s/step
Epoch 5/30
39/39 - 353s - loss: 0.6921 - accuracy: 0.5112 - val_loss: 0.7051 - val_accuracy: 0.3738 - 353s/epoch - 9s/step
Epoch 6/30
39/39 - 352s - loss: 0.6914 - accuracy: 0.5160 - val_loss: 0.7114 - val_accuracy: 0.3482 - 352s/epoch - 9s/step
Epoch 7/30
39/39 - 352s - loss: 0.6895 - accuracy: 0.5272 - val_loss: 0.7645 - val_accuracy: 0.2588 - 352s/epoch - 9s/step
Epoch 8/30
39/39 - 351s - loss: 0.6907 - accuracy: 0.5393 - val_loss: 0.6956 - val_accuracy: 0.4792 - 351s/epoch - 9s/step
Epoch 9/30
39/39