In [0]:
!pip install plaidml-keras plaidbench

In [0]:
!plaidml-setup

In [0]:
# GPU Acceleration
import plaidml.keras
plaidml.keras.install_backend()

In [0]:
# standard import
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import metrics
from keras.utils import np_utils
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
import matplotlib.pyplot as plt

In [0]:
# important features function
def most_informative_feature(vectorizer, classifier, n=10):
    class_labels = classifier.classes_
    feature_names = vectorizer.get_feature_names()
    class1 = sorted(zip(classifier.coef_[0], feature_names))[:n]
    class2 = sorted(zip(classifier.coef_[0], feature_names))[-n:]
    print("Important FAKE news features")
    for coef, feat in class1:
        print(class_labels[0], feat)
    print()
    print("Important REAL news features")
    for coef, feat in reversed(class2):  # reversed order
        print(class_labels[1], feat)


# scorer function
def scorer(confusion_m):
    tn, fp, fn, tp = confusion_m.ravel()
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    f1_score = (2 * precision * recall) / (precision + recall)
    print("Precision is: %0.3f" % precision)
    print("Recall is: %0.3f" % recall)
    print("F-1 Score is: %0.3f" % f1_score)
    print()

In [0]:
# read data
df = pd.read_csv('fake_or_real_news.csv')
df = df.set_index('Unnamed: 0')
y = df.label
df = df.drop('label', axis=1)
X_train, X_test, y_train, y_test = train_test_split(df['text'], y, test_size=0.3, random_state=50)

In [0]:
maxlen = 3000
vocab_size = 80000
num_classes = 2
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(X_train.to_numpy())
X_train = tokenizer.texts_to_sequences(X_train.to_numpy())
X_test = tokenizer.texts_to_sequences(X_test.to_numpy())
X_train = pad_sequences(X_train, maxlen=maxlen, padding='post')
X_test = pad_sequences(X_test, maxlen=maxlen, padding='post')

y_train = np_utils.to_categorical(y_train.factorize()[0], num_classes)
y_test = np_utils.to_categorical(y_test.factorize()[0], num_classes)

In [0]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Embedding
from keras.layers import Conv1D
from keras.layers import GlobalMaxPooling1D
from keras.callbacks import EarlyStopping

In [15]:
# Create the model
model = Sequential()
model.add(Embedding(vocab_size, 3, input_length=maxlen))
model.add(Dropout(0.5))
model.add(Conv1D(32, 8, padding='valid', activation='relu'))
model.add(Dropout(0.5))
model.add(Conv1D(32, 8, padding='valid', activation='relu'))
model.add(GlobalMaxPooling1D())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(num_classes, activation='softmax'))
# Compile model
epochs = 25
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']);
model.summary();

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, 3000, 3)           240000    
_________________________________________________________________
dropout_10 (Dropout)         (None, 3000, 3)           0         
_________________________________________________________________
conv1d_7 (Conv1D)            (None, 2993, 32)          800       
_________________________________________________________________
dropout_11 (Dropout)         (None, 2993, 32)          0         
_________________________________________________________________
conv1d_8 (Conv1D)            (None, 2986, 32)          8224      
_________________________________________________________________
global_max_pooling1d_4 (Glob (None, 32)                0         
_________________________________________________________________
dense_7 (Dense)              (None, 64)                2112      
__________

In [16]:
early = [EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)]
# Fit the model
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=epochs, batch_size=32, callbacks=early);

Train on 4434 samples, validate on 1901 samples
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25


In [22]:
# Final evaluation of the model
pred_nn = model.predict(X_test)
nn_score = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy of CNN:   %0.3f" % nn_score[1])

Accuracy of CNN:   0.919


In [31]:
cm_nn = metrics.confusion_matrix(y_test.argmax(axis=1), pred_nn.argmax(axis=1))
scorer(cm_nn)

Precision is: 0.884
Recall is: 0.967
F-1 Score is: 0.924

