In [None]:
# importing the required libraries 
from tensorflow import keras
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Input, Dense, Flatten
from tensorflow.keras.applications.vgg16 import VGG16 as PretrainedModel, preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing import image
from keras.callbacks import ModelCheckpoint
from sklearn.metrics import confusion_matrix
from glob import glob

import numpy as np
import pandas as pd
import shutil, random, os
from os import path
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline

In [2]:
import boto3
bucket='s3-dle-24a2dfab-c6ea-475a-af08-290603fc8c40' # Or whatever you called your bucket
data_key = 'twitter_fake_real_image_dataset.zip' # Where the file is within your bucket
data_location = 's3://s3-dle-24a2dfab-c6ea-475a-af08-290603fc8c40/twitter_fake_real_image_dataset.zip'.format(bucket, data_key)
!unzip -o twitter_fake_real_image_dataset.zip

train_fake_dir = 'twitter_fake_real_image_dataset/train/fake'
train_real_dir = 'twitter_fake_real_image_dataset/train/real'
test_fake_dir = 'twitter_fake_real_image_dataset/test/fake'
test_real_dir = 'twitter_fake_real_image_dataset/test/real'

In [None]:
# Report how many files each class folder holds in both splits.
for label, directory in (
    ('total training fake images :', train_fake_dir),
    ('total training real images :', train_real_dir),
):
    print(label, len(os.listdir(directory)))

for label, directory in (
    ('total validation fake images :', test_fake_dir),
    ('total validation real images :', test_real_dir),
):
    print(label, len(os.listdir(directory)))

# One sub-directory per class. Keep directories only, so a stray file in the
# training root cannot inflate the class count derived from len(folders).
# Sorting makes the printed order deterministic.
folders = sorted(p for p in glob(train_dir + '/*') if os.path.isdir(p))
print(folders)

In [None]:
# Transfer learning: a frozen VGG16 base with a new softmax classification head.

IMAGE_SIZE = [150, 150]

# ImageNet-pretrained VGG16 without its fully connected top layers.
ptm = PretrainedModel(
    include_top=False,
    weights='imagenet',
    input_shape=IMAGE_SIZE + [3])

# Keep the pretrained weights fixed while the new head is trained.
ptm.trainable = False

# New head: flatten the base's output feature maps, then one softmax unit per
# class folder found in the training directory.
K = len(folders)
flattened = Flatten()(ptm.output)
x = Dense(units=K, activation='softmax')(flattened)

# Wire the base's input to the new head.
model = Model(inputs=ptm.input, outputs=x)

# Print a layer-by-layer overview of the assembled network.
model.summary()

In [None]:
# The generators below use class_mode='sparse' (integer labels), which is what
# the sparse categorical cross-entropy loss expects.
model.compile(
  loss='sparse_categorical_crossentropy',
  optimizer='adam',
  metrics=['accuracy']
)

# Training images: random augmentation followed by VGG16 preprocessing.
gen_train = ImageDataGenerator(
  rotation_range=20,
  width_shift_range=0.1,
  height_shift_range=0.1,
  shear_range=0.1,
  zoom_range=0.2,
  horizontal_flip=True,
  preprocessing_function=preprocess_input
)

# Evaluation images: VGG16 preprocessing only, no augmentation.
gen_test = ImageDataGenerator(
  preprocessing_function=preprocess_input
)

batch_size = 128
# Take the image size from IMAGE_SIZE (which also defines the model's
# input_shape) instead of repeating the literal, so the two cannot drift apart.
target_size = tuple(IMAGE_SIZE)

# create generators
train_generator = gen_train.flow_from_directory(
  train_dir,
  shuffle=True,
  target_size=target_size,
  class_mode='sparse',
  batch_size=batch_size,
)
# NOTE: flow_from_directory shuffles by default, so the batches of this
# generator do NOT follow the order of valid_generator.classes.
valid_generator = gen_test.flow_from_directory(
  test_dir,
  target_size=target_size,
  batch_size=batch_size,
  class_mode='sparse'
)

In [None]:
# Checkpoint the model to disk whenever the monitored validation loss improves,
# so the best weights seen during training can be reloaded later.
filepath = "vgg16_model_fake_real_image.hdf5"
model_checkpoint = ModelCheckpoint(
  filepath,
  monitor='val_loss',
  save_best_only=True,
  verbose=1
)
callbacks_list = [model_checkpoint]

EPOCHS = 300
# Derive the step counts from the generators (ceil(samples / batch_size))
# instead of hard-coding them. Every epoch then covers the full training set
# and, importantly for save_best_only, the full validation set.
steps_per_epoch = len(train_generator)
validation_steps = len(valid_generator)

# fit the model
vgg_withAug = model.fit(
  train_generator,
  validation_data=valid_generator,
  epochs=EPOCHS,
  steps_per_epoch=steps_per_epoch,
  validation_steps=validation_steps,
  verbose=2,
  callbacks=callbacks_list
)

In [None]:
# Keras writes .hdf5 files through h5py, which needs a local filesystem path
# and cannot write to an s3:// URI. Save locally first, then upload the file.
# A separate local name keeps the best-epoch checkpoint from being overwritten.
final_model_path = "vgg16_model_fake_real_image_final.hdf5"
model.save(final_model_path)

# Upload to the same bucket/key the notebook originally targeted.
boto3.client('s3').upload_file(
    final_model_path,
    bucket,
    "vgg16_model_fake_real_image.hdf5",
)

In [None]:
# Training curves: accuracy and loss side by side in one figure. Explicit axes
# replace the stray plt.subplot(2, 1, 2) call that followed plt.show() and
# produced a second, half-empty figure.
history = vgg_withAug.history
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 4))

# Accuracy per epoch for the training and validation data.
ax_acc.plot(history['accuracy'], label='acc')
ax_acc.plot(history['val_accuracy'], label='val_acc')
ax_acc.set_title('model accuracy')
ax_acc.set_ylabel('accuracy')
ax_acc.set_xlabel('epoch')
ax_acc.legend()

# Loss per epoch for the training and validation data.
ax_loss.plot(history['loss'])
ax_loss.plot(history['val_loss'])
ax_loss.set_title('model loss')
ax_loss.set_ylabel('loss')
ax_loss.set_xlabel('epoch')
ax_loss.legend(['train', 'test'], loc='upper right')

fig.tight_layout()
plt.show()

In [None]:
# Confusion Matrix
# Predictions must line up with the label array. valid_generator shuffles its
# batches, so its predictions cannot be compared with valid_generator.classes
# (which is in directory order). Use a dedicated, unshuffled generator instead.
eval_generator = gen_test.flow_from_directory(
    test_dir,
    target_size=tuple(IMAGE_SIZE),
    batch_size=batch_size,
    class_mode='sparse',
    shuffle=False,
)

# model.predict consumes the whole generator (len(eval_generator) batches),
# replacing the deprecated predict_generator and the hard-coded sample count.
Y_pred = model.predict(eval_generator)
y_pred = np.argmax(Y_pred, axis=1)
print('Confusion Matrix')
print(confusion_matrix(eval_generator.classes, y_pred))