In [1]:
import keras,os
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPool2D , Flatten
from keras.preprocessing.image import ImageDataGenerator

import tensorflow as tf
from keras.layers import Dropout
# Report whether TensorFlow can see at least one physical GPU device.
gpu_devices = tf.config.experimental.list_physical_devices("GPU")
print("GPU is", "avaliable" if gpu_devices else "not available")
from sklearn.metrics import confusion_matrix
import os

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
import pandas as pd
import time

GPU is avaliable


# Image-based PWD training on EngPhish; see the middle column of Tables XIII and XIV.

In [None]:

# --- Dataset discovery -------------------------------------------------------
# eng_root holds one sub-directory per class; the sub-directory name is used
# as the label of every file inside it.
eng_root = "../data/engphish/engphish_screen/"
# os.listdir returns entries in arbitrary order, so the listing is sorted to
# make the sample order (and therefore the seeded split below) reproducible.
eng_folder = sorted(os.listdir(eng_root))

image_paths = []
labels = []

for label in eng_folder:
    label_path = os.path.join(eng_root, label)
    if os.path.isdir(label_path):
        for image_file in sorted(os.listdir(label_path)):
            image_paths.append(os.path.join(label_path, image_file))
            labels.append(label)

# Fail early with a clear message instead of an opaque error from the split.
if not image_paths:
    raise FileNotFoundError("no image files found under " + eng_root)

# --- Train / test split (80 / 20, fixed seed) --------------------------------
train_image_paths, test_image_paths, train_labels, test_labels = train_test_split(
    image_paths, labels, test_size=0.2, shuffle=True, random_state=42)

train_df = pd.DataFrame({'paths': train_image_paths, 'labels': train_labels})
test_df = pd.DataFrame({'paths': test_image_paths, 'labels': test_labels})

# --- Generators --------------------------------------------------------------
image_width = 224
image_height = 224
batch_size = 128

# Both generators only rescale pixel values to [0, 1]; no augmentation.
train_datagen = ImageDataGenerator(
    rescale=1./255,
)

test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col="paths",
    y_col="labels",
    # Keras expects target_size as (height, width).
    target_size=(image_height, image_width),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True
)

# The test generator must NOT shuffle: predictions returned by
# model.predict(test_generator) are compared against labels taken in
# test_df order, so the iteration order has to match the dataframe order.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    x_col="paths",
    y_col="labels",
    target_size=(image_height, image_width),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)

#vgg16

def create_model2(input_shape=(224, 224, 3), num_classes=2):
    """Build and compile a VGG16-style convolutional classifier.

    The network is five convolutional stages (2, 2, 3, 3 and 3 unpadded 3x3
    convolutions with 64, 128, 256, 512 and 512 filters), each followed by
    2x2 max-pooling and 25% dropout, then two bias-free 4096-unit dense
    layers with 50% dropout and a bias-free softmax output layer.

    Args:
        input_shape: Shape of one input image as (height, width, channels).
            It must be large enough to survive the unpadded convolutions and
            pooling; the default 224x224 input is reduced to a 1x1x512
            feature map before flattening.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The Sequential model, compiled with the Adam optimizer,
        categorical cross-entropy loss and an accuracy metric.
    """
    # (filters, number of consecutive convolutions) for each stage.
    # NOTE(review): canonical VGG16 uses padding="same"; "valid" is kept
    # here because it is what this notebook trained with - confirm intent.
    conv_stages = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

    model_vgg16 = Sequential()
    first_layer = True
    for filters, conv_count in conv_stages:
        for _ in range(conv_count):
            conv_kwargs = dict(filters=filters, kernel_size=(3, 3),
                               padding="valid", activation="relu")
            if first_layer:
                # Only the very first layer declares the input shape.
                conv_kwargs["input_shape"] = input_shape
                first_layer = False
            model_vgg16.add(Conv2D(**conv_kwargs))
        model_vgg16.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
        model_vgg16.add(Dropout(0.25))

    model_vgg16.add(Flatten())
    for _ in range(2):
        model_vgg16.add(Dense(units=4096, activation="relu", use_bias=False))
        model_vgg16.add(Dropout(0.5))
    model_vgg16.add(Dense(num_classes, activation="softmax", use_bias=False))

    model_vgg16.compile(optimizer='adam', loss='categorical_crossentropy',
                        metrics=['accuracy'])
    return model_vgg16

# Baseline CNN: two 32-filter conv / max-pool stages feeding a 128-unit dense
# layer and a two-way softmax output.
model_cnn = tf.keras.Sequential()
model_cnn.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)))
model_cnn.add(tf.keras.layers.MaxPooling2D(2, 2))
model_cnn.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu'))
model_cnn.add(tf.keras.layers.MaxPooling2D(2, 2))
model_cnn.add(tf.keras.layers.Flatten())
model_cnn.add(tf.keras.layers.Dense(128, activation=tf.nn.relu))
model_cnn.add(tf.keras.layers.Dense(2, activation=tf.nn.softmax))

model_cnn.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Wall-clock duration of the 20-epoch training run, in seconds.
cnn_start = time.time()
model_cnn.fit(train_generator, epochs=20)
cnn_end = time.time()
cnn_train_time = cnn_end - cnn_start

# --- Evaluate the CNN on the held-out split -----------------------------------
# Predictions and ground-truth labels are collected from the very same
# generator batches, so each prediction stays paired with the label of the
# image it was computed from even if the generator shuffles its samples or
# dropped rows during filename validation. (Calling predict(test_generator)
# and then reading labels from test_df is only valid when the generator
# iterates in exact dataframe order.)
cnn_test_st=time.time()
prediction_batches = []
truth_indices = []
for batch_index in range(len(test_generator)):
    batch_images, batch_onehot = test_generator[batch_index]
    prediction_batches.append(model_cnn.predict_on_batch(batch_images))
    truth_indices.extend(tf.argmax(batch_onehot, axis=1).numpy().tolist())
cnn_test_en=time.time()
cnn_test_time=cnn_test_en-cnn_test_st

if not prediction_batches:
    raise ValueError("test_generator yielded no batches; nothing to evaluate")

predictions = tf.concat(prediction_batches, axis=0).numpy()
predicted_indices = tf.argmax(predictions, axis=1).numpy().tolist()

# Metric convention: 'phish' is the positive class (1), 'benign' the negative (0).
label_mapping = {'benign': 0, 'phish': 1}

# Translate generator class indices to class names before applying
# label_mapping. The model's output columns follow the encoding of the
# generator it was trained on; the one-hot test labels follow the test
# generator's own encoding.
train_index_to_name = {index: name for name, index in train_generator.class_indices.items()}
test_index_to_name = {index: name for name, index in test_generator.class_indices.items()}

ground_truth_labels = [test_index_to_name[index] for index in truth_indices]
ground_truth = [label_mapping[label] for label in ground_truth_labels]
binary_predictions = [label_mapping[train_index_to_name[index]] for index in predicted_indices]

# labels=[0, 1] forces a 2x2 matrix even when one class is absent from the
# ground truth and the predictions.
cm = confusion_matrix(ground_truth, binary_predictions, labels=[0, 1])

# Rows are true classes, columns are predicted classes:
# [[TN, FP],
#  [FN, TP]]
TN, FP, FN, TP = (int(count) for count in cm.ravel())


def _safe_rate(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float('nan')


# True/false positive and negative rates.
TPR = _safe_rate(TP, TP + FN)
TNR = _safe_rate(TN, TN + FP)
FPR = _safe_rate(FP, FP + TN)
FNR = _safe_rate(FN, FN + TP)
print('FPR is %.3f, FNR is %.3f'%(FPR,FNR))

