In [1]:
import numpy as np 
import pandas as pd 
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import statsmodels.api as sm 
from sklearn.utils import shuffle 
from collections import Counter 
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
import keras
from keras.utils import to_categorical
import os
from keras.layers import *
from tensorflow.keras.layers import Dropout
from tensorflow.keras.regularizers import l2
from keras.models import Sequential, Model
from keras import optimizers

In [2]:
def parse_arff(filename):
	"""Read a simple ARFF file into raw rows and attribute names.

	Parameters
	----------
	filename : str or path-like
		Location of the ARFF file to read.

	Returns
	-------
	data : list[list[str]]
		One list per data row; values are the comma-separated fields of the
		row, left as strings (no numeric conversion is done here).
	columns : list[str]
		The second whitespace-separated token of every ``@attribute`` line,
		in file order.

	Raises
	------
	OSError
		If the file cannot be opened (e.g. ``FileNotFoundError``). The error
		is propagated instead of being swallowed, so the caller sees the real
		cause rather than a failure on an unbound file handle.
	"""
	columns = []
	data = []

	# `with` guarantees the handle is closed even if parsing fails part-way.
	with open(filename) as file:
		for line in file:
			line = line.strip()
			# Skip blank lines and ARFF '%' comment lines.
			if not line or line.startswith('%'):
				continue
			# ARFF keywords are case-insensitive (@attribute / @ATTRIBUTE).
			if line.lower().startswith('@attribute'):
				columns.append(line.split()[1])
			elif line.startswith('@'):
				# Other header declarations (@relation, @data, ...) carry no row data.
				continue
			else:
				data.append(line.split(','))

	return data, columns

In [3]:
# Load every dataset used in the notebook.
#
# parse_arff returns each row as a list of strings, so the ARFF frames are converted
# to numbers explicitly. Passing dtype=np.int8 to the DataFrame constructor is not
# reliable for string input (depending on the pandas version the cast is silently
# skipped or raises), and int8 cannot represent fractional values or values outside
# [-128, 127].
data, columns = parse_arff('./data/Phishing_Legitimate_full.arff')
phishing_legitimate_full = pd.DataFrame(data, columns=columns).apply(pd.to_numeric)
data, columns = parse_arff('./data/Training Dataset.arff')
# downcast='integer' picks the smallest integer dtype that holds the parsed values.
training_dataset = pd.DataFrame(data, columns=columns).apply(pd.to_numeric, downcast='integer')
datasetcsv = pd.read_csv('./data/Dataset.csv')
TextFrameImage_features_new = pd.read_csv('./data/TextFrameImage Features_new.csv')
dataset_b = pd.read_csv('./data/dataset_B_05_2020.csv')
ISCXURL2016 = pd.read_csv('./data/Canadian Institute for Cybersecurity/Phishing.csv')
phishingcsv = pd.read_csv('./data/phishing.csv')

# NOTE: a notebook cell only renders its LAST expression, so every preview below
# except the final one is evaluated and discarded. Move them to separate cells
# (or wrap them in display()) if they should be shown.
phishing_legitimate_full.columns
phishing_legitimate_full.head(5)
training_dataset.columns
training_dataset.head(5)
datasetcsv.columns
datasetcsv.head(5)
TextFrameImage_features_new.columns
TextFrameImage_features_new.head(5)
dataset_b.columns
dataset_b.head(5)
ISCXURL2016.columns
ISCXURL2016.head(5)
phishingcsv.columns
phishingcsv.head(5)

In [4]:
# Candidate hyper-parameter values; the model cells below pick from these by hand.
loss_fn = keras.losses.Huber(delta=1.0)  # shared Huber loss instance (delta=1.0)
activation = [
    'softplus', 'softsign', 'selu', 'elu',
    'exponential', 'tanh', 'sigmoid', 'relu',
]
# NOTE(review): this rebinds `optimizers`, hiding the `keras.optimizers` module
# imported in the first cell; consider a different name if that module is needed.
optimizers = [
    'sgd', 'rmsprop', 'adam', 'adadelta',
    'adagrad', 'adamax', 'nadam', 'ftrl',
]
binary_loss = [
    'binary_crossentropy',
    'hinge',
    'squared_hinge',
    loss_fn,
]

In [20]:
# Hold out 30% of phishing_legitimate_full for validation: the last column is the
# label, every other column is a feature.
x_train, x_test, y_train, y_test = train_test_split(
    phishing_legitimate_full[phishing_legitimate_full.columns[:-1]],
    phishing_legitimate_full[phishing_legitimate_full.columns[-1]],
    test_size=0.3,
    random_state=42,  # fixed seed so the split is identical on every re-run
)
# Cast everything to float32 NumPy arrays before handing the data to Keras.
x_train = np.array(x_train).astype(np.float32)
y_train = np.array(y_train).astype(np.float32)
x_test = np.array(x_test).astype(np.float32)
y_test = np.array(y_test).astype(np.float32)

In [24]:
# Fully connected network for inputs with 48 features per row:
# 64-128-128-64 ReLU stack followed by a single softplus output unit.
# NOTE(review): loss_fn is a Huber (regression) loss and softplus is unbounded
# above; confirm this pairing is intentional for 0/1 labels.
model = keras.Sequential([
    Dense(64, input_shape=(48,), activation='relu'),
    Dense(128, activation='relu'),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='softplus'),
])

model.compile(optimizer='adamax', loss=loss_fn, metrics=['accuracy'])
# The held-out split doubles as the per-epoch validation set.
modelhistory = model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [25]:
# Feed each 48-feature row to the LSTM as a sequence of length 1.
x = x_train.reshape((x_train.shape[0], 1, 48))
xx = x_test.reshape((x_test.shape[0], 1, 48))

# Two stacked LSTMs followed by a dense head.
model = keras.Sequential([
    keras.layers.LSTM(64, activation='relu', return_sequences=True, input_shape=(1, 48)),
    keras.layers.LSTM(128, activation='relu'),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dropout(0.2),
    # binary_crossentropy expects a probability in [0, 1]; the previous softplus
    # output is unbounded above, so a sigmoid output is used instead.
    keras.layers.Dense(1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy', optimizer='adamax', metrics=['accuracy'])
modelhistory = model.fit(x, y_train, epochs=20, validation_data=(xx, y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [26]:
# Arrange the 48 features of each row as a 4x4 grid with 3 channels for the CNN.
x = x_train.reshape((x_train.shape[0], 4, 4, 3))
xt = x_test.reshape((x_test.shape[0], 4, 4, 3))

# 1x1 convolutions with one 2x2 max-pool, then a dense head.
model = keras.Sequential([
    keras.layers.Conv2D(64, (1, 1), activation='relu', input_shape=(4, 4, 3)),
    keras.layers.MaxPool2D(2, 2),
    keras.layers.Conv2D(128, (1, 1), activation='relu'),
    keras.layers.Conv2D(128, (1, 1), activation='relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Flatten(),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dropout(0.2),
    # binary_crossentropy expects a probability in [0, 1]; the previous softplus
    # output is unbounded above, so a sigmoid output is used instead.
    keras.layers.Dense(1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy', optimizer='adamax', metrics=['accuracy'])
modelhistory = model.fit(x, y_train, epochs=20, validation_data=(xt, y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [157]:
# Hold out 30% of datasetcsv for validation: the last column is the label,
# every other column is a feature.
x_train, x_test, y_train, y_test = train_test_split(
    datasetcsv[datasetcsv.columns[:-1]],
    datasetcsv[datasetcsv.columns[-1]],
    test_size=0.3,
    random_state=42,  # fixed seed so the split is identical on every re-run
)
# Cast everything to float32 NumPy arrays before handing the data to Keras.
x_train = np.array(x_train).astype(np.float32)
y_train = np.array(y_train).astype(np.float32)
x_test = np.array(x_test).astype(np.float32)
y_test = np.array(y_test).astype(np.float32)

In [158]:
# Arrange the 14 features of each row as a 7x2 single-channel grid for the CNN.
cnn_shape = (7, 2, 1)
x = x_train.reshape((x_train.shape[0],) + cnn_shape)
xt = x_test.reshape((x_test.shape[0],) + cnn_shape)

# 1x1 convolutions with one 2x2 max-pool, then a dense head with a tanh output.
# NOTE(review): loss_fn is a Huber (regression) loss and tanh ranges over (-1, 1);
# confirm this pairing is intentional for 0/1 labels.
model = keras.Sequential([
    keras.layers.Conv2D(64, (1, 1), activation='relu', input_shape=(7, 2, 1)),
    keras.layers.MaxPool2D(2, 2),
    keras.layers.Conv2D(128, (1, 1), activation='relu'),
    keras.layers.Conv2D(128, (1, 1), activation='relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Flatten(),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(1, activation='tanh'),
])

model.compile(optimizer='adamax', loss=loss_fn, metrics=['accuracy'])
modelhistory = model.fit(
    x,
    y_train,
    validation_data=(xt, y_test),
    epochs=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [159]:
# Fully connected network for inputs with 14 features per row:
# 64-128-128-64 ReLU stack followed by a single output unit.
model = keras.Sequential([
    Dense(64, input_shape=(14,), activation='relu'),
    Dense(128, activation='relu'),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    # binary_crossentropy expects a probability in [0, 1]; the previous softplus
    # output is unbounded above, so a sigmoid output is used instead.
    Dense(1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy', optimizer='adamax', metrics=['accuracy'])
modelhistory = model.fit(x_train, y_train, epochs=20, validation_data=(x_test, y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [160]:
# Feed each 14-feature row to the LSTM as a sequence of length 1.
x = x_train.reshape((x_train.shape[0], 1, 14))
xx = x_test.reshape((x_test.shape[0], 1, 14))

# Two stacked LSTMs followed by a dense head.
model = keras.Sequential([
    keras.layers.LSTM(64, activation='relu', return_sequences=True, input_shape=(1, 14)),
    keras.layers.LSTM(128, activation='relu'),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dropout(0.2),
    # binary_crossentropy expects a probability in [0, 1]; the previous softplus
    # output is unbounded above, so a sigmoid output is used instead.
    keras.layers.Dense(1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy', optimizer='adamax', metrics=['accuracy'])
modelhistory = model.fit(x, y_train, epochs=20, validation_data=(xx, y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [155]:
# Hold out 30% of TextFrameImage_features_new for validation: the last column is
# the label, every other column is a feature.
x_train, x_test, y_train, y_test = train_test_split(
    TextFrameImage_features_new[TextFrameImage_features_new.columns[:-1]],
    TextFrameImage_features_new[TextFrameImage_features_new.columns[-1]],
    test_size=0.3,
    random_state=42,  # fixed seed so the split is identical on every re-run
)
# Cast everything to float32 NumPy arrays before handing the data to Keras.
x_train = np.array(x_train).astype(np.float32)
y_train = np.array(y_train).astype(np.float32)
x_test = np.array(x_test).astype(np.float32)
y_test = np.array(y_test).astype(np.float32)
# Remap label value 0.1 -> 0 and 0.3 -> 1; any other label value is left untouched.
# NOTE(review): this is an exact float comparison made after the float32 cast;
# verify the target column really only holds these values.
y_train = np.where(y_train == 0.1, 0, y_train)
y_test = np.where(y_test == 0.1, 0, y_test)
y_train = np.where(y_train == 0.3, 1, y_train)
y_test = np.where(y_test == 0.3, 1, y_test)

In [156]:
# Arrange the 35 features of each row as a 7x5 single-channel grid for the CNN.
cnn_shape = (7, 5, 1)
x = x_train.reshape((x_train.shape[0],) + cnn_shape)
xt = x_test.reshape((x_test.shape[0],) + cnn_shape)

# 1x1 convolutions with one 2x2 max-pool, then a dense head with a tanh output.
# NOTE(review): loss_fn is a Huber (regression) loss and tanh ranges over (-1, 1);
# confirm this pairing is intentional for 0/1 labels.
model = keras.Sequential([
    keras.layers.Conv2D(64, (1, 1), activation='relu', input_shape=(7, 5, 1)),
    keras.layers.MaxPool2D(2, 2),
    keras.layers.Conv2D(128, (1, 1), activation='relu'),
    keras.layers.Conv2D(128, (1, 1), activation='relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Flatten(),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(1, activation='tanh'),
])

model.compile(optimizer='adamax', loss=loss_fn, metrics=['accuracy'])
modelhistory = model.fit(
    x,
    y_train,
    validation_data=(xt, y_test),
    epochs=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [63]:
# Fully connected network for inputs with 35 features per row:
# 64-128-128-64 ReLU stack followed by a single softplus output unit.
# NOTE(review): loss_fn is a Huber (regression) loss and softplus is unbounded
# above; confirm this pairing is intentional for 0/1 labels.
model = keras.Sequential([
    Dense(64, input_shape=(35,), activation='relu'),
    Dense(128, activation='relu'),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='softplus'),
])

model.compile(optimizer='adamax', loss=loss_fn, metrics=['accuracy'])
modelhistory = model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [65]:
# Feed each 35-feature row to the LSTM as a sequence of length 1.
x = x_train.reshape((x_train.shape[0], 1, 35))
xx = x_test.reshape((x_test.shape[0], 1, 35))

# Two stacked LSTMs followed by a dense head.
model = keras.Sequential([
    keras.layers.LSTM(64, activation='relu', return_sequences=True, input_shape=(1, 35)),
    keras.layers.LSTM(128, activation='relu'),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dropout(0.2),
    # binary_crossentropy expects a probability in [0, 1]; the previous softplus
    # output is unbounded above, so a sigmoid output is used instead.
    keras.layers.Dense(1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy', optimizer='adamax', metrics=['accuracy'])
modelhistory = model.fit(x, y_train, epochs=20, validation_data=(xx, y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
# Scratch cell: evaluates to a tuple holding every loaded frame.
# NOTE(review): running this renders all seven frames in full; prefer .head() previews.
(
    phishing_legitimate_full,
    training_dataset,
    datasetcsv,
    TextFrameImage_features_new,
    dataset_b,
    ISCXURL2016,
    phishingcsv,
)

In [150]:
# Hold out 30% of dataset_b for validation: the last column is the label and the
# first column is skipped (presumably a non-numeric identifier — confirm).
x_train, x_test, y_train, y_test = train_test_split(
    dataset_b[dataset_b.columns[1:-1]],
    dataset_b[dataset_b.columns[-1]],
    test_size=0.3,
    random_state=42,  # fixed seed so the split is identical on every re-run
)
# Encode the string labels: 'phishing' -> 0, 'legitimate' -> 1.
y_train = np.where(y_train == 'phishing', 0, y_train)
y_test = np.where(y_test == 'phishing', 0, y_test)
y_train = np.where(y_train == 'legitimate', 1, y_train)
y_test = np.where(y_test == 'legitimate', 1, y_test)
# Cast everything to float32 NumPy arrays before handing the data to Keras.
x_train = np.array(x_train).astype(np.float32)
y_train = np.array(y_train).astype(np.float32)
x_test = np.array(x_test).astype(np.float32)
y_test = np.array(y_test).astype(np.float32)

In [151]:
# Fully connected network for inputs with 87 features per row:
# 64-128-128-64 ReLU stack followed by a single output unit.
model = keras.Sequential([
    Dense(64, input_shape=(87,), activation='relu'),
    Dense(128, activation='relu'),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    # binary_crossentropy expects a probability in [0, 1]; the previous softplus
    # output is unbounded above, so a sigmoid output is used instead.
    Dense(1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy', optimizer='adamax', metrics=['accuracy'])
modelhistory = model.fit(x_train, y_train, epochs=20, validation_data=(x_test, y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [82]:
# Feed each 87-feature row to the LSTM as a sequence of length 1.
x = x_train.reshape((x_train.shape[0], 1, 87))
xx = x_test.reshape((x_test.shape[0], 1, 87))

# Two stacked LSTMs followed by a dense head.
model = keras.Sequential([
    keras.layers.LSTM(64, activation='relu', return_sequences=True, input_shape=(1, 87)),
    keras.layers.LSTM(128, activation='relu'),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dropout(0.2),
    # binary_crossentropy expects a probability in [0, 1]; the previous softplus
    # output is unbounded above, so a sigmoid output is used instead.
    keras.layers.Dense(1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy', optimizer='adamax', metrics=['accuracy'])
modelhistory = model.fit(x, y_train, epochs=20, validation_data=(xx, y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [148]:
# Arrange the 87 features of each row as a 29x3 single-channel grid for the CNN.
cnn_shape = (29, 3, 1)
x = x_train.reshape((x_train.shape[0],) + cnn_shape)
xt = x_test.reshape((x_test.shape[0],) + cnn_shape)

# 1x1 convolutions with one 2x2 max-pool, then a dense head with a tanh output.
# NOTE(review): loss_fn is a Huber (regression) loss and tanh ranges over (-1, 1);
# confirm this pairing is intentional for 0/1 labels.
model = keras.Sequential([
    keras.layers.Conv2D(64, (1, 1), activation='relu', input_shape=(29, 3, 1)),
    keras.layers.MaxPool2D(2, 2),
    keras.layers.Conv2D(128, (1, 1), activation='relu'),
    keras.layers.Conv2D(128, (1, 1), activation='relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Flatten(),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(1, activation='tanh'),
])

model.compile(optimizer='adamax', loss=loss_fn, metrics=['accuracy'])
modelhistory = model.fit(
    x,
    y_train,
    validation_data=(xt, y_test),
    epochs=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [165]:
# Hold out 30% of ISCXURL2016 for validation: the last column is the label,
# every other column is a feature.
x_train, x_test, y_train, y_test = train_test_split(
    ISCXURL2016[ISCXURL2016.columns[:-1]],
    ISCXURL2016[ISCXURL2016.columns[-1]],
    test_size=0.3,
    random_state=42,  # fixed seed so the split is identical on every re-run
)
# Encode the string labels: 'phishing' -> 0, 'benign' -> 1.
y_train = np.where(y_train == 'phishing', 0, y_train)
y_test = np.where(y_test == 'phishing', 0, y_test)
y_train = np.where(y_train == 'benign', 1, y_train)
y_test = np.where(y_test == 'benign', 1, y_test)
# Cast everything to float32 NumPy arrays before handing the data to Keras.
x_train = np.array(x_train).astype(np.float32)
y_train = np.array(y_train).astype(np.float32)
x_test = np.array(x_test).astype(np.float32)
y_test = np.array(y_test).astype(np.float32)

In [166]:
# Fully connected network for inputs with 79 features per row:
# 64-128-128-64 ReLU stack followed by a single output unit.
model = keras.Sequential([
    Dense(64, input_shape=(79,), activation='relu'),
    Dense(128, activation='relu'),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    # binary_crossentropy expects a probability in [0, 1]; the previous tanh
    # output ranges over (-1, 1), so a sigmoid output is used instead.
    Dense(1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy', optimizer='adamax', metrics=['accuracy'])
modelhistory = model.fit(x_train, y_train, epochs=20, validation_data=(x_test, y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [146]:
# Feed each 79-feature row to the LSTM as a sequence of length 1.
seq_shape = (1, 79)
x = x_train.reshape((x_train.shape[0],) + seq_shape)
xx = x_test.reshape((x_test.shape[0],) + seq_shape)

# Two stacked LSTMs followed by a dense head with a tanh output.
# NOTE(review): loss_fn is a Huber (regression) loss and tanh ranges over (-1, 1);
# confirm this pairing is intentional for 0/1 labels.
model = keras.Sequential([
    keras.layers.LSTM(64, activation='relu', return_sequences=True, input_shape=(1, 79)),
    keras.layers.LSTM(128, activation='relu'),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(1, activation='tanh'),
])

model.compile(optimizer='adamax', loss=loss_fn, metrics=['accuracy'])
modelhistory = model.fit(
    x,
    y_train,
    validation_data=(xx, y_test),
    epochs=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [142]:
# Arrange the 79 features of each row as a 79x1 single-channel column for the CNN.
x = x_train.reshape((x_train.shape[0], 79, 1, 1))
xt = x_test.reshape((x_test.shape[0], 79, 1, 1))

# 1x1 convolutions followed by a dense head.
model = keras.Sequential([
    keras.layers.Conv2D(64, (1, 1), activation='relu', input_shape=(79, 1, 1)),
    # Pool size 1 with stride 1 leaves the feature map unchanged.
    keras.layers.MaxPool2D(1, 1),
    keras.layers.Conv2D(128, (1, 1), activation='relu'),
    keras.layers.Conv2D(128, (1, 1), activation='relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Flatten(),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dropout(0.2),
    # binary_crossentropy expects a probability in [0, 1]; the previous tanh
    # output ranges over (-1, 1), so a sigmoid output is used instead.
    keras.layers.Dense(1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy', optimizer='adamax', metrics=['accuracy'])
modelhistory = model.fit(x, y_train, epochs=20, validation_data=(xt, y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [133]:
# Hold out 30% of phishingcsv for validation: the last column is the label,
# every other column is a feature.
# NOTE(review): columns[:-1] keeps the leading 'Index' column as a feature; it looks
# like a row identifier — confirm it should be a model input. The models trained on
# this split assume 31 input features, so dropping it means updating them as well.
x_train, x_test, y_train, y_test = train_test_split(
    phishingcsv[phishingcsv.columns[:-1]],
    phishingcsv[phishingcsv.columns[-1]],
    test_size=0.3,
    random_state=42,  # fixed seed so the split is identical on every re-run
)
# Remap label -1 -> 0; every other label value is left as is.
y_train = np.where(y_train == -1, 0, y_train)
y_test = np.where(y_test == -1, 0, y_test)
# Cast everything to float32 NumPy arrays before handing the data to Keras.
x_train = np.array(x_train).astype(np.float32)
y_train = np.array(y_train).astype(np.float32)
x_test = np.array(x_test).astype(np.float32)
y_test = np.array(y_test).astype(np.float32)

In [138]:
# Fully connected network for inputs with 31 features per row:
# 64-128-128-64 ReLU stack followed by a single tanh output unit.
# NOTE(review): loss_fn is a Huber (regression) loss and tanh ranges over (-1, 1);
# confirm this pairing is intentional for 0/1 labels.
model = keras.Sequential([
    Dense(64, input_shape=(31,), activation='relu'),
    Dense(128, activation='relu'),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='tanh'),
])

model.compile(optimizer='adamax', loss=loss_fn, metrics=['accuracy'])
modelhistory = model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [136]:
# Feed each 31-feature row to the LSTM as a sequence of length 1.
seq_shape = (1, 31)
x = x_train.reshape((x_train.shape[0],) + seq_shape)
xx = x_test.reshape((x_test.shape[0],) + seq_shape)

# Two stacked LSTMs followed by a dense head with a softplus output.
# NOTE(review): loss_fn is a Huber (regression) loss and softplus is unbounded
# above; confirm this pairing is intentional for 0/1 labels.
model = keras.Sequential([
    keras.layers.LSTM(64, activation='relu', return_sequences=True, input_shape=(1, 31)),
    keras.layers.LSTM(128, activation='relu'),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(1, activation='softplus'),
])

model.compile(optimizer='adamax', loss=loss_fn, metrics=['accuracy'])
modelhistory = model.fit(
    x,
    y_train,
    validation_data=(xx, y_test),
    epochs=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [135]:
# Arrange the 31 features of each row as a 31x1 single-channel column for the CNN.
cnn_shape = (31, 1, 1)
x = x_train.reshape((x_train.shape[0],) + cnn_shape)
xt = x_test.reshape((x_test.shape[0],) + cnn_shape)

# 1x1 convolutions followed by a dense head with a tanh output.
# NOTE(review): loss_fn is a Huber (regression) loss and tanh ranges over (-1, 1);
# confirm this pairing is intentional for 0/1 labels.
model = keras.Sequential([
    keras.layers.Conv2D(64, (1, 1), activation='relu', input_shape=(31, 1, 1)),
    # Pool size 1 with stride 1 leaves the feature map unchanged.
    keras.layers.MaxPool2D(1, 1),
    keras.layers.Conv2D(128, (1, 1), activation='relu'),
    keras.layers.Conv2D(128, (1, 1), activation='relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Flatten(),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(1, activation='tanh'),
])

model.compile(optimizer='adamax', loss=loss_fn, metrics=['accuracy'])
modelhistory = model.fit(
    x,
    y_train,
    validation_data=(xt, y_test),
    epochs=20,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
# Scratch cell: evaluates to a tuple holding every loaded frame.
# NOTE(review): running this renders all seven frames in full; prefer .head() previews.
(
    phishing_legitimate_full,
    training_dataset,
    datasetcsv,
    TextFrameImage_features_new,
    dataset_b,
    ISCXURL2016,
    phishingcsv,
)

In [118]:
# Row count of every loaded frame, in load order.
tuple(
    len(frame)
    for frame in (
        phishing_legitimate_full,
        training_dataset,
        datasetcsv,
        TextFrameImage_features_new,
        dataset_b,
        ISCXURL2016,
        phishingcsv,
    )
)

(10000, 11055, 11000, 13071, 11430, 15367, 11054)

In [112]:
# Column labels of the two frames, shown together as a 2-tuple.
tuple(frame.columns for frame in (phishing_legitimate_full, datasetcsv))

(Index(['NumDots', 'SubdomainLevel', 'PathLevel', 'UrlLength', 'NumDash',
        'NumDashInHostname', 'AtSymbol', 'TildeSymbol', 'NumUnderscore',
        'NumPercent', 'NumQueryComponents', 'NumAmpersand', 'NumHash',
        'NumNumericChars', 'NoHttps', 'RandomString', 'IpAddress',
        'DomainInSubdomains', 'DomainInPaths', 'HttpsInHostname',
        'HostnameLength', 'PathLength', 'QueryLength', 'DoubleSlashInPath',
        'NumSensitiveWords', 'EmbeddedBrandName', 'PctExtHyperlinks',
        'PctExtResourceUrls', 'ExtFavicon', 'InsecureForms',
        'RelativeFormAction', 'ExtFormAction', 'AbnormalFormAction',
        'PctNullSelfRedirectHyperlinks', 'FrequentDomainNameMismatch',
        'FakeLinkInStatusBar', 'RightClickDisabled', 'PopUpWindow',
        'SubmitInfoToEmail', 'IframeOrFrame', 'MissingTitle',
        'ImagesOnlyInForm', 'SubdomainLevelRT', 'UrlLengthRT',
        'PctExtResourceUrlsRT', 'AbnormalExtFormActionR', 'ExtMetaScriptLinkRT',
        'PctExtNullSelfRedir

In [117]:
# Column labels of the two frames, shown together as a 2-tuple.
tuple(frame.columns for frame in (ISCXURL2016, dataset_b))

(Index(['Querylength', 'domain_token_count', 'path_token_count',
        'avgdomaintokenlen', 'longdomaintokenlen', 'avgpathtokenlen', 'tld',
        'charcompvowels', 'charcompace', 'ldl_url', 'ldl_domain', 'ldl_path',
        'ldl_filename', 'ldl_getArg', 'dld_url', 'dld_domain', 'dld_path',
        'dld_filename', 'dld_getArg', 'urlLen', 'domainlength', 'pathLength',
        'subDirLen', 'fileNameLen', 'this.fileExtLen', 'ArgLen', 'pathurlRatio',
        'ArgUrlRatio', 'argDomanRatio', 'domainUrlRatio', 'pathDomainRatio',
        'argPathRatio', 'executable', 'isPortEighty', 'NumberofDotsinURL',
        'ISIpAddressInDomainName', 'CharacterContinuityRate',
        'LongestVariableValue', 'URL_DigitCount', 'host_DigitCount',
        'Directory_DigitCount', 'File_name_DigitCount', 'Extension_DigitCount',
        'Query_DigitCount', 'URL_Letter_Count', 'host_letter_count',
        'Directory_LetterCount', 'Filename_LetterCount',
        'Extension_LetterCount', 'Query_LetterCount', 'Lo

In [116]:
# Column labels of the two frames, shown together as a 2-tuple.
tuple(frame.columns for frame in (TextFrameImage_features_new, phishingcsv))

(Index(['has_ip', 'long_url', 'short_service', 'has_at',
        'double_slash_redirect', 'pref_suf', 'has_sub_domain', 'ssl_state',
        'long_domain', 'port', 'https_token', 'req_url', 'url_of_anchor',
        'submit_to_email', 'domain_Age', 'dns_record', 'traffic', 'page_rank',
        'links_to_page', 'links_to_page.1', 'right_click', 'popup', 'redirect',
        'tag_links', 'shf', 'sty_similarity', 'lay_similarity', 'stats_report',
        'favicon', 'Image_size', 'alt_text', 'form_login', 'mouseover',
        'iframe_redirection', 'has_https', 'target'],
       dtype='object'),
 Index(['Index', 'UsingIP', 'LongURL', 'ShortURL', 'Symbol@', 'Redirecting//',
        'PrefixSuffix-', 'SubDomains', 'HTTPS', 'DomainRegLen', 'Favicon',
        'NonStdPort', 'HTTPSDomainURL', 'RequestURL', 'AnchorURL',
        'LinksInScriptTags', 'ServerFormHandler', 'InfoEmail', 'AbnormalURL',
        'WebsiteForwarding', 'StatusBarCust', 'DisableRightClick',
        'UsingPopupWindow', 'IframeRed

In [173]:
# Hold out 30% of training_dataset for validation: the last column is the label,
# every other column is a feature.
x_train, x_test, y_train, y_test = train_test_split(
    training_dataset[training_dataset.columns[:-1]],
    training_dataset[training_dataset.columns[-1]],
    test_size=0.3,
    random_state=42,  # fixed seed so the split is identical on every re-run
)
# Cast to int16 BEFORE remapping: training_dataset is built from ARFF string rows,
# and comparing string labels with the integer -1 would never match, silently
# leaving the -1 labels in place.
x_train = np.array(x_train).astype(np.int16)
y_train = np.array(y_train).astype(np.int16)
x_test = np.array(x_test).astype(np.int16)
y_test = np.array(y_test).astype(np.int16)
# Remap label -1 -> 0; every other label value is left as is.
y_train = np.where(y_train == -1, 0, y_train).astype(np.int16)
y_test = np.where(y_test == -1, 0, y_test).astype(np.int16)

In [10]:
# Hold out 30% of phishingcsv for validation: the last column is the label,
# every other column is a feature.
# NOTE(review): columns[:-1] keeps the leading 'Index' column as a feature; it looks
# like a row identifier — confirm it should be a model input. The models trained on
# this split assume 31 input features, so dropping it means updating them as well.
x_train, x_test, y_train, y_test = train_test_split(
    phishingcsv[phishingcsv.columns[:-1]],
    phishingcsv[phishingcsv.columns[-1]],
    test_size=0.3,
    random_state=42,  # fixed seed so the split is identical on every re-run
)
# Remap label -1 -> 0; every other label value is left as is.
y_train = np.where(y_train == -1, 0, y_train)
y_test = np.where(y_test == -1, 0, y_test)
# Cast everything to float32 NumPy arrays before handing the data to Keras.
x_train = np.array(x_train).astype(np.float32)
y_train = np.array(y_train).astype(np.float32)
x_test = np.array(x_test).astype(np.float32)
y_test = np.array(y_test).astype(np.float32)

In [7]:
# Hold out 30% of dataset_b for validation: the last column is the label and the
# first column is skipped (presumably a non-numeric identifier — confirm).
x_train, x_test, y_train, y_test = train_test_split(
    dataset_b[dataset_b.columns[1:-1]],
    dataset_b[dataset_b.columns[-1]],
    test_size=0.3,
    random_state=42,  # fixed seed so the split is identical on every re-run
)
# Encode the string labels: 'phishing' -> 0, 'legitimate' -> 1.
y_train = np.where(y_train == 'phishing', 0, y_train)
y_test = np.where(y_test == 'phishing', 0, y_test)
y_train = np.where(y_train == 'legitimate', 1, y_train)
y_test = np.where(y_test == 'legitimate', 1, y_test)
# Cast everything to float32 NumPy arrays before handing the data to Keras.
x_train = np.array(x_train).astype(np.float32)
y_train = np.array(y_train).astype(np.float32)
x_test = np.array(x_test).astype(np.float32)
y_test = np.array(y_test).astype(np.float32)

In [None]:
# Hybrid LSTM -> Dense -> Conv2D network over inputs viewed as 29 steps x 3 features.
model = keras.Sequential([
    keras.layers.LSTM(32, activation='relu', return_sequences=True, input_shape=(29, 3)),
    Flatten(),
    Dense(128, activation='relu'),
    # View the 128 dense activations as a 32x2 grid with 2 channels for the conv layer.
    keras.layers.Reshape((32, 2, 2)),
    keras.layers.Conv2D(128, (1, 1), activation='relu'),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    # binary_crossentropy expects a probability in [0, 1]; the previous tanh
    # output ranges over (-1, 1), so a sigmoid output is used instead.
    Dense(1, activation='sigmoid'),
])

# Requires x_train / x_test with 87 features per row (29 * 3).
x = x_train.reshape((x_train.shape[0], 29, 3))
xt = x_test.reshape((x_test.shape[0], 29, 3))

model.compile(loss='binary_crossentropy', optimizer='adamax', metrics=['accuracy'])
modelhistory = model.fit(x, y_train, epochs=20, validation_data=(xt, y_test))

In [None]:
# Hybrid Dense -> LSTM -> Conv2D network for inputs with 31 features per row.
# NOTE(review): in document order the preceding split cell produces 87-feature
# rows; make sure a 31-feature split is the one in memory before running this.
model = keras.Sequential([
    Dense(64, input_shape=(31,), activation='relu'),
    Dense(128, activation='relu'),
    Dense(128, activation='relu'),
    # View the 128 dense activations as 32 time steps of 4 features.
    keras.layers.Reshape((32, 4)),
    # Only the first layer needs input_shape; the redundant inner ones were removed.
    keras.layers.LSTM(32, activation='relu', return_sequences=True),
    Flatten(),
    # 32 steps * 32 units = 1024 values, viewed as a 32x16 grid with 2 channels.
    keras.layers.Reshape((32, 16, 2)),
    # The former input_shape=(32, 15, 2) here contradicted the (32, 16, 2) tensor
    # produced by the Reshape above, so it was dropped.
    keras.layers.Conv2D(128, (1, 1), activation='relu'),
    Flatten(),
    # binary_crossentropy expects a probability in [0, 1]; the previous softplus
    # output is unbounded above, so a sigmoid output is used instead.
    Dense(1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy', optimizer='adamax', metrics=['accuracy'])
modelhistory = model.fit(x_train, y_train, epochs=20, validation_data=(x_test, y_test))

In [None]:
# Hybrid Dense -> Conv2D -> LSTM network for inputs with 31 features per row.
# NOTE(review): in document order the preceding split cell produces 87-feature
# rows; make sure a 31-feature split is the one in memory before running this.
model = keras.Sequential([
    Dense(64, input_shape=(31,), activation='relu'),
    Dense(128, activation='relu'),
    Dense(128, activation='relu'),
    # View the 128 dense activations as a 16x4 grid with 2 channels.
    keras.layers.Reshape((16, 4, 2)),
    # Only the first layer needs input_shape; the redundant inner ones were removed.
    keras.layers.Conv2D(128, (1, 1), activation='relu'),
    Flatten(),
    # 16 * 4 positions * 128 filters = 8192 values, viewed as 64 steps of 128 features.
    keras.layers.Reshape((64, 128)),
    keras.layers.LSTM(32, activation='relu', return_sequences=True),
    Flatten(),
    # binary_crossentropy expects a probability in [0, 1]; the previous softplus
    # output is unbounded above, so a sigmoid output is used instead.
    Dense(1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy', optimizer='adamax', metrics=['accuracy'])
modelhistory = model.fit(x_train, y_train, epochs=20, validation_data=(x_test, y_test))

In [13]:
# LSTM stack chained into a CNN stack, for inputs viewed as one step of 31 features.
model = keras.Sequential([
    keras.layers.LSTM(64, activation='relu', return_sequences=True, input_shape=(1, 31)),
    keras.layers.LSTM(128, activation='relu'),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dropout(0.2),
    # NOTE(review): this single-unit layer squeezes the whole representation into
    # one scalar before it is expanded again; confirm the bottleneck is intended.
    keras.layers.Dense(1, activation='softplus'),
    keras.layers.Dense(30, activation='relu'),
    # View the 30 activations as a 2x5 grid with 3 channels for the conv layers.
    keras.layers.Reshape((2, 5, 3)),
    # Only the first layer needs input_shape; the redundant inner one was removed.
    keras.layers.Conv2D(64, (1, 1), activation='relu'),
    keras.layers.MaxPool2D(2, 2),
    keras.layers.Conv2D(128, (1, 1), activation='relu'),
    keras.layers.Conv2D(128, (1, 1), activation='relu'),
    keras.layers.Dropout(0.2),
    keras.layers.Flatten(),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dropout(0.2),
    # binary_crossentropy expects a probability in [0, 1]; the previous softplus
    # output is unbounded above, so a sigmoid output is used instead.
    keras.layers.Dense(1, activation='sigmoid'),
])

# Requires x_train / x_test with 31 features per row.
x = x_train.reshape((x_train.shape[0], 1, 31))
xx = x_test.reshape((x_test.shape[0], 1, 31))

model.compile(loss='binary_crossentropy', optimizer='adamax', metrics=['accuracy'])
modelhistory = model.fit(x, y_train, epochs=20, validation_data=(xx, y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
