# Phishing Website Detection using Neural Network
The aim of the experiments conducted in this notebook is to give an idea of how modern _phishing website attacks_ can be prevented using machine learning. To do this, we are going to use the [Phishing Websites' Dataset](https://archive.ics.uci.edu/ml/datasets/phishing+websites). 

Initially, the URLs from the dataset were converted to numerical features using the "Feature Extraction" file, which classifies each URL and assigns values based on the research performed during the Capstone Project. The results were then stored in a CSV file, which is used in this notebook.

We will start off the experiments by importing the initial set of Python modules.  

In [None]:
# Suppress all Python warnings so they do not clutter the cell outputs
import warnings
warnings.filterwarnings("ignore")

# Third-party imports: data handling, modelling, data splitting and plotting
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
import io
# Load the extracted-feature CSV (described in the introduction) into a DataFrame
df = pd.read_csv('csv_result-Phishing_features (1).csv')

### Data loading and basic display

In [None]:
# Preview the first rows of the loaded DataFrame
df.head()

In [None]:
# Target vector: the 'Result' column of the loaded DataFrame
y = df['Result']

In [None]:
# Recode the -1 class as 0. The networks below end in a sigmoid unit trained
# with binary cross-entropy, which expects 0/1 targets.
# NOTE(review): assumes the only other label is 1 - confirm with the class counts.
y = y.replace(to_replace=-1, value=0)
y

In [None]:
# Feature matrix: the 30 columns at positions 1..30 of df.
# NOTE(review): presumably position 0 is a row id and 'Result' follows position 30 - confirm against the CSV header.
X = df.iloc[:,1:31]

In [None]:
# Preview the first rows of the feature matrix
X.head()  

### Inspecting data dimensions and column names

In [None]:
# Dimensions of the feature matrix as (rows, columns)
X.shape

In [None]:
# Names of the feature columns
X.columns

### Finding out the distribution of the class labels and preparing a report

In [None]:
from collections import Counter

# Tally how many rows carry each label in the 'Result' column of df,
# then list the labels from most to least frequent.
classes = Counter()
classes.update(df['Result'].values)
classes.most_common()

In [None]:
# Turn the (label, count) pairs into a two-column report table.
class_dist = pd.DataFrame(
    data=classes.most_common(),
    columns=['Class', 'Num_Observations'],
)
class_dist

### Preparing a basic bar plot of the distribution of the class labels

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')

subplot = class_dist.groupby('Class')['Num_Observations'].sum().plot(kind='barh', width=0.2, figsize=(10,8),color=('skyblue','lightgreen'))

subplot.set_title('Class distribution of the websites', fontsize = 15)
subplot.set_xlabel('Number of Observations', fontsize = 14)
subplot.set_ylabel('Class', fontsize = 14)

for i in subplot.patches:
    subplot.text(i.get_width()+0.1, i.get_y()+0.1, \
                 str(i.get_width()), fontsize=11)

### Finding out the summary statistics from the data

In [None]:
# Summary statistics of every feature, transposed so each feature is one row
X.describe().T

### Finding out the basic information of the columns present in the dataset

In [None]:
# Per-column dtype and non-null count of the feature matrix
X.info()

### Data splitting

In [None]:
from sklearn.model_selection import train_test_split

# Stage 1: hold out 33% of all rows as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.33,
    random_state=1,
)

# Stage 2: hold out 33% of the remaining rows as the validation set;
# what is left is the final training set.
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train,
    test_size=0.33,
    random_state=1,
)

### Defining an EarlyStopping callback

In [None]:
from keras import callbacks

# Shared early-stopping rule for every fit() call below: stop once the
# training loss has not improved by at least 0.001 for 5 consecutive epochs.
# NOTE(review): this watches the training loss ('loss'), not 'val_loss'.
es_cb = callbacks.EarlyStopping(
    monitor='loss',
    patience=5,
    min_delta=0.001,
)

## Trying to improve the predictive performance with Neural Networks

### Model-1 with SGD Optimizer

In [None]:
import tensorflow as tf

# Model-1: fully connected binary classifier with 512 -> 512 -> 256 ReLU
# units and a single sigmoid output, optimised with SGD.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(512, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(512, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(256, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(1, activation=tf.nn.sigmoid))

model.compile(
    loss='binary_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
)

# One sample per gradient step (batch_size=1) for at most 15 epochs; the
# shared EarlyStopping callback may end the run sooner.
history_sgd1 = model.fit(
    X_train,
    y_train,
    batch_size=1,
    epochs=15,
    validation_data=(X_val, y_val),
    callbacks=[es_cb],
)

# Loss and accuracy on the held-out test split (shown as the cell output).
model.evaluate(X_test, y_test)

In [None]:
# Training vs. validation loss per epoch for Model-1 (SGD).
loss_train = history_sgd1.history['loss']
loss_val = history_sgd1.history['val_loss']
# EarlyStopping can end training before epoch 15, so size the x-axis from the
# number of epochs actually recorded instead of assuming 15.
epochs = range(1, len(loss_train) + 1)
plt.plot(epochs, loss_train, 'g', label='Training loss')
plt.plot(epochs, loss_val, 'b', label='validation loss')
plt.title('Training and Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Train Loss of Model-1 SGD')
plt.legend()
plt.show()

In [None]:
# Training vs. validation accuracy per epoch for Model-1 (SGD).
acc_train_sgd1 = history_sgd1.history['accuracy']
acc_val_sgd1 = history_sgd1.history['val_accuracy']
# EarlyStopping can end training before epoch 15, so size the x-axis from the
# number of epochs actually recorded instead of assuming 15.
epochs = range(1, len(acc_train_sgd1) + 1)
plt.plot(epochs, acc_train_sgd1, 'g', label='Training accuracy')
plt.plot(epochs, acc_val_sgd1, 'b', label='validation accuracy')
plt.title('Training and Validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Train Accuracy of Model-1 SGD')
plt.legend()
plt.show()

In [None]:
# Layer-by-layer summary of the Model-1 network trained with SGD above
model.summary()

### Model-1 with RMSProp Optimizer

In [None]:
import tensorflow as tf

# Model-1 again (512 -> 512 -> 256 ReLU units, sigmoid output), this time
# optimised with RMSProp. Rebinding `model` replaces the SGD-trained network.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(512, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(512, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(256, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(1, activation=tf.nn.sigmoid))

model.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

# One sample per gradient step (batch_size=1) for at most 15 epochs; the
# shared EarlyStopping callback may end the run sooner.
history_rmsprop1 = model.fit(
    X_train,
    y_train,
    batch_size=1,
    epochs=15,
    validation_data=(X_val, y_val),
    callbacks=[es_cb],
)

# Loss and accuracy on the held-out test split (shown as the cell output).
model.evaluate(X_test, y_test)

In [None]:
# Training vs. validation loss per epoch for Model-1 (RMSProp).
loss_train = history_rmsprop1.history['loss']
loss_val = history_rmsprop1.history['val_loss']
# EarlyStopping can end training before epoch 15, so size the x-axis from the
# number of epochs actually recorded instead of assuming 15.
epochs = range(1, len(loss_train) + 1)
plt.plot(epochs, loss_train, 'g', label='Training loss')
plt.plot(epochs, loss_val, 'b', label='validation loss')
plt.title('Training and Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Train Loss of Model-1 RMSProp')
plt.legend()
plt.show()

In [None]:
# Training vs. validation accuracy per epoch for Model-1 (RMSProp).
acc_train_rmsprop1 = history_rmsprop1.history['accuracy']
acc_val_rmsprop1 = history_rmsprop1.history['val_accuracy']
# EarlyStopping can end training before epoch 15, so size the x-axis from the
# number of epochs actually recorded instead of assuming 15.
epochs = range(1, len(acc_train_rmsprop1) + 1)
plt.plot(epochs, acc_train_rmsprop1, 'g', label='Training accuracy')
plt.plot(epochs, acc_val_rmsprop1, 'b', label='validation accuracy')
plt.title('Training and Validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Train Accuracy of Model-1 RMSProp')
plt.legend()
plt.show()

In [None]:
# Layer-by-layer summary of the Model-1 network trained with RMSProp above
model.summary()

### Model-1 with Adam Optimizer

In [None]:
# Model-1 again (512 -> 512 -> 256 ReLU units, sigmoid output), this time
# optimised with Adam. Rebinding `model` replaces the RMSProp-trained network.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(512, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(512, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(256, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(1, activation=tf.nn.sigmoid))

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# One sample per gradient step (batch_size=1) for at most 15 epochs; the
# shared EarlyStopping callback may end the run sooner.
history_adam1 = model.fit(
    X_train,
    y_train,
    batch_size=1,
    epochs=15,
    validation_data=(X_val, y_val),
    callbacks=[es_cb],
)

# Loss and accuracy on the held-out test split (shown as the cell output).
model.evaluate(X_test, y_test)

In [None]:
# Training vs. validation loss per epoch for Model-1 (Adam).
loss_train = history_adam1.history['loss']
loss_val = history_adam1.history['val_loss']
# EarlyStopping can end training before epoch 15, so size the x-axis from the
# number of epochs actually recorded instead of assuming 15.
epochs = range(1, len(loss_train) + 1)
plt.plot(epochs, loss_train, 'g', label='Training loss')
plt.plot(epochs, loss_val, 'b', label='validation loss')
plt.title('Training and Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Train Loss of Model-1 Adam')
plt.legend()
plt.show()

In [None]:
# Training vs. validation accuracy per epoch for Model-1 (Adam).
acc_train_adam1 = history_adam1.history['accuracy']
acc_val_adam1 = history_adam1.history['val_accuracy']
# EarlyStopping can end training before epoch 15, so size the x-axis from the
# number of epochs actually recorded instead of assuming 15.
epochs = range(1, len(acc_train_adam1) + 1)
plt.plot(epochs, acc_train_adam1, 'g', label='Training accuracy')
plt.plot(epochs, acc_val_adam1, 'b', label='validation accuracy')
plt.title('Training and Validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Train Accuracy of Model-1 Adam')
plt.legend()
plt.show()

In [None]:
# Layer-by-layer summary of the Model-1 network trained with Adam above
model.summary()

### Comparison between Optimizers for Model-1

In [None]:
import matplotlib.pyplot as plt

# Test accuracy of Model-1 per optimizer.
# NOTE(review): the numbers are hard-coded - presumably copied from the
# evaluate() outputs of an earlier run; they do not update on re-run.
optimizer_names = ['SGD','RMSProp','Adam']
test_accuracies = [0.9542,0.9263,0.9446]

plt.xlabel('Optimizers')
plt.ylabel('Test Accuracy of Model1')
plt.plot(optimizer_names, test_accuracies)

### Model-2 with Adam Optimizer
### In Model-2 we increase the number of neurons, so we also add dropout to reduce overfitting

In [None]:
import tensorflow as tf

# Model-2: 512 -> 512 -> 1024 ReLU units, 20% dropout before the single
# sigmoid output, optimised with Adam. Kept in `model1` so that `model`
# still refers to the last Model-1 network.
model1 = tf.keras.models.Sequential()
model1.add(tf.keras.layers.Dense(512, activation=tf.nn.relu))
model1.add(tf.keras.layers.Dense(512, activation=tf.nn.relu))
model1.add(tf.keras.layers.Dense(1024, activation=tf.nn.relu))
model1.add(tf.keras.layers.Dropout(0.2))
model1.add(tf.keras.layers.Dense(1, activation=tf.nn.sigmoid))

model1.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# One sample per gradient step (batch_size=1) for at most 15 epochs; the
# shared EarlyStopping callback may end the run sooner.
history_adam2 = model1.fit(
    X_train,
    y_train,
    batch_size=1,
    epochs=15,
    validation_data=(X_val, y_val),
    callbacks=[es_cb],
)

# Loss and accuracy on the held-out test split (shown as the cell output).
model1.evaluate(X_test, y_test)

In [None]:
# Training vs. validation loss per epoch for Model-2 (Adam).
loss_train = history_adam2.history['loss']
loss_val = history_adam2.history['val_loss']
# EarlyStopping can end training before epoch 15, so size the x-axis from the
# number of epochs actually recorded instead of assuming 15.
epochs = range(1, len(loss_train) + 1)
plt.plot(epochs, loss_train, 'g', label='Training loss')
plt.plot(epochs, loss_val, 'b', label='validation loss')
plt.title('Training and Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Train Loss of Model-2 Adam')
plt.legend()
plt.show()

In [None]:
# Training vs. validation accuracy per epoch for Model-2 (Adam).
acc_train_adam2 = history_adam2.history['accuracy']
acc_val_adam2 = history_adam2.history['val_accuracy']
# EarlyStopping can end training before epoch 15, so size the x-axis from the
# number of epochs actually recorded instead of assuming 15.
epochs = range(1, len(acc_train_adam2) + 1)
plt.plot(epochs, acc_train_adam2, 'g', label='Training accuracy')
plt.plot(epochs, acc_val_adam2, 'b', label='validation accuracy')
plt.title('Training and Validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Train Accuracy of Model-2 Adam')
plt.legend()
plt.show()

In [None]:
# Model-2 is bound to `model1`; `model` still refers to the last Model-1
# network, so summarise `model1` to describe the network trained above.
model1.summary()

### Model-2 with RMSProp Optimizer

In [None]:
import tensorflow as tf

# Model-2 again (512 -> 512 -> 1024 ReLU units, 20% dropout, sigmoid output),
# this time optimised with RMSProp. Rebinding `model1` replaces the
# Adam-trained Model-2.
model1 = tf.keras.models.Sequential()
model1.add(tf.keras.layers.Dense(512, activation=tf.nn.relu))
model1.add(tf.keras.layers.Dense(512, activation=tf.nn.relu))
model1.add(tf.keras.layers.Dense(1024, activation=tf.nn.relu))
model1.add(tf.keras.layers.Dropout(0.2))
model1.add(tf.keras.layers.Dense(1, activation=tf.nn.sigmoid))

model1.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

# One sample per gradient step (batch_size=1) for at most 15 epochs; the
# shared EarlyStopping callback may end the run sooner.
history_rmsprop2 = model1.fit(
    X_train,
    y_train,
    batch_size=1,
    epochs=15,
    validation_data=(X_val, y_val),
    callbacks=[es_cb],
)

# Loss and accuracy on the held-out test split (shown as the cell output).
model1.evaluate(X_test, y_test)

In [None]:
# Training vs. validation loss per epoch for Model-2 (RMSProp).
loss_train = history_rmsprop2.history['loss']
loss_val = history_rmsprop2.history['val_loss']
# EarlyStopping can end training before epoch 15, so size the x-axis from the
# number of epochs actually recorded instead of assuming 15.
epochs = range(1, len(loss_train) + 1)
plt.plot(epochs, loss_train, 'g', label='Training loss')
plt.plot(epochs, loss_val, 'b', label='validation loss')
plt.title('Training and Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Train Loss of Model-2 RMSProp')
plt.legend()
plt.show()

In [None]:
# Training vs. validation accuracy per epoch for Model-2 (RMSProp).
acc_train_rmsprop2 = history_rmsprop2.history['accuracy']
acc_val_rmsprop2 = history_rmsprop2.history['val_accuracy']
# EarlyStopping can end training before epoch 15, so size the x-axis from the
# number of epochs actually recorded instead of assuming 15.
epochs = range(1, len(acc_train_rmsprop2) + 1)
plt.plot(epochs, acc_train_rmsprop2, 'g', label='Training accuracy')
plt.plot(epochs, acc_val_rmsprop2, 'b', label='validation accuracy')
plt.title('Training and Validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Train Accuracy of Model-2 RMSProp')
plt.legend()
plt.show()

In [None]:
# Model-2 is bound to `model1`; `model` still refers to the last Model-1
# network, so summarise `model1` to describe the network trained above.
model1.summary()

### Model-2 with SGD Optimizer

In [None]:
import tensorflow as tf

# Model-2 again (512 -> 512 -> 1024 ReLU units, 20% dropout, sigmoid output),
# this time optimised with SGD. Rebinding `model1` replaces the
# RMSProp-trained Model-2.
model1 = tf.keras.models.Sequential()
model1.add(tf.keras.layers.Dense(512, activation=tf.nn.relu))
model1.add(tf.keras.layers.Dense(512, activation=tf.nn.relu))
model1.add(tf.keras.layers.Dense(1024, activation=tf.nn.relu))
model1.add(tf.keras.layers.Dropout(0.2))
model1.add(tf.keras.layers.Dense(1, activation=tf.nn.sigmoid))

model1.compile(
    loss='binary_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
)

# One sample per gradient step (batch_size=1) for at most 15 epochs; the
# shared EarlyStopping callback may end the run sooner.
history_sgd2 = model1.fit(
    X_train,
    y_train,
    batch_size=1,
    epochs=15,
    validation_data=(X_val, y_val),
    callbacks=[es_cb],
)

# Loss and accuracy on the held-out test split (shown as the cell output).
model1.evaluate(X_test, y_test)

In [None]:
# Training vs. validation loss per epoch for Model-2 (SGD).
loss_train = history_sgd2.history['loss']
loss_val = history_sgd2.history['val_loss']
# EarlyStopping can end training before epoch 15, so size the x-axis from the
# number of epochs actually recorded instead of assuming 15.
epochs = range(1, len(loss_train) + 1)
plt.plot(epochs, loss_train, 'g', label='Training loss')
plt.plot(epochs, loss_val, 'b', label='validation loss')
plt.title('Training and Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Train Loss of Model-2 SGD')
plt.legend()
plt.show()

In [None]:
# Training vs. validation accuracy per epoch for Model-2 (SGD).
acc_train_sgd2 = history_sgd2.history['accuracy']
acc_val_sgd2 = history_sgd2.history['val_accuracy']
# EarlyStopping can end training before epoch 15, so size the x-axis from the
# number of epochs actually recorded instead of assuming 15.
epochs = range(1, len(acc_train_sgd2) + 1)
plt.plot(epochs, acc_train_sgd2, 'g', label='Training accuracy')
plt.plot(epochs, acc_val_sgd2, 'b', label='validation accuracy')
plt.title('Training and Validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Train Accuracy of Model-2 SGD')
plt.legend()
plt.show()

In [None]:
# Model-2 is bound to `model1`; `model` still refers to the last Model-1
# network, so summarise `model1` to describe the network trained above.
model1.summary()

### Comparison between Optimizers for Model-2

In [None]:
import matplotlib.pyplot as plt

# Test accuracy of Model-2 per optimizer.
# NOTE(review): the numbers are hard-coded - presumably copied from the
# evaluate() outputs of an earlier run; they do not update on re-run.
l = [0.9553,0.9523,0.9263]
l1 = ['Adam','RMSProp','SGD']

# This chart is for Model-2; the label previously read "Model1".
plt.ylabel('Test Accuracy of Model-2')
plt.xlabel('Optimizers')
plt.plot(l1, l)

### Comparison with SGD Optimizer for both models

In [None]:
# Validation accuracy per epoch of Model-1 vs. Model-2, both trained with SGD.
# Each run gets its own epoch axis because early stopping can make the two
# histories different lengths.
epochs_model1 = range(1, len(acc_val_sgd1) + 1)
epochs_model2 = range(1, len(acc_val_sgd2) + 1)
plt.plot(epochs_model1, acc_val_sgd1, label='Model-1')
plt.plot(epochs_model2, acc_val_sgd2, label='Model-2')
plt.xlabel("Epochs")
plt.ylabel("Validation Accuracy")
plt.legend()
plt.show()

### Comparison with RMSProp Optimizer for both models

In [None]:
# Validation accuracy per epoch of Model-1 vs. Model-2, both trained with
# RMSProp. Each run gets its own epoch axis because early stopping can make
# the two histories different lengths.
epochs_model1 = range(1, len(acc_val_rmsprop1) + 1)
epochs_model2 = range(1, len(acc_val_rmsprop2) + 1)
plt.plot(epochs_model1, acc_val_rmsprop1, label='Model-1')
plt.plot(epochs_model2, acc_val_rmsprop2, label='Model-2')
plt.xlabel("Epochs")
plt.ylabel("Validation Accuracy")
plt.legend()
plt.show()

### Comparison with Adam Optimizer for both models

In [None]:
# Validation accuracy per epoch of Model-1 vs. Model-2, both trained with
# Adam. Each run gets its own epoch axis because early stopping can make the
# two histories different lengths.
epochs_model1 = range(1, len(acc_val_adam1) + 1)
epochs_model2 = range(1, len(acc_val_adam2) + 1)
plt.plot(epochs_model1, acc_val_adam1, label='Model-1')
plt.plot(epochs_model2, acc_val_adam2, label='Model-2')
plt.xlabel("Epochs")
plt.ylabel("Validation Accuracy")
plt.legend()
plt.show()

## Model Enhancement with TDLHBA hyperparameters and visualization

`TDLHBA` is a technique introduced [in this paper](https://dl.acm.org/citation.cfm?id=3227655). We will use the hyperparameter values presented in the paper to see whether they improve the performance of the model. 

In [None]:
# Imports
from keras.models import Sequential
from keras.layers import Dense
# NOTE(review): wildcard import kept in case other cells rely on names it
# provides; only `Adam` is used in this cell.
from keras.optimizers import *

# Small fully connected network: 30 inputs -> 40 ReLU -> 30 ReLU -> 1 sigmoid,
# every layer initialised with the 'uniform' kernel initializer.
model_TDLHBA = Sequential()

model_TDLHBA.add(Dense(40, activation='relu',
          kernel_initializer='uniform',input_dim=30))
model_TDLHBA.add(Dense(30, activation='relu',
          kernel_initializer='uniform'))
model_TDLHBA.add(Dense(1,  activation='sigmoid', 
          kernel_initializer='uniform'))

# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and rejected by newer Keras releases.
adam = Adam(learning_rate=0.0017470)
model_TDLHBA.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])

In [None]:
# Train for up to 100 epochs in mini-batches of 10. No validation data is
# passed here; the shared EarlyStopping callback may end the run sooner.
history_TDLHBA = model_TDLHBA.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=10,
    verbose=1,
    callbacks=[es_cb],
)

# evaluate() returns the loss followed by the compiled metrics, so index 1 is
# the accuracy on the test split.
scores = model_TDLHBA.evaluate(X_test, y_test)
accuracy_percent = scores[1]*100
report = '\nAccuracy score of the Neural Network with TDLHBA hyperparameter settings {0:.2f}%'.format(accuracy_percent)
print(report)

In [None]:
# Number of epochs actually run. Early stopping can end training before the
# 100 requested epochs, so read the count from the history instead of
# hard-coding it.
N = len(history_TDLHBA.history["loss"])
epoch_index = np.arange(0, N)

plt.figure(figsize=(8,6))
plt.plot(epoch_index, history_TDLHBA.history["loss"], label="train_loss")
plt.plot(epoch_index, history_TDLHBA.history["accuracy"], label="train_acc")

plt.title("Training Loss and Accuracy on the dataset (with TDLHBA hyperparameter settings)")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
# "middle" is not a valid matplotlib legend location; "center right" is.
plt.legend(loc="center right")
plt.show()