In [1]:
#*****************************************Import********************************************
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import warnings
from sklearn.model_selection import cross_val_score
from sklearn import metrics
from sklearn import preprocessing
import matplotlib.pyplot as plt
warnings.filterwarnings('ignore')
import time

df=pd.read_csv("dataset/PhishingDataset.csv")

ROWS = len(df.axes[0]) 
COLUMNS = len(df.axes[1])
df = df.replace([np.inf, -np.inf], np.nan)
df=df.dropna()
LABEL = df.iloc[:,-1:].columns[0]
print("Row-Column Count before cleaning: (", ROWS , " , ",  COLUMNS , ")")

df.iloc[:,-1:].value_counts()

Row-Column Count before cleaning: ( 235795  ,  57 )


label
1        134850
0        100945
dtype: int64

In [2]:
cols = df.select_dtypes(include=['float64','int64']).columns
cols=cols.drop('URLSimilarityIndex')
df = pd.DataFrame(df[cols]).copy()
df = df.reset_index(drop=True)
LABEL = df.iloc[:,-1:].columns[0]

Train, Test=train_test_split(df,test_size=0.3, random_state=4)

yTrain = pd.DataFrame(Train[LABEL]).copy()
yTest = pd.DataFrame(Test[LABEL]).copy()

Train.drop(LABEL, axis=1, inplace=True)
Test.drop(LABEL, axis=1, inplace=True)

xTrain = pd.DataFrame(Train).copy()
xTest = pd.DataFrame(Test).copy()
print("Finished Train-Test Split.")
PERFORMANCE = pd.DataFrame(columns=['Model','Accuracy','Precision','Recall','F1Score','TrainingTime','PredictionTime'])

Finished Train-Test Split.


In [3]:
from sklearn.naive_bayes import GaussianNB
model = GaussianNB()

start = time.time()
n = len(yTrain.axes[0])
for i in range(0,n):
    try:
        X = xTrain.iloc[[i]].to_numpy() 
        Y = yTrain.iloc[[i]]
        Y = np.array(Y[LABEL])
        model.partial_fit(X, Y, classes=[0,1])        
    except Exception as ex:
        print(ex)

end = time.time()
trainTime = int(end - start)

start = time.time()
X = xTrain.to_numpy()
Y = np.array(yTrain[LABEL])
predict = model.predict(X)
accuracy = metrics.accuracy_score(Y, predict)
precision = metrics.precision_score(Y, predict)
recall = metrics.recall_score(Y, predict)
f1score = metrics.f1_score(Y, predict, zero_division=1)
end = time.time()
predictionTime = int(end - start)
trainRow = {'Model':"GaussianNB_Training",'Accuracy':accuracy, 'Precision':precision, 'Recall':recall, 'F1Score':f1score,
           'TrainingTime':trainTime,'PredictionTime':predictionTime}

start = time.time()
X = xTest.to_numpy()
Y = np.array(yTest[LABEL])
predict = model.predict(X)
accuracy = metrics.accuracy_score(Y, predict)
precision = metrics.precision_score(Y, predict)
recall = metrics.recall_score(Y, predict)
f1score = metrics.f1_score(Y, predict, zero_division=1)
end = time.time()
predictionTime = int(end - start)
testRow = {'Model':"GaussianNB_Prediction",'Accuracy':accuracy, 'Precision':precision, 'Recall':recall, 'F1Score':f1score,
           'TrainingTime':trainTime,'PredictionTime':predictionTime}

print(trainRow)
print(testRow)
PERFORMANCE = PERFORMANCE.append(trainRow , ignore_index=True)
PERFORMANCE = PERFORMANCE.append(testRow , ignore_index=True)

{'Model': 'GaussianNB_Training', 'Accuracy': 0.5720482745250097, 'Precision': 0.5720482745250097, 'Recall': 1.0, 'F1Score': 0.7277744377129292, 'TrainingTime': 140, 'PredictionTime': 0}
{'Model': 'GaussianNB_Prediction', 'Accuracy': 0.571537624224261, 'Precision': 0.571537624224261, 'Recall': 1.0, 'F1Score': 0.7273610448956095, 'TrainingTime': 140, 'PredictionTime': 0}


In [4]:
from sklearn.naive_bayes import MultinomialNB
model = MultinomialNB()

start = time.time()
n = len(yTrain.axes[0])
for i in range(0,n):
    try:
        X = xTrain.iloc[[i]].to_numpy() 
        Y = yTrain.iloc[[i]]
        Y = np.array(Y[LABEL])
        model.partial_fit(X, Y, classes=[0,1])        
    except Exception as ex:
        print(ex)

end = time.time()
trainTime = int(end - start)

start = time.time()
X = xTrain.to_numpy()
Y = np.array(yTrain[LABEL])
predict = model.predict(X)
accuracy = metrics.accuracy_score(Y, predict)
precision = metrics.precision_score(Y, predict)
recall = metrics.recall_score(Y, predict)
f1score = metrics.f1_score(Y, predict, zero_division=1)
end = time.time()
predictionTime = int(end - start)
trainRow = {'Model':"MultinomialNB_Training",'Accuracy':accuracy, 'Precision':precision, 'Recall':recall, 'F1Score':f1score,
           'TrainingTime':trainTime,'PredictionTime':predictionTime}

start = time.time()
X = xTest.to_numpy()
Y = np.array(yTest[LABEL])
predict = model.predict(X)
accuracy = metrics.accuracy_score(Y, predict)
precision = metrics.precision_score(Y, predict)
recall = metrics.recall_score(Y, predict)
f1score = metrics.f1_score(Y, predict, zero_division=1)
end = time.time()
predictionTime = int(end - start)
testRow = {'Model':"MultinomialNB_Prediction",'Accuracy':accuracy, 'Precision':precision, 'Recall':recall, 'F1Score':f1score,
           'TrainingTime':trainTime,'PredictionTime':predictionTime}

print(trainRow)
print(testRow)
PERFORMANCE = PERFORMANCE.append(trainRow , ignore_index=True)
PERFORMANCE = PERFORMANCE.append(testRow , ignore_index=True)

{'Model': 'MultinomialNB_Training', 'Accuracy': 0.6893660333462582, 'Precision': 0.6528531549786739, 'Recall': 0.9759055284897268, 'F1Score': 0.7823418038563096, 'TrainingTime': 212, 'PredictionTime': 0}
{'Model': 'MultinomialNB_Prediction', 'Accuracy': 0.6908494606935354, 'Precision': 0.6537041024197154, 'Recall': 0.9762552559980213, 'F1Score': 0.7830650041166959, 'TrainingTime': 212, 'PredictionTime': 0}


In [5]:
from sklearn.naive_bayes import BernoulliNB
model = BernoulliNB()

start = time.time()
n = len(yTrain.axes[0])
for i in range(0,n):
    try:
        X = xTrain.iloc[[i]].to_numpy() 
        Y = yTrain.iloc[[i]]
        Y = np.array(Y[LABEL])
        model.partial_fit(X, Y, classes=[0,1])        
    except Exception as ex:
        print(ex)

end = time.time()
trainTime = int(end - start)

start = time.time()
X = xTrain.to_numpy()
Y = np.array(yTrain[LABEL])
predict = model.predict(X)
accuracy = metrics.accuracy_score(Y, predict)
precision = metrics.precision_score(Y, predict)
recall = metrics.recall_score(Y, predict)
f1score = metrics.f1_score(Y, predict, zero_division=1)
end = time.time()
predictionTime = int(end - start)
trainRow = {'Model':"BernoulliNB_Training",'Accuracy':accuracy, 'Precision':precision, 'Recall':recall, 'F1Score':f1score,
           'TrainingTime':trainTime,'PredictionTime':predictionTime}

start = time.time()
X = xTest.to_numpy()
Y = np.array(yTest[LABEL])
predict = model.predict(X)
accuracy = metrics.accuracy_score(Y, predict)
precision = metrics.precision_score(Y, predict)
recall = metrics.recall_score(Y, predict)
f1score = metrics.f1_score(Y, predict, zero_division=1)
end = time.time()
predictionTime = int(end - start)
testRow = {'Model':"BernoulliNB_Prediction",'Accuracy':accuracy, 'Precision':precision, 'Recall':recall, 'F1Score':f1score,
           'TrainingTime':trainTime,'PredictionTime':predictionTime}

print(trainRow)
print(testRow)
PERFORMANCE = PERFORMANCE.append(trainRow , ignore_index=True)
PERFORMANCE = PERFORMANCE.append(testRow , ignore_index=True)

{'Model': 'BernoulliNB_Training', 'Accuracy': 0.986768127181078, 'Precision': 0.9792177563489755, 'Recall': 0.9980512603262021, 'F1Score': 0.9885448136958712, 'TrainingTime': 158, 'PredictionTime': 0}
{'Model': 'BernoulliNB_Prediction', 'Accuracy': 0.9865420772134184, 'Precision': 0.9787533349502789, 'Recall': 0.99812020776651, 'F1Score': 0.9883419054616703, 'TrainingTime': 158, 'PredictionTime': 0}


In [6]:
from sklearn.linear_model import PassiveAggressiveClassifier
model = PassiveAggressiveClassifier()

start = time.time()
n = len(yTrain.axes[0])
for i in range(0,n):
    try:
        X = xTrain.iloc[[i]].to_numpy() 
        Y = yTrain.iloc[[i]]
        Y = np.array(Y[LABEL])
        model.partial_fit(X, Y, classes=[0,1])        
    except Exception as ex:
        print(ex)

end = time.time()
trainTime = int(end - start)

start = time.time()
X = xTrain.to_numpy()
Y = np.array(yTrain[LABEL])
predict = model.predict(X)
accuracy = metrics.accuracy_score(Y, predict)
precision = metrics.precision_score(Y, predict)
recall = metrics.recall_score(Y, predict)
f1score = metrics.f1_score(Y, predict, zero_division=1)
end = time.time()
predictionTime = int(end - start)
trainRow = {'Model':"PassiveAggressive_Training",'Accuracy':accuracy, 'Precision':precision, 'Recall':recall, 'F1Score':f1score,
           'TrainingTime':trainTime,'PredictionTime':predictionTime}

start = time.time()
X = xTest.to_numpy()
Y = np.array(yTest[LABEL])
predict = model.predict(X)
accuracy = metrics.accuracy_score(Y, predict)
precision = metrics.precision_score(Y, predict)
recall = metrics.recall_score(Y, predict)
f1score = metrics.f1_score(Y, predict, zero_division=1)
end = time.time()
predictionTime = int(end - start)
testRow = {'Model':"PassiveAggressive_Prediction",'Accuracy':accuracy, 'Precision':precision, 'Recall':recall, 'F1Score':f1score,
           'TrainingTime':trainTime,'PredictionTime':predictionTime}

print(trainRow)
print(testRow)
PERFORMANCE = PERFORMANCE.append(trainRow , ignore_index=True)
PERFORMANCE = PERFORMANCE.append(testRow , ignore_index=True)

{'Model': 'PassiveAggressive_Training', 'Accuracy': 0.9939535672741373, 'Precision': 0.9938364274537996, 'Recall': 0.9956047447574666, 'F1Score': 0.9947198002200964, 'TrainingTime': 108, 'PredictionTime': 0}
{'Model': 'PassiveAggressive_Prediction', 'Accuracy': 0.9941757729116895, 'Precision': 0.9942446398577216, 'Recall': 0.9955725946079644, 'F1Score': 0.9949081741107844, 'TrainingTime': 108, 'PredictionTime': 0}


In [7]:
from sklearn.linear_model import Perceptron
model = Perceptron()

start = time.time()
n = len(yTrain.axes[0])
for i in range(0,n):
    try:
        X = xTrain.iloc[[i]].to_numpy() 
        Y = yTrain.iloc[[i]]
        Y = np.array(Y[LABEL])
        model.partial_fit(X, Y, classes=[0,1])        
    except Exception as ex:
        print(ex)

end = time.time()
trainTime = int(end - start)

start = time.time()
X = xTrain.to_numpy()
Y = np.array(yTrain[LABEL])
predict = model.predict(X)
accuracy = metrics.accuracy_score(Y, predict)
precision = metrics.precision_score(Y, predict)
recall = metrics.recall_score(Y, predict)
f1score = metrics.f1_score(Y, predict, zero_division=1)
end = time.time()
predictionTime = int(end - start)
trainRow = {'Model':"Perceptron_Training",'Accuracy':accuracy, 'Precision':precision, 'Recall':recall, 'F1Score':f1score,
           'TrainingTime':trainTime,'PredictionTime':predictionTime}

start = time.time()
X = xTest.to_numpy()
Y = np.array(yTest[LABEL])
predict = model.predict(X)
accuracy = metrics.accuracy_score(Y, predict)
precision = metrics.precision_score(Y, predict)
recall = metrics.recall_score(Y, predict)
f1score = metrics.f1_score(Y, predict, zero_division=1)
end = time.time()
predictionTime = int(end - start)
testRow = {'Model':"Perceptron_Prediction",'Accuracy':accuracy, 'Precision':precision, 'Recall':recall, 'F1Score':f1score,
           'TrainingTime':trainTime,'PredictionTime':predictionTime}

print(trainRow)
print(testRow)
PERFORMANCE = PERFORMANCE.append(trainRow , ignore_index=True)
PERFORMANCE = PERFORMANCE.append(testRow , ignore_index=True)

{'Model': 'Perceptron_Training', 'Accuracy': 0.9415592283830942, 'Precision': 0.9084785290262894, 'Recall': 0.9984219445032833, 'F1Score': 0.9513290411124792, 'TrainingTime': 135, 'PredictionTime': 0}
{'Model': 'Perceptron_Prediction', 'Accuracy': 0.9436378800944316, 'Precision': 0.9112184333461217, 'Recall': 0.9986890922582241, 'F1Score': 0.9529507558324778, 'TrainingTime': 135, 'PredictionTime': 0}


In [8]:
from sklearn.linear_model import SGDClassifier
model = SGDClassifier()

start = time.time()
n = len(yTrain.axes[0])
for i in range(0,n):
    try:
        X = xTrain.iloc[[i]].to_numpy() 
        Y = yTrain.iloc[[i]]
        Y = np.array(Y[LABEL])
        model.partial_fit(X, Y, classes=[0,1])        
    except Exception as ex:
        print(ex)

end = time.time()
trainTime = int(end - start)

start = time.time()
X = xTrain.to_numpy()
Y = np.array(yTrain[LABEL])
predict = model.predict(X)
accuracy = metrics.accuracy_score(Y, predict)
precision = metrics.precision_score(Y, predict)
recall = metrics.recall_score(Y, predict)
f1score = metrics.f1_score(Y, predict, zero_division=1)
end = time.time()
predictionTime = int(end - start)
trainRow = {'Model':"SGDClassifier_Training",'Accuracy':accuracy, 'Precision':precision, 'Recall':recall, 'F1Score':f1score,
           'TrainingTime':trainTime,'PredictionTime':predictionTime}

start = time.time()
X = xTest.to_numpy()
Y = np.array(yTest[LABEL])
predict = model.predict(X)
accuracy = metrics.accuracy_score(Y, predict)
precision = metrics.precision_score(Y, predict)
recall = metrics.recall_score(Y, predict)
f1score = metrics.f1_score(Y, predict, zero_division=1)
end = time.time()
predictionTime = int(end - start)
testRow = {'Model':"SGDClassifier_Prediction",'Accuracy':accuracy, 'Precision':precision, 'Recall':recall, 'F1Score':f1score,
           'TrainingTime':trainTime,'PredictionTime':predictionTime}

print(trainRow)
print(testRow)
PERFORMANCE = PERFORMANCE.append(trainRow , ignore_index=True)
PERFORMANCE = PERFORMANCE.append(testRow , ignore_index=True)

{'Model': 'SGDClassifier_Training', 'Accuracy': 0.944327985653354, 'Precision': 0.913169095333663, 'Recall': 0.9975323024782885, 'F1Score': 0.9534882543796157, 'TrainingTime': 108, 'PredictionTime': 0}
{'Model': 'SGDClassifier_Prediction', 'Accuracy': 0.9465782665856175, 'Precision': 0.9160442254864123, 'Recall': 0.9979965372248331, 'F1Score': 0.9552659303715805, 'TrainingTime': 108, 'PredictionTime': 0}


In [9]:
PERFORMANCE.to_csv('dataset/ReportsMay19/P7_IdentifyClassifiersReport.csv',index = False)
PERFORMANCE

Unnamed: 0,Model,Accuracy,Precision,Recall,F1Score,TrainingTime,PredictionTime
0,GaussianNB_Training,0.572048,0.572048,1.0,0.727774,140,0
1,GaussianNB_Prediction,0.571538,0.571538,1.0,0.727361,140,0
2,MultinomialNB_Training,0.689366,0.652853,0.975906,0.782342,212,0
3,MultinomialNB_Prediction,0.690849,0.653704,0.976255,0.783065,212,0
4,BernoulliNB_Training,0.986768,0.979218,0.998051,0.988545,158,0
5,BernoulliNB_Prediction,0.986542,0.978753,0.99812,0.988342,158,0
6,PassiveAggressive_Training,0.993954,0.993836,0.995605,0.99472,108,0
7,PassiveAggressive_Prediction,0.994176,0.994245,0.995573,0.994908,108,0
8,Perceptron_Training,0.941559,0.908479,0.998422,0.951329,135,0
9,Perceptron_Prediction,0.943638,0.911218,0.998689,0.952951,135,0
