In [11]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns  

# Load the uploaded dataset.
# NOTE(review): this is an absolute, machine-specific path; adjust it when
# running anywhere other than the original machine.
file_path = 'C:\\Users\\hp\\Downloads\\phishing_dataset.csv'
data = pd.read_csv(file_path)

# Show column names, dtypes and non-null counts. DataFrame.info() prints its
# report itself and returns None, so there is nothing useful to bind to a name.
data.info()

# Show the first few rows. A bare expression that is not the last statement of
# a cell is evaluated and discarded, so the preview is printed explicitly.
data_head = data.head()
print(data_head)

# Target is the 'Result' column; features are every column except 'id' and 'Result'
y = data['Result']
X = data.drop(columns=['id', 'Result'])

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply that
# same fitted scaler to both the training and the test features
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Initialize classifiers, keyed by the display name used in the results.
# Every estimator that accepts a seed gets an explicit random_state so that
# repeated runs give the same numbers (KNN and Gaussian Naive Bayes take none).
classifiers = {
    "Logistic Regression": LogisticRegression(max_iter=1000, random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Random Forest": RandomForestClassifier(random_state=42),
    "SVM": SVC(random_state=42),
    "KNN": KNeighborsClassifier(n_neighbors=4),
    # random_state was previously missing here, leaving AdaBoost as the only
    # seedable model whose results could vary between runs.
    # NOTE(review): the `algorithm` argument is reported as deprecated in
    # recent scikit-learn releases; confirm against the installed version.
    "AdaBoost": AdaBoostClassifier(
        n_estimators=50,
        learning_rate=1,
        algorithm='SAMME',
        random_state=42,
    ),
    "GradientBoost": GradientBoostingClassifier(
        n_estimators=300,
        learning_rate=0.05,
        random_state=100,
        max_features=5,
    ),
    "Naive Bayes": GaussianNB(),
}

# Fit every classifier on the training split and score it on the test split.
# The three metrics below are computed for the class labelled 1.
# NOTE(review): confirm that label 1 is the class of interest for this dataset.
binary_scorers = {
    "Precision": precision_score,
    "Recall": recall_score,
    "F1 Score": f1_score,
}

performance_metrics = {}
for name, clf in classifiers.items():
    # fit() returns the estimator itself, so training and prediction chain
    y_pred = clf.fit(X_train, y_train).predict(X_test)

    performance_metrics[name] = {
        "Accuracy": accuracy_score(y_test, y_pred),
        **{
            label: scorer(y_test, y_pred, average='binary', pos_label=1)
            for label, scorer in binary_scorers.items()
        },
        "Confusion Matrix": confusion_matrix(y_test, y_pred),
    }

# Last expression of the cell: the notebook displays the collected results
performance_metrics




<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11055 entries, 0 to 11054
Data columns (total 32 columns):
 #   Column                       Non-Null Count  Dtype
---  ------                       --------------  -----
 0   id                           11055 non-null  int64
 1   having_IP_Address            11055 non-null  int64
 2   URL_Length                   11055 non-null  int64
 3   Shortining_Service           11055 non-null  int64
 4   having_At_Symbol             11055 non-null  int64
 5   double_slash_redirecting     11055 non-null  int64
 6   Prefix_Suffix                11055 non-null  int64
 7   having_Sub_Domain            11055 non-null  int64
 8   SSLfinal_State               11055 non-null  int64
 9   Domain_registeration_length  11055 non-null  int64
 10  Favicon                      11055 non-null  int64
 11  port                         11055 non-null  int64
 12  HTTPS_token                  11055 non-null  int64
 13  Request_URL                  11055 non-null  i

{'Logistic Regression': {'Accuracy': 0.9219173952366596,
  'Precision': 0.9312169312169312,
  'Recall': 0.9317098994176813,
  'F1 Score': 0.931463350092617,
  'Confusion Matrix': array([[1298,  130],
         [ 129, 1760]], dtype=int64)},
 'Decision Tree': {'Accuracy': 0.9562858004220681,
  'Precision': 0.957983193277311,
  'Recall': 0.9655902593965061,
  'F1 Score': 0.9617716846823096,
  'Confusion Matrix': array([[1348,   80],
         [  65, 1824]], dtype=int64)},
 'Random Forest': {'Accuracy': 0.9668375037684654,
  'Precision': 0.9611197511664075,
  'Recall': 0.9814716781365802,
  'F1 Score': 0.9711891042430592,
  'Confusion Matrix': array([[1353,   75],
         [  35, 1854]], dtype=int64)},
 'SVM': {'Accuracy': 0.9466385287910762,
  'Precision': 0.9467640918580376,
  'Recall': 0.9602964531498147,
  'F1 Score': 0.9534822601839684,
  'Confusion Matrix': array([[1326,  102],
         [  75, 1814]], dtype=int64)},
 'KNN': {'Accuracy': 0.9354838709677419,
  'Precision': 0.959407569939