<a href="https://colab.research.google.com/github/adrcynical01/DLI_Assignment/blob/main/DLI_Assignment_Official.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

1.0 Importing the Libraries

In [None]:
import pandas as pd
import numpy as np
import random
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns

# Seed Python's `random`, NumPy and TensorFlow with one shared value so that
# repeated runs draw the same random numbers
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

1.1 Loading the Dataset from Google Drive

1.1A Loading by Uploading the CSV File

In [None]:
import io

from google.colab import files

# Ask the user to pick file(s) in the browser. The loop below treats the
# result as a mapping of file name -> uploaded content (bytes).
uploaded = files.upload()
for fn, content in uploaded.items():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(content)))
  # Parse the uploaded CSV so this loading route defines `dataset` on its own;
  # without this, the later cells fail unless the Drive-mount route is also run.
  # Non-CSV uploads are only reported, never parsed. If several CSV files are
  # uploaded, the last one in iteration order wins.
  if fn.lower().endswith('.csv'):
    dataset = pd.read_csv(io.BytesIO(content))

1.1B Loading by Mounting Google Drive

In [None]:
# Attach Google Drive under /content/drive so its files are readable by path
from google.colab import drive
drive.mount('/content/drive')

# Read the phishing CSV from its Drive location into a DataFrame
dataset_path = '/content/drive/My Drive/Colab Notebooks/phishing_dataset.csv'
dataset = pd.read_csv(dataset_path)

# Report the row count followed by one blank line, then preview the first rows
print("Number of total records:", len(dataset), end="\n\n")
dataset.head()

2.0 Preparing the Data

In [None]:
# Pull the target column out of the table; every remaining column is a feature.
# The notebook's stated label encoding is 1 = phishing, -1 = legitimate
# (TODO confirm against the dataset's own documentation).
label_column = 'Result'
y = dataset[label_column].values
X = dataset.drop(columns=label_column).values

In [None]:
from sklearn.model_selection import train_test_split
import numpy as np

# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both parts; random_state fixes the shuffle.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = split_parts

# Re-encode labels for binary classification: -1 becomes 0, anything else becomes 1
y_train, y_test = (np.where(labels == -1, 0, 1) for labels in (y_train, y_test))

# Optional sanity check: per-class sample counts in each split
train_values, train_counts = np.unique(y_train, return_counts=True)
test_values, test_counts = np.unique(y_test, return_counts=True)
print("Train set class distribution:", dict(zip(train_values, train_counts)))
print("Test set class distribution:", dict(zip(test_values, test_counts)))

3.0 Defining the Neural Network Model

In [None]:
# Importing the necessary libraries for the neural network

from keras.models import Sequential
from keras.layers import Dense

4.0 Compiling and Training the Model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.utils import class_weight
import numpy as np

# Feed-forward binary classifier: two ReLU hidden layers (128 and 64 units)
# followed by a single sigmoid unit.
# The input width is declared with an explicit Input layer instead of the
# deprecated `input_dim` argument on the first Dense layer.
model = Sequential()
model.add(Input(shape=(X_train.shape[1],)))
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # Binary output

# Print model architecture
model.summary()

# Binary cross-entropy matches the single sigmoid output; accuracy is tracked
# as an additional metric alongside the loss.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Stop once val_loss has not improved for 5 consecutive epochs and restore the
# weights from the best epoch.
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# 'balanced' class weights computed from the training labels, so the
# less frequent class contributes more to the loss.
class_labels = np.unique(y_train)
class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=class_labels,
    y=y_train
)
# Cast to built-in int/float so the dict has plain Python keys and values
# (also keeps the printed output free of NumPy scalar wrappers).
class_weight_dict = {
    int(cls): float(weight) for cls, weight in zip(class_labels, class_weights)
}
print("Class Weights:", class_weight_dict)

# Train for at most 50 epochs, holding out 20% of the training data for validation
history = model.fit(
    X_train, y_train,
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stop],
    class_weight=class_weight_dict,
    verbose=1
)


5.0 Model Evaluation

In [None]:
# Score the trained network on the held-out test split. The result unpacks
# into two values; the first is discarded and the second is reported as a
# percentage with two decimals.
test_metrics = model.evaluate(X_test, y_test, verbose=0)
_, accuracy = test_metrics
print("Test Accuracy: {:.2f}%".format(accuracy * 100))
