In [None]:
!pip install tensorflow


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel

from tensorflow.keras import models, layers, callbacks
import joblib

# Reaching this line means every import earlier in this cell resolved without error.
print("✅ All libraries imported successfully!")


In [None]:
import os

# Location of the labelled URL dataset (CSV).
# Set the PHISHING_DATASET_PATH environment variable to run this notebook on
# another machine; when it is unset or empty the original location is used.
DATASET_PATH = os.environ.get("PHISHING_DATASET_PATH") or (
    r"C:\Users\ASUS\Desktop\Phishing_URL_detection\dataset\phishing_site_urls.csv"
)

df = pd.read_csv(DATASET_PATH)

# Preview the first rows
df.head()

In [None]:
# Convert text labels to numeric values ('bad' -> 1, 'good' -> 0).
# The identity entries (1 -> 1, 0 -> 0) make this cell safe to re-run: without
# them, mapping an already-encoded column would turn every label into NaN.
label_codes = {'bad': 1, 'good': 0, 1: 1, 0: 0}
encoded_labels = df['Label'].map(label_codes)

# Series.map yields NaN for anything not in the mapping; fail loudly instead of
# silently carrying NaN targets into training.
unmapped = encoded_labels.isna()
if unmapped.any():
    unexpected = df.loc[unmapped, 'Label'].unique()[:10].tolist()
    raise ValueError(f"Unrecognised values in 'Label' column: {unexpected}")

df['Label'] = encoded_labels.astype('int64')

# Check conversion
df.head()


In [None]:
import re
import numpy as np
from urllib.parse import urlparse

def extract_features(url):
    """Turn a URL into a fixed-length vector of simple lexical features.

    Parameters
    ----------
    url : object
        The URL to describe. It is coerced with ``str()``, so non-string
        values are featurised by their string representation.

    Returns
    -------
    numpy.ndarray
        One-dimensional array of 7 integers, in this order:

        0. total length of the URL string
        1. number of ``'@'`` characters
        2. number of ``'-'`` characters
        3. number of ``'.'`` characters
        4. number of ``'/'`` characters
        5. 1 if ``'https'`` occurs anywhere in the URL (case-insensitive), else 0
        6. how many of the suspicious keywords occur in the URL
           (case-insensitive; each keyword counts at most once)
    """
    url = str(url)
    # Lower-case once and reuse it for every case-insensitive check below.
    lowered = url.lower()

    # Basic structure
    features = [
        len(url),          # URL length
        url.count('@'),    # '@' count
        url.count('-'),    # '-' count
        url.count('.'),    # '.' count
        url.count('/'),    # '/' count
    ]

    # Check for https. Matched on the lower-cased URL so that "HTTPS://..."
    # is treated the same as "https://...", consistent with the keyword check.
    # NOTE: this is a substring test, so it also fires when 'https' appears in
    # the host or path, not only as the scheme.
    features.append(1 if 'https' in lowered else 0)

    # Check for suspicious keywords
    keywords = ('login', 'secure', 'account', 'bank', 'verify', 'update')
    features.append(sum(1 for keyword in keywords if keyword in lowered))

    return np.array(features)


In [None]:
# Featurise every URL (one extract_features row each) and pull out the label column
X = np.array(list(map(extract_features, df['URL'])))
y = df['Label'].values

# Report the dimensions of both arrays
print("✅ Features shape:", np.shape(X))
print("✅ Labels shape:", np.shape(y))


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import joblib

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardise the features: the scaler is fitted on the training rows only,
# then the same fitted transform is applied to both sets
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Persist the fitted scaler to the current working directory
joblib.dump(scaler, 'scaler.pkl')

print("✅ Data ready for training!")


In [None]:
from tensorflow.keras import models, layers, callbacks

# Small feed-forward binary classifier over the scaled URL features.
# The input width is declared with an explicit Input layer: passing
# `input_shape=` to the first Dense layer is deprecated in Keras 3 and
# triggers a warning there.
model = models.Sequential([
    layers.Input(shape=(X_train.shape[1],)),   # one input per feature column
    layers.Dense(32, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(16, activation='relu'),
    layers.Dropout(0.1),
    layers.Dense(1, activation='sigmoid')      # single output in (0, 1), trained against the 0/1 labels
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [None]:
# Early stopping watches the validation loss with a patience of 3 epochs and
# restores the best weights seen when it halts training
early_stop = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# Training settings: 20% of the training rows are set aside for validation
fit_options = dict(
    validation_split=0.2,
    epochs=20,
    batch_size=32,
    callbacks=[early_stop],
)

history = model.fit(X_train, y_train, **fit_options)


In [None]:
# Score the trained model on the test arrays; evaluate() returns the loss
# followed by the compiled metrics (here: accuracy)
loss, acc = model.evaluate(x=X_test, y=y_test)

print(f"✅ Model Accuracy: {acc*100:.2f}%")


In [None]:
# Write the trained model to an .h5 file in the current working directory
model_filename = 'phishing_model.h5'
model.save(model_filename)

print("✅ Model saved successfully!")


In [None]:

# run in the same notebook where `model` and `scaler` exist
import joblib
import os

# Output folder for the trained artifacts.
# Set the PHISHING_MODELS_DIR environment variable to write somewhere else;
# when it is unset or empty the original project folder is used.
models_dir = os.environ.get("PHISHING_MODELS_DIR") or (
    r"C:\Users\ASUS\Desktop\Phishing_URL_detection\models"
)
os.makedirs(models_dir, exist_ok=True)   # no error if the folder already exists

# save scaler (if you have `scaler`)
joblib.dump(scaler, os.path.join(models_dir, 'scaler.pkl'))

# save model (if you used Keras)
model.save(os.path.join(models_dir, 'phishing_model.h5'))   # or .keras if you prefer

# Show where the files went and what the folder now contains
print("Saved to:", models_dir)
print("Files now:", os.listdir(models_dir))
