# 1. Importing Libraries

In [1]:
# Import necessary libraries
import cv2  # OpenCV for video capture and QR code detection
import time  # To calculate FPS
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
import re
from urllib.parse import urlparse
from sklearn.preprocessing import LabelEncoder


# 2. Load the Dataset

In [2]:
# Load the dataset
df = pd.read_csv('D:\[03] Code\Quishing Project (B.Tech 2nd Year)\malicious_phish.csv')


  df = pd.read_csv('D:\[03] Code\Quishing Project (B.Tech 2nd Year)\malicious_phish.csv')


# 3. Feature Engineering Functions

In [3]:
# Feature Engineering Functions
def has_ip_address(url):
    return 1 if re.search(r'\d+\.\d+\.\d+\.\d+', url) else 0

def is_abnormal_url(url):
    hostname = urlparse(url).hostname
    return 1 if hostname and hostname not in url else 0

def count_dots(url):
    return url.count('.')

def uses_short_url(url):
    shorteners = ['bit.ly', 'goo.gl', 'tinyurl']
    return 1 if any(shortener in url for shortener in shorteners) else 0

def has_suspicious_words(url):
    suspicious_words = ['login', 'update', 'free', 'bank']
    return 1 if any(word in url for word in suspicious_words) else 0


# 4. Apply Feature Engineering


In [4]:
# Apply feature engineering to the dataset
df['has_ip'] = df['url'].apply(has_ip_address)
df['abnormal_url'] = df['url'].apply(is_abnormal_url)
df['dot_count'] = df['url'].apply(count_dots)
df['short_url'] = df['url'].apply(uses_short_url)
df['url_length'] = df['url'].apply(len)
df['suspicious_words'] = df['url'].apply(has_suspicious_words)


# 5. Encode the Target Labels

In [5]:
# Encode the target labels ('y')
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(df['type'])


# 6. Prepare Data for Training

In [6]:
# Prepare the data for training
X = df[['has_ip', 'abnormal_url', 'dot_count', 'short_url', 'url_length', 'suspicious_words']]
y = y_encoded

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=5)


# 7. Train the Random Forest Classifier

In [7]:
# Train the Random Forest Classifier
rf = RandomForestClassifier(n_estimators=100)
rf.fit(X_train, y_train)


# 8. Prediction Function

In [8]:
# Prediction Function
def predict_url(url, model, label_encoder):
    features = [
        has_ip_address(url),
        is_abnormal_url(url),
        count_dots(url),
        uses_short_url(url),
        len(url),
        has_suspicious_words(url)
    ]
    features = pd.DataFrame([features], columns=X.columns)
    
    # Predict the numerical label
    numerical_prediction = model.predict(features)[0]
    
    # Decode the numerical label to the original class label
    class_prediction = label_encoder.inverse_transform([numerical_prediction])[0]
    
    return class_prediction


# 9. Setup Video Capture and QR Code Detection

In [9]:
# Setup Video Capture and QR Code Detection
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

# Initialize the QRCodeDetector
detector = cv2.QRCodeDetector()


# 10. Main Loop for QR Code Detection

In [10]:
# Main Loop for QR Code Detection
last_value = ""
last_points = None
detection_timeout = time.time()

try:
    while cap.isOpened():
        success, img = cap.read()
        if not success:
            break

        start = time.perf_counter()
        value, points, _ = detector.detectAndDecode(img)

        if value and points is not None:
            last_value = value
            last_points = points
            detection_timeout = time.time()

            # Predict the class of the detected URL
            url_class = predict_url(value, rf, label_encoder)

            # Display the prediction result on the camera feed
            cv2.putText(img, f'URL Class: {url_class}', (30, 110), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (255, 0, 0), 2)

        # Display the last detected value if it's within the timeout period
        if last_value and (time.time() - detection_timeout < 2) and last_points is not None:
            x1, y1 = last_points[0][0][0], last_points[0][0][1]
            x2, y2 = last_points[0][2][0], last_points[0][2][1]
            x_center = (x2 - x1) / 2 + x1
            y_center = (y2 - y1) / 2 + y1

            cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 5)
            cv2.circle(img, (int(x_center), int(y_center)), 3, (0, 0, 255), 3)
            cv2.putText(img, str(last_value), (30, int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 2)

        # Calculate FPS and display it
        end = time.perf_counter()
        fps = 1 / (end - start)
        cv2.putText(img, f'FPS: {int(fps)}', (30, 70), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 0), 2)
        cv2.imshow('img', img)

        # Exit on pressing 'ESC'
        if cv2.waitKey(1) & 0xFF == 27:
            break

finally:
    cap.release()
    cv2.destroyAllWindows()


error: OpenCV(4.10.0) D:\a\opencv-python\opencv-python\opencv\modules\objdetect\src\qrcode.cpp:2951: error: (-2:Unspecified error) in function 'class std::basic_string<char,struct std::char_traits<char>,class std::allocator<char> > __cdecl cv::ImplContour::decode(const class cv::_InputArray &,const class cv::_InputArray &,const class cv::_OutputArray &) const'
> Invalid QR code source points (expected: 'contourArea(src_points) > 0.0'), where
>     'contourArea(src_points)' is 0
> must be greater than
>     '0.0' is 0
