In [2]:
import os
import cv2
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score,precision_score,recall_score
from sklearn.preprocessing import LabelEncoder
from scipy.stats import skew

In [1]:


# Set the path to your labeled dataset folders
malicious_dir = 'QR codes\Malicious\malicious'
non_malicious_dir = r'QR codes\Benign\benign'

# List to store the labels ('malicious' or 'non-malicious') and features
labels = []
features = []

# Function to extract statistical features from an image
def extract_features(image):
    mean = np.mean(image)
    std_dev = np.std(image)
    skewness = skew(image.flatten())
    return [mean, std_dev, skewness]

# Load malicious images and extract features
for filename in os.listdir(malicious_dir):
    if filename.endswith(".png") or filename.endswith(".jpg"):
        labels.append('malicious')
        img = cv2.imread(os.path.join(malicious_dir, filename), cv2.IMREAD_GRAYSCALE)
        img = cv2.resize(img, (128, 128))
        img_features = extract_features(img)
        features.append(img_features)

# Load non-malicious images and extract features
for filename in os.listdir(non_malicious_dir):
    if filename.endswith(".png") or filename.endswith(".jpg"):
        labels.append('non-malicious')
        img = cv2.imread(os.path.join(non_malicious_dir, filename), cv2.IMREAD_GRAYSCALE)
        img = cv2.resize(img, (128, 128))
        img_features = extract_features(img)
        features.append(img_features)

# Convert labels to numerical values
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, labels_encoded, test_size=0.2, random_state=42)

# Convert data to DMatrix format for XGBoost
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

# Set XGBoost parameters
params = {
    'objective': 'binary:logistic',
    'max_depth': 3,
    'learning_rate': 0.1,
    'eval_metric': 'logloss'
}

# Train the XGBoost model
num_round = 100
bst = xgb.train(params, dtrain, num_round)

# Make predictions on the test set
y_pred_probs = bst.predict(dtest)
y_pred = np.round(y_pred_probs)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Print metrics
print("Accuracy:", accuracy)
print("Confusion Matrix:")
print(conf_matrix)
print("F1 Score:", f1)

# Save the model
bst.save_model('qr_code_xgboost_model.json')


Accuracy: 0.66625
Confusion Matrix:
[[341  81]
 [186 192]]
F1 Score: 0.5898617511520736


In [8]:
import cv2
import numpy as np
import xgboost as xgb
from scipy.stats import skew
# Load the trained XGBoost model
bst = xgb.Booster(model_file='qr_code_xgboost_model.json')  # Replace with the actual path to your trained XGBoost model

# XgBoost Function to preprocess a new QR code image
def preprocess_image(img_path):
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    img = cv2.resize(img, (128, 128))
    img_features = [np.mean(img), np.std(img), skew(img.flatten())]
    return np.array(img_features).reshape(1, -1)

# XgBoost Function to predict the class of a new QR code
def predict_qr_code(img_path):
    img_features = preprocess_image(img_path)
    dtest = xgb.DMatrix(img_features)
    prediction = bst.predict(dtest)
    return int(round(prediction[0]))

# Replace 'path/to/new_qr_code.png' with the path to your new QR code image
new_qr_code_path = r'QR codes\Benign\benign_2.png'
prediction = predict_qr_code(new_qr_code_path)

# Print the prediction
if prediction != 0:
    print("Predicted class: non-malicious")
else:
    print("Predicted class: malicious")


Predicted class: non-malicious
