In [1]:
import tensorflow as tf
import re
import numpy as np

# --- Load the model ---
# The path is relative to the current working directory, so the .keras file
# must sit next to wherever the kernel is running.
model = tf.keras.models.load_model("./url_classifier_model.keras")

# --- Preprocessing function (same as training) ---
def preprocess_url_for_keras(url):
    """Turn a raw URL into the space-separated character string fed to the model.

    Steps, in order:
      1. lowercase the whole URL;
      2. delete every occurrence of ``http://``, ``https://`` and ``www.``
         (anywhere in the string, not only at the start);
      3. keep at most the first 500 characters;
      4. join the remaining characters with single spaces.

    Args:
        url: The raw URL as a ``str``.

    Returns:
        A ``str`` such as ``"a . b"`` for the input ``"http://a.b"``.
    """
    stripped = re.sub(r'https?://|www\.', '', url.lower())
    # Joining a str directly iterates over its characters.
    return " ".join(stripped[:500])

# --- Sample URLs to classify ---
raw_urls = [
    "https://facebook.com/login.php",
    "https://youtube.com",
    "https://malwarecom.gov",
]


# --- Run inference ---
# The model receives an object array holding one preprocessed string per URL.
processed_urls = np.array(
    [preprocess_url_for_keras(raw) for raw in raw_urls],
    dtype=object,
)

predictions = model.predict(processed_urls)
# Scores strictly above 0.5 map to label 1, everything else to 0.
pred_labels = (predictions.flatten() > 0.5).astype(int)

# --- Report the label and raw score for each URL ---
for url, pred, prob in zip(raw_urls, pred_labels, predictions.flatten()):
    print(f"{url}\n → Predicted: {'Độc hại (1)' if pred else 'Sạch (0)'} ({prob:.4f})\n")

2025-11-04 00:19:21.401519: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-11-04 00:19:21.907940: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-11-04 00:19:24.398478: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
E0000 00:00:1762190365.793343   10253 cuda_executor.cc:1309] INTERNAL: CUDA Runtime error: Failed call to cudaGetRuntimeVersion: Error loading CUDA libraries. GPU will not be used.: Error loading CUDA libraries. GPU will not be used.
W0000 00:00:1762190365.798449   10253 gpu_device.cc:2342] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are install

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 145ms/step
https://facebook.com/login.php
 → Predicted: Sạch (0) (0.4673)

https://youtube.com
 → Predicted: Sạch (0) (0.0046)

https://malwarecom.gov
 → Predicted: Sạch (0) (0.0396)



In [10]:
import random

def generate_test_urls(n=100000, seed=None):
    """Generate ``n`` synthetic URLs for throughput testing.

    Every URL has the shape ``{scheme}://[{sub}.]{domain}/{path}{param}``:

    * ``scheme``  - ``http`` or ``https``;
    * ``sub``     - one of a few subdomains, or none at all;
    * ``domain``  - drawn from the fixed ``domains`` pool;
    * ``path``    - one to three segments from ``paths`` joined by ``/``;
    * ``param``   - one query key from ``params`` followed by 8 random
      lowercase-alphanumeric characters.

    Args:
        n: Number of URLs to produce. Zero or a negative value yields ``[]``.
        seed: Optional seed for reproducible output. When given, a private
            ``random.Random(seed)`` is used, so the same seed always returns
            the same list and the global ``random`` state is not touched.
            When ``None`` (the default) the module-level ``random`` generator
            is used, exactly as before this parameter existed.

    Returns:
        A list of ``n`` URL strings.
    """
    domains = [
        "facebook.com", "google.com", "youtube.com", "amazon.com", "paypal.com",
        "github.com", "apple.com", "microsoft.com", "netflix.com", "linkedin.com",
        "bankofamerica.com", "secure-login.net", "verify-account.io", "update-info.ru",
        "malware-download.cc", "phishingsite.biz", "darkwebmarket.to", "ransomportal.co",
        "trojanupdate.org", "safe-shopping.store"
    ]
    paths = [
        "login", "signin", "verify", "account", "secure", "update", "home", "video",
        "profile", "settings", "transfer", "payment", "checkout", "download", "offer",
        "promo", "freegift", "security", "reset", "support"
    ]
    params = [
        "?id=", "?session=", "?ref=", "?token=", "?user=", "?redirect=", "?click=", "?code=",
        "?auth=", "?key="
    ]
    # Pools that used to be rebuilt inline on every loop iteration.
    subdomains = ["www", "mail", "secure", "app", ""]  # "" means no subdomain
    schemes = ["http", "https"]
    token_alphabet = "abcdefghijklmnopqrstuvwxyz0123456789"

    # The module itself exposes choice/choices/randint, so it can stand in for
    # a Random instance when no seed is requested.
    rng = random if seed is None else random.Random(seed)

    urls = []
    for _ in range(n):
        # Draw order matches the original implementation so that callers who
        # seed the global generator keep getting the same sequence.
        domain = rng.choice(domains)
        sub = rng.choice(subdomains)
        scheme = rng.choice(schemes)
        path = "/".join(rng.choices(paths, k=rng.randint(1, 3)))
        param = rng.choice(params) + "".join(rng.choices(token_alphabet, k=8))
        host = f"{sub}.{domain}" if sub else domain
        urls.append(f"{scheme}://{host}/{path}{param}")

    return urls


# Example usage: build the synthetic benchmark set.
test_urls = generate_test_urls(100000)

# To test speed: preprocess up front so that only model inference is timed.
# Benchmark-specific names are used so the `processed_urls` / `predictions`
# produced by the small example cell are not overwritten; later cells that
# inspect `predictions` alongside `pred_labels` then stay consistent.
test_processed_urls = np.array(
    [preprocess_url_for_keras(url) for url in test_urls],
    dtype=object,
)

import time

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() can jump if the system clock is adjusted mid-run.
start = time.perf_counter()
test_predictions = model.predict(test_processed_urls, batch_size=128, verbose=0)
elapsed = time.perf_counter() - start

print(f"Predicted {len(test_urls)} URLs in {elapsed:.2f} seconds ({len(test_urls)/elapsed:.2f} URLs/sec)")


Predicted 100000 URLs in 12.07 seconds (8281.66 URLs/sec)


In [4]:
# Load a second instance from the same file that `model` was loaded from and
# show its architecture summary.
model1 = tf.keras.models.load_model("./url_classifier_model.keras")
model1.summary()



In [5]:
# Load the other saved model file (path relative to the current working
# directory) and show its architecture summary.
model12 = tf.keras.models.load_model("./best_modelsaved.keras")
model12.summary()



In [3]:
# Inspect the list of example URLs.
raw_urls

['https://facebook.com/login.php',
 'https://youtube.com',
 'https://malwarecom.gov']

In [4]:
# Check what container type model.predict returned.
type(predictions)

numpy.ndarray

In [5]:
# Display the raw model outputs.
predictions

array([[0.46726856],
       [0.00464447],
       [0.03957638]], dtype=float32)

In [20]:
# Confirm that rounding the first score to 2 decimals and wrapping it in
# float() yields a built-in Python float.
type(float(round(predictions[0][0], 2)))


float

In [16]:
# Display the thresholded 0/1 labels.
pred_labels

array([0, 0, 0])

In [37]:
# `score` is not assigned in any visible cell, so this cell raised NameError on
# a fresh kernel. Take the first prediction as the score.
# NOTE(review): assumed from the `predictions[0][0]` probe in an earlier cell —
# confirm this is the intended source of `score`.
score = float(predictions[0][0])
# Same strict > 0.5 cut-off that is used to build `pred_labels`.
label = 'MALICIOUS' if score > 0.5 else 'BENIGN'
label

'BENIGN'