In [1]:
import joblib

# Load the trained model and the vectorizer from disk.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code — only load .pkl files that come from a trusted source.
# NOTE(review): presumably both artifacts were saved by the same training
# run (the vectorizer's vocabulary must match what the model saw) — confirm.
svm_model = joblib.load('svm_model.pkl')
tfidf_vectorizer = joblib.load('tfidf_vectorizer.pkl')

In [3]:
import re
import pandas as pd

def preprocess_text(text):
    """Normalize one raw text value into lowercase, letters-only tokens.

    Per the original author's note, this is meant to be the same
    preprocessing that was applied at training time (the training code is
    not visible here), so its behavior should not be changed independently.

    Args:
        text: Any scalar value. Missing values (as detected by ``pd.isna``)
            yield an empty string; everything else is converted with ``str``.

    Returns:
        The lowercased text with every character that is not an ASCII letter
        or whitespace replaced by a space, and runs of whitespace collapsed
        to single spaces (no leading/trailing whitespace).
    """
    if pd.isna(text):
        return ""

    lowered = str(text).lower()
    # The A-Z range is redundant after lowercasing, but the pattern is kept
    # exactly as written so it stays identical to the training-time version.
    letters_only = re.sub(r'[^a-zA-Z\s]', ' ', lowered)
    # split() with no argument drops empty tokens, which collapses whitespace.
    return ' '.join(letters_only.split())

# Sample inputs to classify (these could equally come from user input,
# a file, or a database).
new_data = [
    # Example of a malicious URL
    "http://malicious-site.com/steal-data",
    # Example of a safe URL
    "https://google.com/search",
    # Example of a malicious URL
    "click this link: http://phishing.com",
]

# Run every sample through preprocess_text before vectorizing.
new_data_processed = list(map(preprocess_text, new_data))

In [4]:
# Convert the preprocessed texts into TF-IDF features with the loaded
# vectorizer (this calls transform(), not fit_transform()).
new_data_tfidf = tfidf_vectorizer.transform(new_data_processed)

In [5]:
# Predicted label for each input row.
predictions = svm_model.predict(new_data_tfidf)

# Per-class probabilities (if needed); columns are paired with
# svm_model.classes_ when printed below.
# NOTE(review): presumably the model was trained with probability estimates
# enabled, otherwise predict_proba would not be available — confirm.
prediction_probas = svm_model.predict_proba(new_data_tfidf)

# Print one report per input: original text, predicted label, and the
# class -> probability mapping, followed by a separator line.
for text, pred, proba in zip(new_data, predictions, prediction_probas):
    class_probabilities = dict(zip(svm_model.classes_, proba))
    print(
        f"URL: {text}",
        f"Prediksi: {pred}",
        f"Probabilitas: {class_probabilities}",
        "---",
        sep="\n",
    )

URL: http://malicious-site.com/steal-data
Prediksi: berbahaya
Probabilitas: {'aman': np.float64(0.44807460913160846), 'berbahaya': np.float64(0.5519253908683914)}
---
URL: https://google.com/search
Prediksi: berbahaya
Probabilitas: {'aman': np.float64(0.31718201661214074), 'berbahaya': np.float64(0.6828179833878593)}
---
URL: click this link: http://phishing.com
Prediksi: berbahaya
Probabilitas: {'aman': np.float64(0.17127672047807257), 'berbahaya': np.float64(0.8287232795219274)}
---
