In [1]:
# Import library
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

In [2]:
# Sample data (simple emails)
emails = [
    "Congratulations! You've won a $1,000 Walmart gift card. Click here to claim now!",  # Spam
    "Meeting rescheduled to 3 PM tomorrow. Please confirm.",                             # Non-Spam
    "Earn money from home. Work only 2 hours a day!",                                    # Spam
    "Your order has been shipped and will arrive soon.",                                 # Non-Spam
    "Limited-time offer! Get 50% off on all products.",                                  # Spam
    "Let's catch up for lunch tomorrow at 1 PM.",                                       # Non-Spam
]

# Labels: 1 = Spam, 0 = Non-Spam
labels = [1, 0, 1, 0, 1, 0]

# Split the RAW text into training and test sets *before* any feature extraction,
# so the held-out emails cannot influence the learned vocabulary (no train/test leakage).
emails_train, emails_test, y_train, y_test = train_test_split(
    emails, labels, test_size=0.3, random_state=42
)

# Learn the bag-of-words vocabulary from the training emails only, then reuse that
# fitted vocabulary to encode the test emails. Words that never occurred in the
# training emails are simply ignored by transform().
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(emails_train)
X_test = vectorizer.transform(emails_test)

# Full corpus encoded with the training vocabulary. Kept so that the name `X`
# remains available to any later cell that refers to it.
X = vectorizer.transform(emails)

# Train the SVM model (linear kernel, regularization strength C=1.0)
svm_model = SVC(kernel='linear', C=1.0)
svm_model.fit(X_train, y_train)

# Predict the labels of the held-out test emails
y_pred = svm_model.predict(X_test)

In [3]:
# Evaluate the model on the held-out test set: compute both metrics first,
# then print them (overall accuracy, followed by the per-class report).
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Akurasi:", accuracy)
print("\nLaporan Klasifikasi:\n", report)

Akurasi: 1.0

Laporan Klasifikasi:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2



In [4]:
# Try the trained model on new, unseen emails
new_emails = [
    "Get rich quick! Work from home for just 2 hours a day.",
    "Hi, are we still on for the meeting tomorrow?"
]

# Encode the new emails with the already-fitted vectorizer, then classify them
new_features = vectorizer.transform(new_emails)
predictions = svm_model.predict(new_features)

# Label 1 is reported as Spam; any other predicted label is reported as Non-Spam
label_names = {1: 'Spam'}
for email, label in zip(new_emails, predictions):
    verdict = label_names.get(label, 'Non-Spam')
    print(f"'{email}' --> {verdict}")

'Get rich quick! Work from home for just 2 hours a day.' --> Spam
'Hi, are we still on for the meeting tomorrow?' --> Non-Spam
