In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

# Load dataset
data = pd.read_csv('spam.csv')  # Replace 'email_data.csv' with your actual file path

# Extract features and target variable
X = data['EmailText'].astype(str)  # Convert the 'EmailText' column to string type
y = data['Label']  # Use 'Label' column as the target

# Encode the 'Label' column (ham -> 0, spam -> 1)
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Vectorize text data
vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(X)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Support Vector Machine Classifier
model = SVC(kernel='linear')
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
print("\nClassification Report:\n", classification_report(y_test, y_pred, target_names=label_encoder.classes_))

# Test input for prediction
test_input = ["Congratulations! You've won a free ticket. Call now to claim your prize."]
test_input_transformed = vectorizer.transform(test_input)
prediction = model.predict(test_input_transformed)
predicted_label = label_encoder.inverse_transform(prediction)

print(f"\nTest Input: {test_input[0]}")
print(f"Predicted Label: {predicted_label[0]}")
# Test input for prediction
test_input = ["Hey, are we still meeting tomorrow at 5 PM?"]

test_input_transformed = vectorizer.transform(test_input)
prediction = model.predict(test_input_transformed)
predicted_label = label_encoder.inverse_transform(prediction)

print(f"\nTest Input: {test_input[0]}")
print(f"Predicted Label: {predicted_label[0]}")



Classification Report:
               precision    recall  f1-score   support

         ham       0.98      1.00      0.99       965
        spam       0.97      0.84      0.90       150

    accuracy                           0.97      1115
   macro avg       0.97      0.92      0.94      1115
weighted avg       0.97      0.97      0.97      1115


Test Input: Congratulations! You've won a free ticket. Call now to claim your prize.
Predicted Label: spam

Test Input: Hey, are we still meeting tomorrow at 5 PM?
Predicted Label: ham
