In [4]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Train a linear SVM spam classifier on bag-of-words features built from
# "emails.csv", report its held-out accuracy, then classify one line of text
# typed by the user.

# Load the dataset
data = pd.read_csv("emails.csv")

# Keep only the required columns. Rows with a missing text or label are
# dropped because CountVectorizer cannot tokenize NaN, and the explicit copy
# lets the column assignment below operate on an independent frame instead of
# a slice of the original one.
data = data[['text', 'spam']].dropna(subset=['text', 'spam']).copy()

# Map the numeric labels 0 / 1 to human-readable class names
data["spam"] = data['spam'].map({0: 'NOT A SPAM COMMENT', 1: 'SPAM COMMENT'})
print(data.sample(15))

# Prepare the raw features and labels
x = np.array(data['text'])
y = np.array(data['spam'])

# Split the raw text BEFORE vectorizing. Fitting the vectorizer on the full
# corpus would let the vocabulary be learned from the test emails as well,
# leaking test information into training and inflating the accuracy.
text_train, text_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Learn the vocabulary on the training split only, then reuse it for the test
# split (words unseen during training are ignored by transform).
cv = CountVectorizer()
x_train = cv.fit_transform(text_train)
x_test = cv.transform(text_test)

# Initialize and train the SVM model
model = SVC(kernel='linear')  # Using linear kernel for simplicity
model.fit(x_train, y_train)

# Evaluate the model on the held-out split
y_pred = model.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

# Predict the class of a new piece of text. The sparse matrix is passed
# straight to the model: it was trained on sparse input, so converting the
# vocabulary-wide row to a dense array is unnecessary.
S = input("Enter a comment: ")
d = cv.transform([S])
prediction = model.predict(d)
print(prediction)


                                                   text                spam
4080  Subject: re : one more thing  clayton ,  i agr...  NOT A SPAM COMMENT
5025  Subject: re : mba polish speakers  does he kno...  NOT A SPAM COMMENT
1457  Subject: dear ms . feldman ,  please find encl...  NOT A SPAM COMMENT
2607  Subject: houston trip  hi jaideep !  my first ...  NOT A SPAM COMMENT
2492  Subject: visiting enron may 4 th  christie ,  ...  NOT A SPAM COMMENT
203   Subject: spamassassin . taint . org  i discove...        SPAM COMMENT
4474  Subject: iv for rama gatiganti rm fifth floor ...  NOT A SPAM COMMENT
3923  Subject: re : f / u to dr . kaminski @ enron f...  NOT A SPAM COMMENT
5481  Subject: in confidence / project status  hi vi...  NOT A SPAM COMMENT
1892  Subject: california 1 / 17 / 01 pt . ii  one o...  NOT A SPAM COMMENT
888   Subject: save your money by getting an oem sof...        SPAM COMMENT
2979  Subject: executive program on credit risk mode...  NOT A SPAM COMMENT
5246  Subjec