In [3]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Spam classifier: bag-of-words features + logistic regression, followed by
# an interactive prediction for a single user-supplied comment.

# Load the dataset
data = pd.read_csv("emails.csv")

# Keep only the required columns. The explicit .copy() makes this an
# independent frame, so the column assignment below does not write into a
# slice of the original (which can trigger pandas' SettingWithCopyWarning).
data = data[['text', 'spam']].copy()

# Map 0 to 'NOT A SPAM COMMENT' and 1 to 'SPAM COMMENT'
data["spam"] = data['spam'].map({0: 'NOT A SPAM COMMENT', 1: 'SPAM COMMENT'})
print(data.sample(15))

# Prepare the raw text features and the labels
x = np.array(data['text'])
y = np.array(data['spam'])

# Split the dataset into training and testing sets BEFORE vectorizing, so the
# vocabulary is learned from the training text only. Fitting the vectorizer on
# the full corpus would leak information from the test set into the features.
text_train, text_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Convert text data into numerical data using CountVectorizer:
# fit on the training text, then reuse that vocabulary for the test text.
cv = CountVectorizer()
x_train = cv.fit_transform(text_train)
x_test = cv.transform(text_test)

# Initialize and train the Logistic Regression model. The default cap of 100
# solver iterations is often too low for high-dimensional count features, so
# allow more iterations to let the solver converge.
model = LogisticRegression(max_iter=1000)
model.fit(x_train, y_train)

# Evaluate the model on the held-out test set
y_pred = model.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

# Predict the class of a new comment
S = input("Enter a comment: ")
# Keep the vector sparse: the model was trained on sparse input, and
# densifying would allocate a full vocabulary-sized row for no benefit.
d = cv.transform([S])
prediction = model.predict(d)
# The model's classes are the label strings themselves, so print directly.
print(f"Prediction: {prediction[0]}")


                                                   text                spam
5454  Subject: re : dr . michelle foss - energy inst...  NOT A SPAM COMMENT
5115  Subject: grades  pam ,  another team :  elena ...  NOT A SPAM COMMENT
5448  Subject: engineering meetings in broomfield co...  NOT A SPAM COMMENT
1096  Subject: viagra is it the right medication for...        SPAM COMMENT
2611  Subject: co - integration  zimin ,  andrea ree...  NOT A SPAM COMMENT
3429  Subject: re : mg integration support - daily u...  NOT A SPAM COMMENT
4841  Subject: network design optimization .  fyi , ...  NOT A SPAM COMMENT
688   Subject: save your money buy getting this thin...        SPAM COMMENT
5634  Subject: summer part time employee  add her to...  NOT A SPAM COMMENT
1936  Subject: financial mathematics grad from u of ...  NOT A SPAM COMMENT
927   Subject: julie invites you to her free webcam ...        SPAM COMMENT
950   Subject: lock in your clients ' gains !  a win...        SPAM COMMENT
350   Subjec