In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report


In [6]:
data = {
    "text": [
        "Win money now claim your free prize",
        "Meeting scheduled tomorrow at 10 am",
        "Congratulations you have won a lottery",
        "Please find the attached project report",
        "Limited offer buy now",
        "Team lunch planned this Friday"
    ],
    "spam": [1, 0, 1, 0, 1, 0]
}

df = pd.DataFrame(data)
df


Unnamed: 0,text,spam
0,Win money now claim your free prize,1
1,Meeting scheduled tomorrow at 10 am,0
2,Congratulations you have won a lottery,1
3,Please find the attached project report,0
4,Limited offer buy now,1
5,Team lunch planned this Friday,0


In [3]:
X = df["text"]
y = df["spam"]


In [4]:
vectorizer = TfidfVectorizer(stop_words='english')
X_vectorized = vectorizer.fit_transform(X)


In [7]:
X_train = X_vectorized
X_test = X_vectorized
y_train = y
y_test = y


In [10]:
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(X_train, y_train)

y_pred_knn = knn.predict(X_test)

print("KNN Accuracy:", accuracy_score(y_test, y_pred_knn))
print("KNN Classification Report:")
print(classification_report(y_test, y_pred_knn))


KNN Accuracy: 1.0
KNN Classification Report:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1



In [11]:
from sklearn.tree import DecisionTreeClassifier

dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)

y_pred_dt = dt.predict(X_test)

print("Decision Tree Accuracy:", accuracy_score(y_test, y_pred_dt))
print("Decision Tree Classification Report:")
print(classification_report(y_test, y_pred_dt))


Decision Tree Accuracy: 1.0
Decision Tree Classification Report:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00         1

    accuracy                           1.00         1
   macro avg       1.00      1.00      1.00         1
weighted avg       1.00      1.00      1.00         1



In [12]:
new_emails = [
    "Congratulations you have won a free prize claim now",
    "Please join the team meeting tomorrow at 11 am"
]

new_emails_vectorized = vectorizer.transform(new_emails)

print("KNN Predictions (1=Spam, 0=Not Spam):")
print(knn.predict(new_emails_vectorized))

print("\nDecision Tree Predictions (1=Spam, 0=Not Spam):")
print(dt.predict(new_emails_vectorized))


KNN Predictions (1=Spam, 0=Not Spam):
[1 1]

Decision Tree Predictions (1=Spam, 0=Not Spam):
[1 1]


In [13]:
print("KNN Accuracy:", accuracy_score(y_test, y_pred_knn))
print("Decision Tree Accuracy:", accuracy_score(y_test, y_pred_dt))


KNN Accuracy: 1.0
Decision Tree Accuracy: 1.0
