In [19]:
# Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
import pickle
from win32com.client import Dispatch
import tkinter as tk

In [20]:
# Loading the dataset
dataset = pd.read_csv("spam.csv", encoding="latin-1")

# Renaming columns for clarity
dataset.columns = ['label', 'message', 'col3', 'col4', 'col5']

# Removing unnecessary columns
dataset.drop(['col3', 'col4', 'col5'], axis=1, inplace=True)

# Mapping labels to numerical values
dataset['label'] = dataset['label'].map({'ham': 0, 'spam': 1})

# Inspecting the first few rows
dataset.head()

Unnamed: 0,label,message
0,0,"Go until jurong point, crazy.. Available only ..."
1,0,Ok lar... Joking wif u oni...
2,1,Free entry in 2 a wkly comp to win FA Cup fina...
3,0,U dun say so early hor... U c already then say...
4,0,"Nah I don't think he goes to usf, he lives aro..."


In [21]:
# Splitting data into features (X) and target (y)
X_messages = dataset['message']
y_labels = dataset['label']

# Checking for missing values
print("Missing values per column:\n", dataset.isnull().sum())

# Vectorizing the text data
vectorizer = CountVectorizer()
X_vectorized = vectorizer.fit_transform(X_messages)

# Splitting data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X_vectorized, y_labels, test_size=0.2, random_state=42
)

Missing values per column:
 label      0
message    0
dtype: int64


In [22]:
# Training the Naive Bayes model
spam_classifier = MultinomialNB()
spam_classifier.fit(X_train, y_train)

# Evaluating the model
accuracy = spam_classifier.score(X_test, y_test)
print("Model accuracy:", accuracy)

Model accuracy: 0.97847533632287


In [23]:
# Saving the model to a file
pickle.dump(spam_classifier, open('spam_model.pkl', 'wb'))

# Loading the saved model
loaded_model = pickle.load(open('spam_model.pkl', 'rb'))

In [24]:
# Testing with a sample message
sample_message = "Congratulations! You've won a free cruise."
sample_vect = vectorizer.transform([sample_message]).toarray()
prediction = loaded_model.predict(sample_vect)
print("Prediction for sample message:", "Spam" if prediction[0] else "Not Spam")

Prediction for sample message: Spam


In [25]:
# Function to speak the output
def speak_message(output_text):
    speaker = Dispatch("SAPI.SpVoice")
    speaker.Speak(output_text)

In [26]:
# GUI Application for Spam Detection
def create_gui():
    root = tk.Tk()
    root.geometry("400x300")
    root.title("Spam Email Classifier")

    tk.Label(root, text="Spam Email Classifier", font=("Helvetica", 16)).pack(pady=10)
    tk.Label(root, text="Enter Email Content:").pack(pady=5)

    input_text = tk.Entry(root, width=50)
    input_text.pack(pady=5)

    def classify_message():
        user_input = input_text.get()
        input_vect = vectorizer.transform([user_input]).toarray()
        prediction = loaded_model.predict(input_vect)
        if prediction[0] == 1:
            result_text = "This is a Spam Email"
        else:
            result_text = "This is NOT a Spam Email"
        speak_message(result_text)
        tk.Label(root, text=result_text, fg="blue", font=("Helvetica", 12)).pack(pady=5)

    classify_button = tk.Button(root, text="Classify", command=classify_message)
    classify_button.pack(pady=10)

    root.mainloop()

# Launching the GUI
create_gui()