In [9]:
import pandas as pd 
import re
import os 
import numpy as np
import matplotlib.pyplot as plt 
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional
import tkinter as tk
from tkinter import messagebox
import customtkinter as ctk

# Load and prepare the data
df=pd.read_csv("main_data.csv")
df.dropna(inplace=True)
df.isnull().sum()

# Assuming 'EmailText' contains the text data and 'Label' contains the labels
X = df['EmailText']
y = df['Label']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Tokenization and padding
max_words = 10000  # Vocabulary size
tokenizer = Tokenizer(num_words=max_words, oov_token='<OOV>')
tokenizer.fit_on_texts(X_train)

# Convert texts to sequences
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

# Pad the sequences so they are all the same length
maxlen = 200  # Maximum sequence length
X_train_pad = pad_sequences(X_train_seq, maxlen=maxlen, padding='post', truncating='post')
X_test_pad = pad_sequences(X_test_seq, maxlen=maxlen, padding='post', truncating='post')

# Convert the labels to categorical (for binary classification using softmax)
y_train = to_categorical(y_train, num_classes=2)
y_test = to_categorical(y_test, num_classes=2)

# Build the model
model = Sequential()
embedding_dim = 128
model.add(Embedding(input_dim=max_words, output_dim=embedding_dim, input_shape=(maxlen,)))
model.add(Bidirectional(LSTM(units=64)))
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax'))  # 2 classes: ham and spam

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

# Train the model
batch_size = 64
epochs = 10
history = model.fit(X_train_pad, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_test_pad, y_test))

# Function to pop up an alert using Tkinter
def show_spam_alert():
    root = tk.Tk()
    root.withdraw()  # Hide the main Tkinter window
    messagebox.showwarning("Spam Alert", "This email is classified as Spam!")
    root.destroy()  # Close the Tkinter window after the alert is dismissed

# Sample email to predict


  super().__init__(**kwargs)


Epoch 1/10
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 85ms/step - accuracy: 0.8077 - loss: 0.4200 - val_accuracy: 0.9652 - val_loss: 0.1123
Epoch 2/10
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 133ms/step - accuracy: 0.9739 - loss: 0.0992 - val_accuracy: 0.9695 - val_loss: 0.0792
Epoch 3/10
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 81ms/step - accuracy: 0.9862 - loss: 0.0460 - val_accuracy: 0.9882 - val_loss: 0.0533
Epoch 4/10
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 81ms/step - accuracy: 0.9924 - loss: 0.0257 - val_accuracy: 0.9909 - val_loss: 0.0441
Epoch 5/10
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 76ms/step - accuracy: 0.9957 - loss: 0.0201 - val_accuracy: 0.9898 - val_loss: 0.0411
Epoch 6/10
[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 76ms/step - accuracy: 0.9977 - loss: 0.0090 - val_accuracy: 0.9920 - val_loss: 0.0323
Epoch 7/10
[1m117

In [33]:
new_emails = ["""
Dear Customer,

You have won a $1,000 Walmart Gift Card! To claim your prize, please click the link below and enter your information.

[Click Here to Claim]

Hurry! This offer is only valid for a limited time.

Best regards,
Walmart Rewards Team

"""]

In [11]:

# Initialize the CustomTkinter application
ctk.set_appearance_mode("light")  # Modes: "System" (default), "Dark", "Light"
ctk.set_default_color_theme("blue")  # Themes: "blue" (default), "green", "dark-blue"

# Create the main application window
app = ctk.CTk()
app.title("CustomTkinter Mailbox")
app.geometry("800x600")

# Create the frame for the list of emails on the left
email_list_frame = ctk.CTkFrame(app, width=200)
email_list_frame.grid(row=0, column=0, sticky="nswe", padx=20, pady=20)

# Add a label to simulate email list
email_list_label = ctk.CTkLabel(email_list_frame, text="Inbox", font=("Helvetica", 16, "bold"))
email_list_label.pack(pady=10)

# Add a few placeholder buttons for the email list (representing individual emails)
email_1_button = ctk.CTkButton(email_list_frame, text="Email 1", width=180)
email_1_button.pack(pady=5)

email_2_button = ctk.CTkButton(email_list_frame, text="Email 2", width=180)
email_2_button.pack(pady=5)

email_3_button = ctk.CTkButton(email_list_frame, text="Email 3", width=180)
email_3_button.pack(pady=5)

# Create a frame for displaying the selected email content
email_viewer_frame = ctk.CTkFrame(app)
email_viewer_frame.grid(row=0, column=1, sticky="nswe", padx=20, pady=20)

# Add a label to indicate email viewing area
email_viewer_label = ctk.CTkLabel(email_viewer_frame, text="Email Viewer", font=("Helvetica", 16, "bold"))
email_viewer_label.pack(pady=10)

# Create a textbox for displaying email content (non-editable)
email_content = ctk.CTkTextbox(email_viewer_frame, width=500, height=400, font=("Arial", 12))
email_content.pack(pady=10, padx=10)

# Insert the email content into the textbox and make it non-editable
email_content.insert("1.0", new_emails[0])
email_content.configure(state="disabled")  # Make the content read-only

# Adjust grid row/column configuration to make the layout responsive
app.grid_columnconfigure(1, weight=1)
app.grid_rowconfigure(0, weight=1)

# Run the Tkinter event loop
app.mainloop()


In [77]:
# Tokenize and pad the email sequences
new_sequences = tokenizer.texts_to_sequences(new_emails)
new_padded = pad_sequences(new_sequences, maxlen=maxlen, padding='post', truncating='post')

# Predict the probabilities of each class (ham=0, spam=1)
predictions = model.predict(new_padded)
print("Predicted probabilities:", predictions)

# Set a custom threshold for classifying spam
spam_threshold = 0.996

# Get the probability of the email being classified as spam (class 1)
spam_probabilities = predictions[:, 1]

# Apply the threshold: if spam probability > threshold, classify as spam (1), else ham (0)
predicted_labels = (spam_probabilities > spam_threshold).astype(int)
print("Predicted labels based on threshold:", predicted_labels)

# If the predicted label is spam, show the spam alert
if predicted_labels == 1:
    show_spam_alert()  # Trigger the Tkinter spam alert popup


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Predicted probabilities: [[5.4897486e-05 9.9994504e-01]]
Predicted labels based on threshold: [1]


# Email 1: Legitimate Appointment Reminder

In [70]:
new_emails = ["""Subject: Your Upcoming Appointment with Dr. Reynolds

Dear [Patient's Name],

This is a confirmation for your appointment with Dr. Reynolds scheduled for October 15, 2024, at 2:00 PM. Please remember to bring your updated medical records and insurance card.

Location:
Community Health Clinic
450 Wellness Drive, Springfield, ST 12345

For any changes, please contact our front desk at (555) 001-2345.

Best Regards,
Community Health Clinic Administrative Team



"""]


# Email 2: Phishing Attempt - Fake Bill

In [72]:
new_emails = ["""Subject: Urgent: Outstanding Medical Bill Requires Immediate Payment

Dear Valued Patient,

We have detected an unresolved bill from your recent surgery. Please settle the outstanding amount of $3,500 to avoid penalties. Kindly use the link below to make the payment promptly using our secure payment portal.

[Phishing Link Disguised as Payment Portal]

For any discrepancies, please contact our billing department immediately at (555) 987-6543.

Best Regards,
Springfield Medical Center Financial Department

"""]


# Email 3: Legitimate Lab Results

In [74]:
new_emails = ["""Subject: Lab Results Available for [Patient's Name]

Hello [Patient's Name],

Your recent lab results are now available and have been uploaded to your patient portal. To view the details, please log in to your account using the following link:

[Legitimate Hospital Portal Link]

If you have any questions about your results, please do not hesitate to contact Dr. Elaine Moss’s office at (555) 123-4567.

Warm regards,
Springfield Medical Center

"""]


# Email 4: Phishing Attempt - Fake Insurance Claim

In [76]:
new_emails = ["""Subject: Urgent: Verification Required for Continued Insurance Coverage

Dear [Patient's Name],

We have identified an issue with your recent insurance claim submission related to the procedure on September 25, 2024. Due to inconsistencies in the data provided, immediate verification is necessary to maintain uninterrupted insurance coverage.

Please access your insurance profile through the secure link below and confirm your details:

[Phishing Link Disguised as Insurance Verification]

It is crucial to address this matter promptly to avoid potential penalties or disruption in your medical coverage. If you believe you have received this email in error, please do not click on any links and contact our support directly at (555) 987-6543 to resolve this issue safely.
Please settle the outstanding amount of $3,500 to avoid penalties. Kindly use the link below to make the payment promptly using our secure payment portal.

[Phishing Link Disguised as Payment Portal]
Thank you for your immediate attention to this urgent matter. Ensuring the accuracy of our records is essential for providing you with continuous healthcare services.

Warm Regards,
Patient Insurance Relations
Springfield Medical Associates

Note: If you do not verify your account within 48 hours, your coverage may be temporarily suspended pending further investigation.


"""]


# Email 5: Legitimate Staff Communication

In [62]:
new_emails = ["""Subject: Updated Staff Contact List

Dear Team,

Please find attached the updated contact list for all department heads and administrators. Let’s ensure we’re all updated to facilitate smooth inter-department communications.

[Attachment: Staff_Contacts_Oct2024.pdf]

Best regards,
Janet Greene
HR Department, Springfield Medical Center


"""]
