In [7]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import re
import nltk
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.feature_extraction.text import CountVectorizer



In [8]:


# Download NLTK stopwords
nltk.download('stopwords')
from nltk.corpus import stopwords

# Load dataset (SpamAssassin Public Corpus or any other email dataset)
data = pd.read_csv("/content/spam.csv")

# Preprocessing function
def clean_text(text):
    text = text.lower()
    text = re.sub(r'[^a-zA-Z]', ' ', text)
    words = text.split()
    words = [word for word in words if word not in stopwords.words('english')]
    return ' '.join(words)

# Apply preprocessing
data['clean_text'] = data['Message'].apply(clean_text)

# Convert labels (1 = spam, 0 = good email)
data['label'] = data['Category'].apply(lambda x: 1 if x == 'spam' else 0)

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(data['clean_text'], data['label'], test_size=0.2, random_state=42)

# Convert text to numerical representation using CountVectorizer
vectorizer = CountVectorizer(max_features=5000)
X_train_seq = vectorizer.fit_transform(X_train).toarray()
X_test_seq = vectorizer.transform(X_test).toarray()

# Convert to PyTorch tensors
X_train_torch = torch.tensor(X_train_seq, dtype=torch.float32)
X_test_torch = torch.tensor(X_test_seq, dtype=torch.float32)
y_train_torch = torch.tensor(y_train.values, dtype=torch.long)
y_test_torch = torch.tensor(y_test.values, dtype=torch.long)

# Define LSTM model
class LSTMEmailClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMEmailClassifier, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        _, (hidden, _) = self.lstm(x)
        out = self.fc(hidden[-1])
        return out

# Initialize model
input_size = X_train_seq.shape[1]
hidden_size = 128
num_layers = 2
output_size = 2
model = LSTMEmailClassifier(input_size, hidden_size, num_layers, output_size)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Train model
epochs = 10
for epoch in range(epochs):
    optimizer.zero_grad()
    outputs = model(X_train_torch.unsqueeze(1))
    loss = criterion(outputs, y_train_torch)
    loss.backward()
    optimizer.step()
    if (epoch + 1) % 2 == 0:
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")

# Evaluate model
with torch.no_grad():
    y_pred = model(X_test_torch.unsqueeze(1))
    y_pred = torch.argmax(y_pred, dim=1).numpy()
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)
    print("Classification Report:\n", classification_report(y_test, y_pred))

# Notification System
def notify_email(text):
    text_seq = vectorizer.transform([clean_text(text)]).toarray()
    text_tensor = torch.tensor(text_seq, dtype=torch.float32).unsqueeze(1)
    with torch.no_grad():
        prediction = model(text_tensor)
        label = torch.argmax(prediction, dim=1).item()
    if label == 0:
        print("📩 New Good Email Received!")
    else:
        print("🚫 Spam detected. No notification.")


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Epoch [2/10], Loss: 0.6315
Epoch [4/10], Loss: 0.4226
Epoch [6/10], Loss: 0.1710
Epoch [8/10], Loss: 0.0810
Epoch [10/10], Loss: 0.0493
Accuracy: 0.9811659192825112
Classification Report:
               precision    recall  f1-score   support

           0       0.98      1.00      0.99       966
           1       1.00      0.86      0.92       149

    accuracy                           0.98      1115
   macro avg       0.99      0.93      0.96      1115
weighted avg       0.98      0.98      0.98      1115



In [9]:
sample_email = "you have won a lottery!!!!"
notify_email(sample_email)


📩 New Good Email Received!
