In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB

In [3]:
# Load spam.csv and give the two generic columns descriptive names.
data = (
    pd.read_csv("spam.csv", encoding="cp1252")  # Common on Windows
    .rename(columns={"v1": "category", "v2": "message"})
)

# Binary target: 1 where category is 'spam', 0 otherwise. Vectorized comparison
# applies the same rule as a row-wise lambda and matches the label encoding used
# by the deep-learning cell further down.
data["Spam"] = (data["category"] == "spam").astype("int64")

# Hold out 25% of the rows for testing. The fixed seed makes the split survive
# Restart & Run All unchanged; stratifying keeps the spam/ham ratio the same in
# the train and test parts.
X_train, X_test, y_train, y_test = train_test_split(
    data["message"],
    data["Spam"],
    test_size=0.25,
    random_state=42,
    stratify=data["Spam"],
)

In [4]:
# Two sample texts, presumably kept for manual spot checks of a classifier
# (the list is not referenced by the cells visible here).
emails = [
    "Sounds great! Are you home now?",
    (
        "Will u meet ur dream partner soon? Is ur career off 2 a flyng start? "
        "2 find out free, txt HORO followed by ur star sign, e. g. HORO ARIES"
    ),
]

### Deep Learning Models

In [8]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, SpatialDropout1D
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Parameters
max_words = 5000  # Vocabulary size
max_len = 100  # Max sequence length
val_fraction = 0.2  # Share of rows, taken from the end, held out for validation
random_seed = 42

# Seed Python, NumPy and TensorFlow in one call so that weight initialisation,
# dropout masks and batch shuffling repeat across kernel restarts.
tf.keras.utils.set_random_seed(random_seed)

# Convert labels to binary
y = (data['category'] == 'spam').astype(int)

# Hold out the trailing rows for validation (the same rows `validation_split`
# would select), but do it explicitly so the tokenizer below can be fitted on
# the training rows only and never sees validation text.
n_train = int(len(data) * (1.0 - val_fraction))

# Tokenize and pad sequences
tokenizer = Tokenizer(num_words=max_words, oov_token="<OOV>")
tokenizer.fit_on_texts(data['message'].iloc[:n_train])  # training rows only
X = tokenizer.texts_to_sequences(data['message'])
X = pad_sequences(X, maxlen=max_len)

# LSTM Model
# `input_length` is intentionally not passed to Embedding: Keras 3 deprecates the
# argument and the sequence length is inferred from the data on the first fit.
# NOTE(review): a non-zero `recurrent_dropout` is documented to rule out the
# fused cuDNN LSTM kernel, so GPU training may be slower — confirm if speed matters.
model = Sequential([
    Embedding(max_words, 128),
    SpatialDropout1D(0.2),
    LSTM(100, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model on the leading rows and validate on the held-out tail.
y_array = y.to_numpy()
model.fit(
    X[:n_train],
    y_array[:n_train],
    epochs=5,
    batch_size=32,
    validation_data=(X[n_train:], y_array[n_train:]),
)


import numpy as np

def predict_message(model, tokenizer, message, max_len=100, threshold=0.5):
    """Classify a single text message as spam or not spam.

    Args:
        model: Fitted classifier whose ``predict`` returns a result indexable
            as ``result[0][0]`` for a one-row batch.
        tokenizer: Fitted tokenizer exposing ``texts_to_sequences``.
        message: Raw text to classify.
        max_len: Sequence length handed to ``pad_sequences``; it should match
            the length used when the model was trained.
        threshold: Scores strictly greater than this value are labelled
            "Spam". Defaults to 0.5, the cut-off that used to be hard-coded.

    Returns:
        A ``(label, score)`` tuple where ``label`` is "Spam" or "Not Spam" and
        ``score`` is the raw value produced by the model for this message.
    """
    # Preprocess the message: wrap it in a list so it forms a batch of one.
    sequence = tokenizer.texts_to_sequences([message])
    padded_sequence = pad_sequences(sequence, maxlen=max_len)

    # Make prediction: first row, first output unit.
    prediction = model.predict(padded_sequence)[0][0]

    # Interpret the result
    label = "Spam" if prediction > threshold else "Not Spam"
    return label, prediction

# Example message
message = "Congratulations! You've won a free iPhone. Click here to claim now!"
label, score = predict_message(model, tokenizer, message)

print(f"Message: {message}")
print(f"Prediction: {label} (Score: {score:.4f})")


Epoch 1/5




[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 30ms/step - accuracy: 0.8982 - loss: 0.2957 - val_accuracy: 0.9839 - val_loss: 0.0550
Epoch 2/5
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 28ms/step - accuracy: 0.9883 - loss: 0.0458 - val_accuracy: 0.9874 - val_loss: 0.0424
Epoch 3/5
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 28ms/step - accuracy: 0.9954 - loss: 0.0214 - val_accuracy: 0.9883 - val_loss: 0.0416
Epoch 4/5
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 28ms/step - accuracy: 0.9980 - loss: 0.0111 - val_accuracy: 0.9910 - val_loss: 0.0399
Epoch 5/5
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 28ms/step - accuracy: 0.9986 - loss: 0.0061 - val_accuracy: 0.9812 - val_loss: 0.0458
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 181ms/step
Message: Congratulations! You've won a free iPhone. Click here to claim now!
Prediction: Spam (Score: 0.9578)


In [6]:
# model = Sequential([
#     Embedding(max_words, 128, input_length=max_len),
#     SpatialDropout1D(0.2),
#     tf.keras.layers.Bidirectional(LSTM(100, dropout=0.2, recurrent_dropout=0.2)),
#     Dense(1, activation='sigmoid')
# ])
# model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# model.fit(X, y, epochs=5, batch_size=32, validation_split=0.2)

In [None]:
# import tensorflow as tf

# # Save the entire model in the recommended Keras format
# model.save("spam_classifier_lstm.keras")

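# # Save the tokenizer as well: inference must reuse the word index fitted during
# # training (only unpickle this file from a trusted source)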
# import pickle
# with open("tokenizer.pkl", "wb") as f:
#     pickle.dump(tokenizer, f)
