In [73]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping


import random

# Seed both the stdlib generator and NumPy's global generator so repeated runs
# start from the same random state. The stdlib seed alone does not affect
# NumPy-based shuffling.
# NOTE(review): TensorFlow keeps its own generator, which is not seeded here,
# so weight initialisation may still vary between runs — confirm whether full
# training reproducibility is required.
random.seed(42)
np.random.seed(42)

In [74]:
# Read the dataset from disk and pull the 'text' and 'label' columns out as
# two parallel Python lists.
data = pd.read_csv('data.csv')
texts, labels = (data[column].tolist() for column in ('text', 'label'))

In [75]:
# Fit a Tokenizer (num_words=5000) on the corpus, then convert every text into
# its integer sequence.
# NOTE(review): the tokenizer is fitted on every row, including rows that a
# later cell assigns to the test split — confirm this is intended.
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(texts=texts)
sequences = tokenizer.texts_to_sequences(texts=texts)

In [76]:
# Fixed sequence length handed to pad_sequences; the same value is reused by
# the embedding layer and by predict_spam.
max_len = 256
X = pad_sequences(sequences=sequences, maxlen=max_len)

In [77]:
# Labels as a NumPy array, aligned row-for-row with X.
y = np.asarray(labels)

In [78]:
# Hold out 20% of the samples for the final evaluation. random_state pins the
# shuffle so the same rows land in the test set on every run, regardless of
# how many random draws earlier cells consumed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [79]:
# Assemble the network one layer at a time: token embedding, one LSTM
# (return_sequences=False), dropout, a ReLU hidden layer, and a single sigmoid
# output unit.
model = Sequential()
model.add(Embedding(input_dim=5000, output_dim=128, input_length=max_len))
model.add(LSTM(256, return_sequences=False))
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

In [80]:
# Configure training: Adam optimizer, binary cross-entropy loss, and accuracy
# reported as the tracked metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [81]:
# Callback that watches 'val_loss' with a patience of 3 and is asked to
# restore the best weights when it stops training.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

In [82]:
# Train for at most 10 epochs in batches of 16, with validation_split=0.2 on
# the training data; the early-stopping callback may end training sooner.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=16,
    epochs=10,
    validation_split=0.2,
    callbacks=[early_stopping],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10


In [83]:
# Score the fitted model on the data it was trained on.
loss_train, accuracy_train = model.evaluate(x=X_train, y=y_train)
print(f"Training Accuracy: {accuracy_train:.4f}")

Training Accuracy: 0.9971


In [84]:
# Score the fitted model on the held-out test split.
loss, accuracy = model.evaluate(x=X_test, y=y_test)
print(f"Test Accuracy: {accuracy:.4f}")

Test Accuracy: 0.9883


In [85]:
def predict_spam(texts, threshold=0.5):
    """Label messages as 'Spam' or 'Not Spam' using the trained model.

    Args:
        texts: A single message (str) or an iterable of messages.
        threshold: Score strictly above which a message is labelled 'Spam'.
            Defaults to 0.5.

    Returns:
        A list with one label per input message, in input order. Empty input
        yields an empty list without calling the model.
    """
    # Wrap a lone string so it is treated as one message instead of being
    # iterated character by character.
    if isinstance(texts, str):
        texts = [texts]
    else:
        texts = list(texts)

    # Nothing to score: skip tokenisation and the model call entirely.
    if not texts:
        return []

    sequences = tokenizer.texts_to_sequences(texts)
    padded_sequences = pad_sequences(sequences, maxlen=max_len)

    predictions = model.predict(padded_sequences)

    # Flatten so each score is a plain scalar rather than a one-element row
    # before it is compared with the threshold.
    scores = np.asarray(predictions).ravel()
    return ['Spam' if score > threshold else 'Not Spam' for score in scores]

In [86]:
# Smoke-test predict_spam on one hand-written message and print its label.
text = ["Congratulations! You've won a free ticket to the cinema. Call now to claim your prize!"]
predictions = predict_spam(text)
for message, verdict in zip(text, predictions):
    print(f"Text: {message}\nPredicted Label: {verdict}\n")

Text: Congratulations! You've won a free ticket to the cinema. Call now to claim your prize!
Predicted Label: Spam



In [87]:
# Write the trained model to 'model.h5' in the working directory.
model.save(filepath='model.h5')

In [88]:
from joblib import dump

# Persist the fitted tokenizer with joblib. The '.h5' suffix is only a file
# name here: the bytes are written by joblib's dump, not by an HDF5 writer.
with open('tokenizer.h5', 'wb') as tokenizer_file:
    dump(tokenizer, tokenizer_file)

In [89]:
import json

# Write the preprocessing settings to 'params.json' — presumably so code that
# loads the saved model can pad and tokenise the same way (verify against the
# consumer).
params = dict(max_len=max_len, num_words=5000)

with open('params.json', 'w') as params_file:
    json.dump(params, params_file)