In [15]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.callbacks import ModelCheckpoint
from google.colab import drive
#drive.mount("/content/drive")
#drive.flush_and_unmount()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Load dataset
#sms = pd.read_csv('data/spam.csv', encoding='latin-1')[['v1', 'v2']]
url = "https://raw.githubusercontent.com/geekysudh/cnn-spam-classifier/main/data/spam.csv"
sms = pd.read_csv(url, encoding='latin-1')[['v1', 'v2']]
sms.columns = ['label', 'text']
sms['label'] = sms['label'].map({'ham': 0, 'spam': 1})

In [3]:
# Tokenization
tokenizer = Tokenizer(num_words=10000, oov_token='<OOV>')
tokenizer.fit_on_texts(sms.text)
sequences = tokenizer.texts_to_sequences(sms.text)
padded = pad_sequences(sequences, maxlen=100, padding='post')

In [4]:
X_train, X_test, y_train, y_test = train_test_split(padded, sms.label, test_size=0.2, random_state=42)

In [5]:
# CNN Model
model = Sequential([
    Embedding(input_dim=10000, output_dim=64, input_length=100),
    Conv1D(128, 5, activation='relu'),
    GlobalMaxPooling1D(),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()



In [7]:
# Training
checkpoint_cb = ModelCheckpoint(
    "model_epoch_{epoch:02d}.keras",  # Save by epoch number
    save_freq='epoch',             # or 'batch' for finer control
    save_weights_only=False        # Set to True if saving only weights
)
model.fit(X_train, y_train, epochs=5, validation_split=0.1, batch_size=32, callbacks=[checkpoint_cb])

Epoch 1/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 38ms/step - accuracy: 1.0000 - loss: 2.1326e-04 - val_accuracy: 0.9776 - val_loss: 0.1296
Epoch 2/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 27ms/step - accuracy: 1.0000 - loss: 1.3993e-04 - val_accuracy: 0.9776 - val_loss: 0.1356
Epoch 3/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 26ms/step - accuracy: 1.0000 - loss: 9.5663e-05 - val_accuracy: 0.9776 - val_loss: 0.1381
Epoch 4/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 37ms/step - accuracy: 1.0000 - loss: 7.0821e-05 - val_accuracy: 0.9776 - val_loss: 0.1404
Epoch 5/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 26ms/step - accuracy: 1.0000 - loss: 6.3693e-05 - val_accuracy: 0.9776 - val_loss: 0.1429


<keras.src.callbacks.history.History at 0x7eee581e6e10>

In [8]:
# Evaluation
y_pred = (model.predict(X_test) > 0.5).astype('int32')
print(classification_report(y_test, y_pred))

[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step
              precision    recall  f1-score   support

           0       0.98      1.00      0.99       965
           1       0.99      0.90      0.94       150

    accuracy                           0.99      1115
   macro avg       0.99      0.95      0.97      1115
weighted avg       0.99      0.99      0.99      1115



In [None]:
# Save model
model.save('cnn_spam.keras')

In [None]:
# Download it to your local machine
from google.colab import files
files.download('cnn_spam.keras')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
#Save it back to repo
model.save('cnn_spam.keras')
%ls
#Save it back to repo
# Setup git config
!git config --global user.email "sudhiar@gmail.com"
!git config --global user.name "Sudh-Collab"

# Clone your repo using token
!git clone https://<PAT_TOKEN>@github.com/geekysudh/cnn-spam-classifier.git
# Move into repo and copy model
%mkdir cnn-spam-classifier/models
%ls
!cp cnn_spam.keras cnn-spam-classifier/models/

# Commit and push
!cd cnn-spam-classifier && git add models/cnn_spam.keras
!cd cnn-spam-classifier && git commit -m "Add trained model"
!cd cnn-spam-classifier && git push origin main