STEP 1: Install required libraries (already in Colab but just in case)

STEP 2: Download and unzip the dataset

In [1]:
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/00228/smsspamcollection.zip
!unzip smsspamcollection.zip

--2025-07-11 15:14:51--  https://archive.ics.uci.edu/ml/machine-learning-databases/00228/smsspamcollection.zip
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified
Saving to: ‘smsspamcollection.zip’

smsspamcollection.z     [ <=>                ] 198.65K  --.-KB/s    in 0.07s   

2025-07-11 15:14:51 (2.77 MB/s) - ‘smsspamcollection.zip’ saved [203415]

Archive:  smsspamcollection.zip
  inflating: SMSSpamCollection       
  inflating: readme                  


STEP 3: Import necessary libraries

In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import numpy as np

STEP 4: Load the dataset

In [3]:
df = pd.read_csv("SMSSpamCollection", sep="\t", names=["label", "message"])

STEP 5: Encode labels (spam = 1, ham = 0)

In [4]:
df["label"] = df["label"].map({"ham": 0, "spam": 1})

STEP 6: Convert text to numerical TF-IDF vectors

In [5]:
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df["message"]).toarray()
y = df["label"].values

STEP 7: Split data into training and testing sets

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) # 42 -> etunalum podalam

STEP 8: Build the FNN model

In [7]:
model = Sequential()
model.add(Dense(16, activation='relu', input_shape=(X.shape[1],)))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # Binary output

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


STEP 9: Compile the model

In [8]:
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

STEP 10: Train the model

In [9]:
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/10
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 19ms/step - accuracy: 0.8383 - loss: 0.6324 - val_accuracy: 0.8816 - val_loss: 0.3526
Epoch 2/10
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.9209 - loss: 0.2613 - val_accuracy: 0.9785 - val_loss: 0.1238
Epoch 3/10
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.9819 - loss: 0.0896 - val_accuracy: 0.9857 - val_loss: 0.0707
Epoch 4/10
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.9907 - loss: 0.0418 - val_accuracy: 0.9901 - val_loss: 0.0543
Epoch 5/10
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9956 - loss: 0.0225 - val_accuracy: 0.9919 - val_loss: 0.0477
Epoch 6/10
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9964 - loss: 0.0148 - val_accuracy: 0.9919 - val_loss: 0.0451
Epoch 7/10
[1m140/140[0m 

<keras.src.callbacks.history.History at 0x7b8a2a13e750>

STEP 11: Evaluate the model

In [10]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f"\n✅ Test Accuracy: {accuracy * 100:.2f}%")

[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9897 - loss: 0.0580

✅ Test Accuracy: 99.10%


STEP 12: Predict custom messages

In [11]:
def predict_message(msg):
    X_input = vectorizer.transform([msg]).toarray()
    prediction = model.predict(X_input)
    return "Spam" if prediction[0][0] > 0.5 else "Not Spam"

Try a few examples

In [18]:
print("\nCustom Message Prediction:")
Sample1 = "Win a FREE iPhone now!!!"
Sample2 = "Hey, are we meeting today?"
print(f"Message: '{Sample1}' →", predict_message(Sample1))
print(f"Message: '{Sample2}' →", predict_message(Sample2))



Custom Message Prediction:
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 126ms/step
Message: 'Win a FREE iPhone now!!!' → Spam
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step
Message: 'Hey, are we meeting today?' → Not Spam
