In [None]:
!pip install -q git+https://github.com/tensorflow/docs
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_docs as tfdocs
import tensorflow_docs.plots
import tensorflow_docs.modeling

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


In [None]:
# Download and load the SMS spam dataset
!wget -q https://cdn.freecodecamp.org/project-data/sms-text-classification/train-data.tsv
!wget -q https://cdn.freecodecamp.org/project-data/sms-text-classification/valid-data.tsv

train_df = pd.read_csv('train-data.tsv', sep='\t', header=None, names=['label', 'message'])
valid_df = pd.read_csv('valid-data.tsv', sep='\t', header=None, names=['label', 'message'])


In [None]:
# Map labels to 0/1
label_map = {'ham': 0, 'spam': 1}


def _encode_labels(labels):
    """Return `labels` encoded as int64 codes according to `label_map`.

    Values that are already valid codes (0/1) pass through unchanged, so
    re-running this cell is a no-op instead of turning every label into NaN
    (which is what a plain ``Series.map(label_map)`` does on its second run).

    Raises:
        ValueError: if any value is neither a known label name nor a valid
            code. This includes missing values, which would otherwise reach
            the loss function as NaN targets.
    """
    encoded = labels.map(lambda value: label_map.get(value, value))
    unknown = ~encoded.isin(list(label_map.values()))
    if unknown.any():
        bad_values = sorted(set(labels[unknown].astype(str)))
        raise ValueError(f"Unexpected label values: {bad_values}")
    return encoded.astype('int64')


train_df['label'] = _encode_labels(train_df['label'])
valid_df['label'] = _encode_labels(valid_df['label'])

# Split features and labels
X_train = train_df['message']
y_train = train_df['label'].values
X_valid = valid_df['message']
y_valid = valid_df['label'].values


In [None]:
# Text -> integer-id preprocessing
max_features = 10000    # passed as max_tokens (vocabulary size cap)
sequence_length = 100   # passed as output_sequence_length

vectorize_layer = layers.TextVectorization(
    output_mode='int',
    max_tokens=max_features,
    output_sequence_length=sequence_length,
)

# Fit the vocabulary on the training messages only
vectorize_layer.adapt(X_train.values)


In [None]:
# Turn the raw message strings into integer token-id tensors
X_train_vect = vectorize_layer(X_train.values)
X_valid_vect = vectorize_layer(X_valid.values)

# Wrap features/labels in batched, prefetching tf.data pipelines.
# Only the training pipeline is shuffled.
batch_size = 32
train_ds = (
    tf.data.Dataset.from_tensor_slices((X_train_vect, y_train))
    .shuffle(10000)
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)
valid_ds = (
    tf.data.Dataset.from_tensor_slices((X_valid_vect, y_valid))
    .batch(batch_size)
    .prefetch(tf.data.AUTOTUNE)
)


In [None]:
# Bag-of-embeddings classifier: embed token ids, average over the sequence,
# then a small dense head ending in a single sigmoid unit.
model = keras.Sequential()
model.add(layers.Embedding(input_dim=max_features + 1, output_dim=16))
model.add(layers.GlobalAveragePooling1D())
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()


In [None]:
# Train for 10 epochs, reporting validation metrics after each one
history = model.fit(train_ds, epochs=10, validation_data=valid_ds, verbose=1)


In [None]:
def predict_message(message):
    """
    Classify a single SMS message as ham or spam.

    Args:
        message: The raw message text.

    Returns:
        [spam_probability (float), label (str)], where spam_probability is
        the model's sigmoid output and label is 'spam' when that value is
        above 0.5, otherwise 'ham'.
    """
    # The vectorizer works on batches, so wrap the message in a one-element list
    msg_vect = vectorize_layer(tf.convert_to_tensor([message]))
    # verbose=0 keeps Keras from printing a progress bar for every
    # single-message prediction (very noisy when called in a loop)
    prob = float(model.predict(msg_vect, verbose=0)[0][0])
    label = 'spam' if prob > 0.5 else 'ham'
    return [prob, label]

# Smoke test: one spam-looking and one ordinary message
for sample_text in (
    "Congratulations! You've won a free lottery ticket.",
    "Hey, are we still meeting for lunch today?",
):
    print(predict_message(sample_text))


In [None]:
# RUN THIS CELL TO TEST YOUR MODEL. DO NOT MODIFY CONTENTS.
# Evaluate on the validation dataset. The result is unpacked as (loss, accuracy),
# matching the single 'accuracy' metric the model was compiled with.
loss, accuracy = model.evaluate(valid_ds, verbose=2)
# Report accuracy as a percentage with two decimal places
print(f"Validation accuracy: {accuracy:.2%}")

# (The notebook’s built‑in tests will now check your predict_message function.)


In [None]:
# 🚨 RUN THIS CELL TO TEST YOUR FUNCTION AND MODEL. DO NOT MODIFY CONTENTS. 🚨
def test_predictions():
    """Check predict_message against seven fixed messages with known labels.

    Each message is passed to predict_message and the result is printed.
    The label (element 1 of the returned list) is compared with the expected
    answer. All messages are always evaluated, with no early exit on the
    first mismatch, and a pass/fail message is printed at the end.
    Returns nothing.
    """
    test_messages = [
        "how are you doing today",
        "sale today! to stop texts call 98912460324",
        "i dont want to go. can we try it a different day? available sat",
        "our new mobile video service is live. just install on your phone to start watching.",
        "you have won £1000 cash! call to claim your prize.",
        "i'll bring it tomorrow. don't forget the milk.",
        "wow, is your arm alright. that happened to me one time too"
    ]

    # Expected label for each message above, in the same order
    test_answers = ["ham", "spam", "ham", "spam", "spam", "ham", "ham"]

    passed = True
    for msg, ans in zip(test_messages, test_answers):
        prediction = predict_message(msg)
        print(f"Input: {msg!r}\n → Predicted: {prediction}\n")
        # Only the label is checked; the probability is not asserted on
        if prediction[1] != ans:
            passed = False

    if passed:
        print("You passed the challenge. Great job!")
    else:
        print("You haven't passed yet. Keep trying.")

# Run the check immediately when this cell executes
test_predictions()
