In [None]:
# import libraries
import tensorflow as tf
import pandas as pd
from tensorflow import keras
import tensorflow_datasets as tfds
import numpy as np

# Record the installed TensorFlow version in the cell output for reproducibility.
print(tf.__version__)

In [None]:
# get data files
!wget https://cdn.freecodecamp.org/project-data/sms/train-data.tsv
!wget https://cdn.freecodecamp.org/project-data/sms/valid-data.tsv

train_file_path = "train-data.tsv"
test_file_path = "valid-data.tsv"

In [None]:
# Read in data from tsv file
# Both splits are parsed the same way: tab-separated, with the column labels
# supplied through `names=` (so the first row of each file is read as data).
train_data, test_data = (
    pd.read_csv(path, sep="\t", names=["class", "text"])
    for path in (train_file_path, test_file_path)
)

In [None]:
# Preview the first rows of the training frame ("class" and "text" columns).
train_data.head()

In [None]:
# NOTE(review): MAXLEN is not referenced by any other cell visible here — confirm whether it is still needed.
MAXLEN = 250
# Number of examples grouped into each padded batch of the tf.data pipeline.
BATCH_SIZE = 64
# Buffer size handed to the tf.data input pipeline.
BUFFER_SIZE = 10000

In [None]:
# Create text encoder
# Upper bound on the vocabulary size the TextVectorization layer will keep.
VOCAB_SIZE = 1000
encoder = tf.keras.layers.TextVectorization(max_tokens=VOCAB_SIZE)
# Build the vocabulary from the training messages only. The text column is
# passed directly; the former `.map(lambda text: text)` was an identity
# transform that only produced a copy of the same Series.
encoder.adapt(train_data['text'])

In [None]:
# Convert pandas DataFrames to TensorFlow Datasets
# Each dataset element is a (text, class) pair taken from one DataFrame row.
train_dataset, test_dataset = (
    tf.data.Dataset.from_tensor_slices((frame['text'], frame['class']))
    for frame in (train_data, test_data)
)

# Map text to encoded sequences and convert labels to numerical format
def encode_text_and_label(text, label):
  """Convert one (text, label) element into model-ready tensors.

  Args:
    text: The message text of a single dataset element.
    label: The class string of that element.

  Returns:
    A tuple ``(encoded_text, numerical_label)``: the output of ``encoder``
    for ``text`` and an int32 value that is 1 when ``label`` equals 'spam'
    and 0 otherwise.
  """
  is_spam = tf.equal(label, 'spam')
  return encoder(text), tf.cast(is_spam, tf.int32)

# Encode every element of both splits.
train_dataset = train_dataset.map(encode_text_and_label)
test_dataset = test_dataset.map(encode_text_and_label)

# Batch and prefetch the datasets for training using padded_batch
# BUFFER_SIZE is an element count meant for shuffling, not a prefetch depth:
# passing it to prefetch() allowed up to BUFFER_SIZE whole batches to be
# buffered and left the training split unshuffled. Shuffle the training
# examples (reshuffled on every epoch by default) before batching, and let
# tf.data choose the prefetch depth itself.
train_dataset = (
    train_dataset
    .shuffle(BUFFER_SIZE)
    .padded_batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)
# The evaluation split keeps its original order.
test_dataset = test_dataset.padded_batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

In [None]:
# Setup model
# The network is assembled layer by layer, in the order the data flows.
model = tf.keras.Sequential()

# Token ids -> 64-dimensional vectors. The input dimension is the size of the
# vocabulary learned by the encoder.
# Use masking to handle the variable sequence lengths
model.add(tf.keras.layers.Embedding(
    input_dim=len(encoder.get_vocabulary()),
    output_dim=64,
    mask_zero=True))

# Read each sequence in both directions with 64-unit LSTMs.
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)))

# Classification head: one hidden layer, then a single sigmoid output unit.
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

In [None]:
# The model's final Dense layer already applies a sigmoid, so its outputs are
# probabilities, not logits. from_logits must therefore be False; with True the
# loss would apply a second sigmoid on top of the model's own.
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
              optimizer=tf.keras.optimizers.Adam(1e-4),
              metrics=['accuracy'])

In [None]:
# Train the model
# `history` keeps the object returned by fit(); the second split is scored
# after every epoch via validation_data.
history = model.fit(
    train_dataset,
    validation_data=test_dataset,
    epochs=10,
)

In [None]:
# Score the trained model on the second split; evaluate() yields the loss
# followed by the 'accuracy' metric configured in compile().
test_loss, test_acc = model.evaluate(test_dataset)

print('Test Loss:', test_loss)
print('Test Accuracy:', test_acc)

In [None]:
# Preprocess the input string
# The message is wrapped in a list so the encoder receives a batch of one.
sample_batch = ["Hi, how are you"]
encoded_text = encoder(sample_batch)

# Run the model on the encoded batch and show the raw output array.
prediction = model.predict(encoded_text)
print(prediction)

In [None]:
# function to predict messages based on model
def predict_message(pred_text):
  """Classify a single message as 'ham' or 'spam'.

  Args:
    pred_text: The raw message text, as one string.

  Returns:
    A two-element list ``[prediction, label]``. ``prediction`` is the model
    output for the message as a built-in Python float, and ``label`` is
    'spam' when that value is greater than 0.5, otherwise 'ham'.
  """
  # Preprocess the input string
  encoded_text = encoder([pred_text]) # Pass as a list to add a batch dimension

  # Get the prediction from the model. The output has one row per message and
  # one column (the single sigmoid unit), so [0][0] picks the lone score.
  # verbose=0 silences the progress bar that predict() prints on every call,
  # and float() turns the NumPy scalar into a plain Python number.
  prediction = float(model.predict(encoded_text, verbose=0)[0][0])

  # Determine the label based on the prediction (e.g., threshold at 0.5)
  label = 'spam' if prediction > 0.5 else 'ham'

  return [prediction, label]

# Quick manual check of the function on one message.
pred_text = "how are you doing today?"

prediction_result = predict_message(pred_text)
print(prediction_result)

In [None]:
# Run this cell to test your function and model. Do not modify contents.
# Checks predict_message against seven fixed messages with known labels and
# prints whether every predicted label matched.
def test_predictions():
  test_messages = ["how are you doing today",
                   "sale today! to stop texts call 98912460324",
                   "i dont want to go. can we try it a different day? available sat",
                   "our new mobile video service is live. just install on your phone to start watching.",
                   "you have won £1000 cash! call to claim your prize.",
                   "i'll bring it tomorrow. don't forget the milk.",
                   "wow, is your arm alright. that happened to me one time too"
                  ]

  # Expected label for each message above, in the same order.
  test_answers = ["ham", "spam", "ham", "spam", "spam", "ham", "ham"]
  passed = True

  for msg, ans in zip(test_messages, test_answers):
    prediction = predict_message(msg)
    # Only the label (second element of the result) is compared.
    if prediction[1] != ans:
      passed = False

  if passed:
    print("You passed the challenge. Great job!")
  else:
    print("You haven't passed yet. Keep trying.")

test_predictions()
