In [None]:
# import libraries
try:
  # Install the TensorFlow nightly build (the "!" shell escape needs IPython/Colab).
  !pip install tf-nightly
except Exception:
  pass
import tensorflow as tf
import pandas as pd
from tensorflow import keras
!pip install tensorflow-datasets
import tensorflow_datasets as tfds
import numpy as np
import matplotlib.pyplot as plt

print(tf.__version__)

In [None]:
# get data files
!wget https://cdn.freecodecamp.org/project-data/sms/train-data.tsv
!wget https://cdn.freecodecamp.org/project-data/sms/valid-data.tsv

train_file_path = "train-data.tsv"
test_file_path = "valid-data.tsv"

In [None]:
# Load both splits and prepare label / text arrays for training.

# Fixed label encoding shared by both splits. Factorizing each split on its
# own numbers the labels by order of first appearance, so the two splits
# could silently end up with opposite encodings.
LABEL_TO_ID = {'ham': 0, 'spam': 1}


def _encode_labels(labels):
  """Return an int64 array of class ids for a Series of 'ham'/'spam' strings.

  Raises:
    ValueError: if the Series holds a value that is not a key of LABEL_TO_ID
      (including missing values).
  """
  unknown = set(labels.unique()) - set(LABEL_TO_ID)
  if unknown:
    raise ValueError(f"unexpected label values: {sorted(map(str, unknown))}")
  return labels.map(LABEL_TO_ID).to_numpy(dtype='int64')


def _max_token_count(texts):
  """Return the largest number of whitespace-separated tokens in any message."""
  return max((len(str(message).split()) for message in texts), default=0)


column_names = ['spam', 'text']
train_data = pd.read_csv(train_file_path, sep='\t', names=column_names)
test_data = pd.read_csv(test_file_path, sep='\t', names=column_names)

# Row count of the larger split; used by the later cells as the vocabulary cap.
max_feat = max(len(train_data.index), len(test_data.index))

# The frames are left untouched (no pop / in-place overwrite) so this cell
# can be inspected afterwards.
train_data_labels = _encode_labels(train_data['spam'])
train_data_text = np.array(train_data['text'])
test_data_labels = _encode_labels(test_data['spam'])
test_data_text = np.array(test_data['text'])

# The vectorization layer's output_sequence_length counts tokens, so measure
# the longest message in tokens. (np.max on an array of strings returns the
# lexicographically greatest string, not the longest one.)
max_length_train = _max_token_count(train_data_text)
max_length_test = _max_token_count(test_data_text)

max_length = max(max_length_test, max_length_train)

print(max_feat, max_length)

In [None]:
# Text vectorization: raw message strings -> fixed-length integer token ids.

vect_layer = keras.layers.TextVectorization(
    output_mode='int',
    output_sequence_length=max_length,
    max_tokens=max_feat,
    split='whitespace',
    standardize='lower_and_strip_punctuation',
)

# The vocabulary is built from the training messages only.
vect_layer.adapt(train_data_text)


In [None]:
# Model: vectorize -> embed -> average over tokens -> dense head -> one logit.

emb_dim = 16

model = keras.Sequential([
    vect_layer,
    keras.layers.Embedding(max_feat, emb_dim, name='embedding'),
    keras.layers.GlobalAveragePooling1D(),
    keras.layers.Dense(16, activation='relu'),
    # No activation: the output is a raw logit, matching from_logits=True below.
    keras.layers.Dense(1),
])

model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.01),
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    # The string 'accuracy' resolves to binary accuracy with a 0.5 threshold,
    # which is meant for probabilities. The outputs here are logits, whose
    # decision boundary is 0, so set the threshold explicitly. The metric keeps
    # the name 'accuracy' so history keys and evaluate() output are unchanged.
    metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy', threshold=0.0)],
)

model.fit(train_data_text, train_data_labels, epochs=10)

In [None]:
# Evaluate on the held-out split, then sanity-check a few hand-written messages.

loss, acc = model.evaluate(test_data_text, test_data_labels)
print(acc)

test_messages = np.array([
    "how are you doing today",
    "sale today! to stop texts call 98912460324",
    "i dont want to go. can we try it a different day? available sat",
    "our new mobile video service is live. just install on your phone to start watching.",
    "you have won £1000 cash! call to claim your prize.",
    "i'll bring it tomorrow. don't forget the milk.",
    "wow, is your arm alright. that happened to me one time too",
])
print(test_messages)

# The model emits logits, so a positive value means class 1.
prediction = model(test_messages, training=False)
prediction = 1 * np.array(prediction > 0)
print(prediction)


In [None]:
# function to predict messages based on model
# (should return list containing prediction and label, ex. [0.008318834938108921, 'ham'])
def predict_message(pred_text):
  """Classify a single SMS message with the trained model.

  Args:
    pred_text: the message text, as one string.

  Returns:
    A two-element list ``[probability, label]``. ``probability`` is the
    sigmoid of the model's output logit as a Python float, and ``label`` is
    'spam' when that probability is at least 0.5, otherwise 'ham'.
  """
  # Use the model's own output for this message instead of whatever the
  # module-level `prediction` variable currently holds.
  logits = model(tf.constant([pred_text]), training=False)
  # The output has shape (1, 1): one message, one logit.
  spam_probability = float(tf.math.sigmoid(logits)[0][0])

  # NOTE(review): assumes class 1 means 'spam', i.e. 'ham' was encoded as 0
  # when the labels were prepared -- confirm against the preprocessing cell.
  label = 'spam' if spam_probability >= 0.5 else 'ham'

  return [spam_probability, label]

pred_text = "how are you doing today?"

prediction = predict_message(pred_text)
print(prediction)

In [None]:
# Run this cell to test your function and model. Do not modify contents.
def test_predictions():
  """Check predict_message against seven fixed messages and print the verdict.

  Each message is passed to predict_message and element [1] of the result is
  compared with the expected 'ham'/'spam' label. A single mismatch fails the
  whole check. Nothing is returned; the outcome is only printed.
  """
  test_messages = ["how are you doing today",
                   "sale today! to stop texts call 98912460324",
                   "i dont want to go. can we try it a different day? available sat",
                   "our new mobile video service is live. just install on your phone to start watching.",
                   "you have won £1000 cash! call to claim your prize.",
                   "i'll bring it tomorrow. don't forget the milk.",
                   "wow, is your arm alright. that happened to me one time too"
                  ]

  # Expected labels, in the same order as test_messages.
  test_answers = ["ham", "spam", "ham", "spam", "spam", "ham", "ham"]
  passed = True

  for msg, ans in zip(test_messages, test_answers):
    prediction = predict_message(msg)
    # Only the label (second element) is checked, not the numeric score.
    if prediction[1] != ans:
      passed = False

  if passed:
    print("You passed the challenge. Great job!")
  else:
    print("You haven't passed yet. Keep trying.")

test_predictions()
