In [4]:
import os
import numpy as np
import pandas as pd

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_datasets as tfds

print("Version: ", tf.__version__)
print("Eager mode: ", tf.executing_eagerly())
print("Hub version: ", hub.__version__)
print("GPU is", "available" if tf.config.list_physical_devices("GPU") else "NOT AVAILABLE")

Version:  2.6.0-dev20210407
Eager mode:  True
Hub version:  0.11.0
GPU is NOT AVAILABLE


In [55]:
# get data files
#!wget https://cdn.freecodecamp.org/project-data/sms/train-data.tsv
#!wget https://cdn.freecodecamp.org/project-data/sms/valid-data.tsv

train_file_path = "C:/Users/mail2/Envs/mlearn/train-data.tsv"
test_file_path = "C:/Users/mail2/Envs/mlearn/valid-data.tsv"

In [99]:
# read the dataset into pandas dataframe
training_dataset = pd.read_csv('C:/Users/mail2/Envs/mlearn/train-data.tsv', sep='\t',header=0,names=['class','text'])
testing_dataset = pd.read_csv('C:/Users/mail2/Envs/mlearn/valid-data.tsv', sep='\t',header=0,names=['class','text'])

In [57]:
training_dataset.head()

Unnamed: 0,class,text
0,ham,you can never do nothing
1,ham,"now u sound like manky scouse boy steve,like! ..."
2,ham,mum say we wan to go then go... then she can s...
3,ham,never y lei... i v lazy... got wat? dat day ü ...
4,ham,in xam hall boy asked girl tell me the startin...


In [58]:
# Convert the class values to 1,0s
training_dataset['spam'] = training_dataset['class']
training_dataset['spam'] = pd.Categorical(training_dataset['spam'])
training_dataset['spam'] = training_dataset.spam.cat.codes
training_dataset = training_dataset.drop(columns='class')
target = training_dataset.pop('spam')



In [59]:

training_dataset.head()

Unnamed: 0,text
0,you can never do nothing
1,"now u sound like manky scouse boy steve,like! ..."
2,mum say we wan to go then go... then she can s...
3,never y lei... i v lazy... got wat? dat day ü ...
4,in xam hall boy asked girl tell me the startin...


In [53]:
target.head()

0    0
1    0
2    0
3    0
4    0
Name: spam, dtype: int8

In [75]:
# convert the pandas dataframe to tensor dataset
training_list = training_dataset.values.tolist()
newlist = [item for items in training_list for item in items]
training_dataset_tf = tf.data.Dataset.from_tensor_slices((newlist, target.values))

In [82]:
#view the tensor dataset
train_examples_batch, train_labels_batch = next(iter(training_dataset_tf.batch(10)))
train_examples_batch

<tf.Tensor: shape=(10,), dtype=string, numpy=
array([b'you can never do nothing',
       b'now u sound like manky scouse boy steve,like! i is travelling on da bus home.wot has u inmind 4 recreation dis eve?',
       b'mum say we wan to go then go... then she can shun bian watch da glass exhibition...',
       b'never y lei... i v lazy... got wat? dat day \xc3\xbc send me da url cant work one...',
       b'in xam hall boy asked girl tell me the starting term for dis answer i can den manage on my own after lot of hesitation n lookin around silently she said the! intha ponnungale ipaditan;)',
       b"genius what's up. how your brother. pls send his number to my skype.",
       b'they finally came to fix the ceiling.',
       b'urgent! call 09066350750 from your landline. your complimentary 4* ibiza holiday or 10,000 cash await collection sae t&cs po box 434 sk3 8wp 150 ppm 18+',
       b'now that you have started dont stop. just pray for more good ideas and anything i see that can help y

In [83]:
train_labels_batch

<tf.Tensor: shape=(10,), dtype=int8, numpy=array([0, 0, 0, 0, 0, 0, 0, 1, 0, 0], dtype=int8)>

In [86]:
#create the embedding layer using TFhub
embedding = "https://tfhub.dev/google/nnlm-en-dim128-with-normalization/2"
hub_layer = hub.KerasLayer(embedding, input_shape=[], 
                           dtype=tf.string, trainable=True)
hub_layer(train_examples_batch[:1])

<tf.Tensor: shape=(3, 128), dtype=float32, numpy=
array([[ 0.15721717, -0.02363812,  0.07797033,  0.22276106,  0.19559482,
        -0.19180974,  0.05093085, -0.13103227, -0.15158576,  0.14640023,
        -0.16327888, -0.21670912, -0.02663764, -0.22346522,  0.11174662,
         0.01874302,  0.11723131,  0.07021365, -0.03274394,  0.06300227,
        -0.00887907, -0.12422097, -0.05396116, -0.03563889, -0.06176879,
        -0.00069816,  0.14700063, -0.04761916, -0.05615348,  0.12025253,
         0.03231254, -0.0719848 ,  0.09168004, -0.06877878,  0.04027882,
         0.16307311, -0.17345332, -0.04899427,  0.21478973,  0.08009243,
        -0.11140548, -0.11668202, -0.18458916, -0.3887992 , -0.0025783 ,
         0.21314901, -0.00887572,  0.0061898 ,  0.17256713,  0.01464447,
         0.09401417,  0.09160823, -0.08936025, -0.0034323 , -0.20323145,
        -0.07914589, -0.12887983,  0.00483267, -0.02056812,  0.05430359,
        -0.34453118,  0.04739786,  0.13110074, -0.06827086, -0.1006522 ,
 

In [87]:
#design the model
model = tf.keras.Sequential()
model.add(hub_layer)
model.add(tf.keras.layers.Dense(16, activation='relu'))
model.add(tf.keras.layers.Dense(1))

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer_18 (KerasLayer)  (None, 128)               124642688 
_________________________________________________________________
dense (Dense)                (None, 16)                2064      
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 17        
Total params: 124,644,769
Trainable params: 124,644,769
Non-trainable params: 0
_________________________________________________________________


In [88]:
#compile the model
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

In [94]:
#fit the training set
history = model.fit(training_dataset_tf.batch(128),
                    epochs=10,
                    
                    verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [100]:
#prepare the testing dataset by changing class values 0s & 1s and name the column 'spam'
testing_dataset['spam'] = testing_dataset['class']
testing_dataset['spam'] = pd.Categorical(testing_dataset['spam'])
testing_dataset['spam'] = testing_dataset.spam.cat.codes
testing_dataset = testing_dataset.drop(columns='class')
target = testing_dataset.pop('spam')


In [101]:
#create the test tensor flow dataset
testing_list = testing_dataset.values.tolist()
newlist = [item for items in testing_list for item in items]
testing_dataset_tf = tf.data.Dataset.from_tensor_slices((newlist, target.values))

In [102]:
#Let's evaluate and view the results
results = model.evaluate(testing_dataset_tf.batch(512), verbose=2)

for name, value in zip(model.metrics_names, results):
  print("%s: %.3f" % (name, value))

3/3 - 0s - loss: 0.0493 - accuracy: 0.9849
loss: 0.049
accuracy: 0.985


In [109]:
# function to predict messages based on model
# (should return list containing prediction and label, ex. [0.008318834938108921, 'ham'])
def predict_message(pred_text):
  
    probability = model.predict([pred_text])
    
    ans = 'ham' if probability[0][0] < 0 else 'spam'
    result = [probability[0][0], ans]
    return result

pred_text = "how are you doing today?"
predict_message(pred_text)
print(prediction)

[-5.407327, 'ham']


In [107]:
# Run this cell to test your function and model. Do not modify contents.
def test_predictions():
  test_messages = ["how are you doing today",
                   "sale today! to stop texts call 98912460324",
                   "i dont want to go. can we try it a different day? available sat",
                   "our new mobile video service is live. just install on your phone to start watching.",
                   "you have won £1000 cash! call to claim your prize.",
                   "i'll bring it tomorrow. don't forget the milk.",
                   "wow, is your arm alright. that happened to me one time too"
                  ]

  test_answers = ["ham", "spam", "ham", "spam", "spam", "ham", "ham"]
  passed = True

  for msg, ans in zip(test_messages, test_answers):
    prediction = predict_message(msg)
    if prediction[1] != ans:
      passed = False

  if passed:
    print("You passed the challenge. Great job!")
  else:
    print("You haven't passed yet. Keep trying.")

test_predictions()


You passed the challenge. Great job!
