In [27]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

In [28]:
df = pd.read_csv('spam.csv', encoding='ISO-8859-1')

df.drop(["Unnamed: 2", "Unnamed: 3", "Unnamed: 4"], axis=1, inplace=True)

tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['v2'])
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(df['v2'])
padded_sequences = pad_sequences(sequences, maxlen=100, padding='post', truncating='post')

labels = df['v1'].apply(lambda x: 1 if x == 'spam' else 0).values

In [29]:
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, labels, test_size=0.25)

In [30]:
model = tf.keras.Sequential([
    Embedding(len(word_index) + 1, 128, input_length=100),
    LSTM(64),
    Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [31]:
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x2c993be80>

In [32]:
loss, accuracy = model.evaluate(X_test, y_test)
print('Test accuracy:', accuracy)

Test accuracy: 0.8700646162033081


# Change model type and package it

In [33]:
model.save('spam_ham_model.h5')


  saving_api.save_model(


In [34]:
import tensorflow as tf
loaded_model = tf.keras.models.load_model('spam_ham_model.h5')

converter = tf.lite.TFLiteConverter.from_keras_model(loaded_model)
converter.experimental_enable_resource_variables = True
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
tflite_model = converter.convert()

with open('model.tflite', 'wb') as f:
    f.write(tflite_model)

INFO:tensorflow:Assets written to: /var/folders/k_/x2cc2f4901v6lyz_t7w3w3h40000gn/T/tmpol3qin1i/assets


INFO:tensorflow:Assets written to: /var/folders/k_/x2cc2f4901v6lyz_t7w3w3h40000gn/T/tmpol3qin1i/assets
2024-02-03 15:43:57.103634: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2024-02-03 15:43:57.103647: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2024-02-03 15:43:57.103756: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /var/folders/k_/x2cc2f4901v6lyz_t7w3w3h40000gn/T/tmpol3qin1i
2024-02-03 15:43:57.106983: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2024-02-03 15:43:57.106988: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /var/folders/k_/x2cc2f4901v6lyz_t7w3w3h40000gn/T/tmpol3qin1i
2024-02-03 15:43:57.116952: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2024-02-03 15:43:57.167429: I tensorflow/cc/saved_model/loader.cc:217] Running initialization