In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
# Load the raw dataset; the code below reads its 'message' and 'label' columns.
df = pd.read_csv('spam_ham.csv')

# Remove the placeholder "Unnamed ..." columns. DataFrame.drop() targets *row*
# labels by default and returns a new frame, so the columns have to be passed
# via `columns=` and the result assigned back. Matching on the prefix covers
# both the "Unnamed 2" spelling and pandas' auto-generated "Unnamed: 2" names,
# and is a no-op when the file contains no such columns.
unnamed_columns = [col for col in df.columns if str(col).startswith('Unnamed')]
df = df.drop(columns=unnamed_columns)

# Missing messages load as NaN (a float, not text); normalise them to empty
# strings so every entry handed to the tokenizer is a str.
messages = df['message'].fillna('').astype(str)

# Build the vocabulary from the messages and encode each message as a list of
# integer word ids.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(messages)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(messages)

# Force every sequence to exactly 100 ids: shorter ones are padded at the end,
# longer ones are cut at the end.
padded_sequences = pad_sequences(sequences, maxlen=100, padding='post', truncating='post')

# Binary target: 1 for 'spam', 0 for every other label value.
labels = (df['label'] == 'spam').astype('int64').values

In [None]:
def _shuffled_train_test_split(features, targets, test_size=0.25, seed=42):
    """Randomly split two aligned arrays into train and test parts.

    Args:
        features: Array-like of samples, indexed along the first axis.
        targets: Array-like of labels with the same length as ``features``.
        test_size: Fraction of the samples assigned to the test part; the
            test count is rounded up.
        seed: Seed for the shuffle, so re-running the cell reproduces the
            same split.

    Returns:
        Tuple ``(features_train, features_test, targets_train, targets_test)``.

    Raises:
        ValueError: If the inputs differ in length, or if either part of the
            split would be empty.
    """
    features = np.asarray(features)
    targets = np.asarray(targets)
    n_samples = len(features)
    if len(targets) != n_samples:
        raise ValueError(
            f"features and targets differ in length: {n_samples} != {len(targets)}"
        )

    n_test = int(np.ceil(test_size * n_samples))
    n_train = n_samples - n_test
    if n_test <= 0 or n_train <= 0:
        raise ValueError(
            f"test_size={test_size} leaves an empty train or test set "
            f"for {n_samples} samples"
        )

    # One shared permutation keeps each sample aligned with its label.
    shuffled = np.random.default_rng(seed).permutation(n_samples)
    test_idx, train_idx = shuffled[:n_test], shuffled[n_test:]
    return features[train_idx], features[test_idx], targets[train_idx], targets[test_idx]


# Hold out 25% of the samples for testing. The split is done with NumPy because
# `train_test_split` is not imported anywhere in this notebook.
X_train, X_test, y_train, y_test = _shuffled_train_test_split(padded_sequences, labels, test_size=0.25)

In [None]:
# Binary text classifier assembled layer by layer:
# word ids -> 128-dim embeddings -> 64-unit LSTM -> single sigmoid output.
model = tf.keras.Sequential()
# The vocabulary size is len(word_index) + 1 so that id 0 (the padding value)
# also gets an embedding row; inputs are sequences of 100 ids.
model.add(Embedding(input_dim=len(word_index) + 1, output_dim=128, input_length=100))
model.add(LSTM(units=64))
model.add(Dense(units=1, activation='sigmoid'))

# Sigmoid output pairs with binary cross-entropy; accuracy is tracked as the metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs with mini-batches of 32 samples. The test split is passed
# as validation data, so it is also the data the model is monitored on.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_test, y_test),
)

In [None]:
# Score the trained model on the test split; evaluate() yields the loss
# followed by the compiled 'accuracy' metric.
loss, accuracy = model.evaluate(x=X_test, y=y_test)
print('Test accuracy:', accuracy)

# Export the model: save the trained Keras model, then convert it to TensorFlow Lite for packaging

In [None]:
# Persist the trained model to a single file; the .h5 suffix selects Keras'
# HDF5 format.
model.save(filepath='spam_ham_model.h5')


In [None]:
# Read the saved model back from disk so the conversion runs on the persisted
# file rather than on the in-memory training object.
loaded_model = tf.keras.models.load_model(filepath='spam_ham_model.h5')

# Convert the Keras model into TensorFlow Lite's serialized form; convert()
# returns the result as bytes.
converter = tf.lite.TFLiteConverter.from_keras_model(loaded_model)
tflite_model = converter.convert()

# Write the converted bytes out as the deployable .tflite file.
with open('model.tflite', mode='wb') as tflite_file:
    tflite_file.write(tflite_model)