In [1]:
from google.colab import drive

# Attach the user's Google Drive to the Colab filesystem so that files stored
# there are reachable under the mount point below.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)


Mounted at /content/drive


In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, Conv1D, MaxPooling1D, Flatten, Dense
from keras.utils import to_categorical

# ---------------------------------------------------------------------------
# Load the dataset
# ---------------------------------------------------------------------------
# NOTE(review): an earlier cell mounts Google Drive, but the CSV is read from
# the current working directory -- confirm this is the intended location.
raw_data = pd.read_csv('./SMS.csv')

# ---------------------------------------------------------------------------
# Map labels to numerical values
# ---------------------------------------------------------------------------
# Only the exact strings 'Smishing' and 'ham' are recognised; every other
# LABEL value maps to NaN and its row is removed. Report how many rows are
# lost so the filtering is not silent.
label_to_id = {'Smishing': 1, 'ham': 0}
mapped_labels = raw_data['LABEL'].map(label_to_id)
n_unmapped = int(mapped_labels.isna().sum())
if n_unmapped:
    print(f"Dropping {n_unmapped} rows whose LABEL is not one of {sorted(label_to_id)}")

# Build the cleaned frame without mutating the raw one, so re-running this
# cell always starts from the same input.
data = (
    raw_data
    .assign(LABEL=mapped_labels)
    .dropna(subset=['LABEL'])
    .reset_index(drop=True)
    .astype({'LABEL': int})
)

# ---------------------------------------------------------------------------
# Prepare data for training
# ---------------------------------------------------------------------------
# Force every message to a string: a missing TEXT value would otherwise reach
# the tokenizer as a float NaN.
X = data['TEXT'].fillna('').astype(str)
y = data['LABEL']
print(y.unique())

# Split row positions first so that the tokenizer vocabulary can be learned
# from the training messages only. Stratifying keeps the class ratio the same
# in both partitions (requires at least two rows per class).
train_idx, test_idx = train_test_split(
    data.index.to_numpy(),
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# ---------------------------------------------------------------------------
# Tokenize text
# ---------------------------------------------------------------------------
max_words = 10000  # Maximum number of words the tokenizer keeps
max_length = 200  # Length every sequence is padded/truncated to
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X.iloc[train_idx])  # vocabulary from training rows only
X_sequences = tokenizer.texts_to_sequences(X)

# Pad sequences to fixed length
X_padded = pad_sequences(X_sequences, maxlen=max_length)

# One-hot encode the labels; fix the class count at 2 so the output width
# always matches the model's final Dense(2) layer.
y_categorical = to_categorical(y, num_classes=2)

# Materialise the train and test sets from the index split made above
X_train, X_test = X_padded[train_idx], X_padded[test_idx]
y_train, y_test = y_categorical[train_idx], y_categorical[test_idx]

# ---------------------------------------------------------------------------
# Define the CNN model
# ---------------------------------------------------------------------------
embedding_dim = 100
filters = 128
kernel_size = 5

model = Sequential()
model.add(Embedding(input_dim=max_words, output_dim=embedding_dim, input_length=max_length))
model.add(Conv1D(filters=filters, kernel_size=kernel_size, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(10, activation='relu'))
model.add(Dense(2, activation='softmax'))  # Two classes: 'Smishing' and 'ham'

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# ---------------------------------------------------------------------------
# Train the model
# ---------------------------------------------------------------------------
# NOTE(review): the held-out split doubles as the validation set here, so the
# reported validation accuracy is not an independent test estimate.
history = model.fit(
    X_train,
    y_train,
    epochs=5,
    batch_size=64,
    validation_data=(X_test, y_test),
)


[0. 1.]
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x24434231890>

ModuleNotFoundError: No module named 'keras.utils.vis_utils'

In [27]:
import numpy as np
import pickle
# Save the trained model (HDF5 file, as indicated by the .h5 extension)
model.save('./sms_model5.h5')

# Also keep a pickled copy of the model.
# NOTE(review): this duplicates the HDF5 file above; pickle files execute
# arbitrary code on load, so only ever unpickle files from a trusted source.
with open('sms_model_pickle.pkl', 'wb') as file:
    pickle.dump(model, file)

# Persist the fitted tokenizer as well: the saved model can only score text
# that has been encoded with the exact vocabulary it was trained with, so
# without this file the model is unusable in a fresh session.
with open('sms_tokenizer.pkl', 'wb') as tokenizer_file:
    pickle.dump(tokenizer, tokenizer_file)

# Load the saved model back from disk to verify the round trip
from keras.models import load_model
loaded_model = load_model('./sms_model5.h5')

# Custom message for classification
new_message = "collect your lottery of 2500000 here immediately. Click on this link"

# Tokenize and pad the new message the same way the training data was prepared
new_message_sequence = tokenizer.texts_to_sequences([new_message])
new_message_padded = pad_sequences(new_message_sequence, maxlen=max_length)

# Classify the new message; the result has one row per input message and one
# column per class.
prediction = loaded_model.predict(new_message_padded)
print(prediction)

# Take the arg-max along the class axis and pick the single message's row,
# instead of arg-maxing the flattened array (which is only correct for a
# batch of one). Convert to a plain int for the dictionary lookup below.
predicted_label = int(np.argmax(prediction, axis=-1)[0])
print(predicted_label)

# Decode the predicted label
label_mapping = {0: 'ham', 1: 'Smishing'}
predicted_class = label_mapping[predicted_label]

print(f"The model classifies the message as: '{predicted_class}'")


  saving_api.save_model(


[[0.39506707 0.60493296]]
1
The model classifies the message as: 'Smishing'


# New section