# Spam Detection using Tensorflow (v - 1.5.0), pandas (v - 0.20.3 ) & numpy (v - 1.14.5)

# Importing necessary Packages and modules accordingly.

In [23]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import tensorflow as tf
import warnings

# Load the dataset

In [2]:
data = pd.read_csv(r"C:\Users\Agniv\Desktop\Internships\devtern internship\machine learning project\Spam Detection project\dataset\spam_ham_dataset.csv", encoding='latin-1')

# Handling missing values

In [3]:
data.fillna(method='ffill', inplace=True)

# Select columns for analysis

In [4]:
data = data[['label', 'message']]

# Encode labels

In [5]:
label_encoder = LabelEncoder()

In [6]:
data['label'] = label_encoder.fit_transform(data['label'])

# Split the dataset into training and test sets

In [7]:
X_train, X_test, y_train, y_test = train_test_split(data['message'], data['label'], test_size=0.2, random_state=42)

# Convert 'message' column to strings

In [8]:
X_train = X_train.astype(str)

In [9]:
X_test = X_test.astype(str)

# Vectorize the text data

In [10]:
vectorizer = TfidfVectorizer()

In [11]:
X_train = vectorizer.fit_transform(X_train)

In [12]:
X_test = vectorizer.transform(X_test)

# Build the TensorFlow model

In [13]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Compile the model

In [14]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

Instructions for updating:
keep_dims is deprecated, use keepdims instead


# Train the model

In [15]:
history = model.fit(X_train.toarray(), y_train, epochs=15, batch_size=64, validation_data=(X_test.toarray(), y_test))

Train on 4136 samples, validate on 1035 samples
Epoch 1/15

Epoch 2/15

Epoch 3/15

Epoch 4/15

Epoch 5/15

Epoch 6/15

Epoch 7/15

Epoch 8/15

Epoch 9/15

Epoch 10/15

Epoch 11/15

Epoch 12/15

Epoch 13/15

Epoch 14/15

Epoch 15/15



# Before evaluating the model

In [24]:
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=FutureWarning)

# Evaluate the model

In [25]:
y_pred = model.predict_classes(X_test.toarray())

In [26]:
accuracy = accuracy_score(y_test, y_pred)

In [27]:
conf_matrix = confusion_matrix(y_test, y_pred)

In [30]:
class_report = classification_report(y_test, y_pred, zero_division=1)


In [34]:
print(f'Accuracy: {accuracy*100:.2f}%')

Accuracy: 71.69%


In [32]:
print(f'Confusion Matrix:\n{conf_matrix}')

Confusion Matrix:
[[742   0]
 [293   0]]


In [33]:
print(f'Classification Report:\n{class_report}')

Classification Report:
              precision    recall  f1-score   support

           0       0.72      1.00      0.84       742
           1       1.00      0.00      0.00       293

    accuracy                           0.72      1035
   macro avg       0.86      0.50      0.42      1035
weighted avg       0.80      0.72      0.60      1035

