<a href="https://colab.research.google.com/github/divya374r1/Neural-Network-and-Deep-Learning/blob/main/Spam_Detection_with_Dataset_Upload_(CSV).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# -------------------------------
# Spam Detection with CSV Dataset
# -------------------------------

import pandas as pd
import numpy as np
import tensorflow as tf

# -------------------------------
# 1. Load Dataset
# -------------------------------
# Update path if needed
# The whole load/clean step is one chain so re-running the cell always starts
# from the file on disk and never mutates a previously derived frame.
df = (
    pd.read_csv("/content/spam.csv", encoding="latin-1")
    # Rename and select required columns (any other columns are discarded)
    .rename(columns={"v1": "label", "v2": "text"})
    .loc[:, ["text", "label"]]
    # Rows missing either field cannot be trained on: a NaN text breaks
    # vectorization and a NaN label would otherwise be silently encoded as ham.
    .dropna(subset=["text", "label"])
    .reset_index(drop=True)
    .assign(
        text=lambda frame: frame["text"].astype(str),
        # Convert labels: spam = 1, anything else (ham) = 0.
        # Whitespace is stripped so values such as " spam" are still recognised.
        label=lambda frame: (
            frame["label"].astype(str).str.strip().str.lower().eq("spam").astype(int)
        ),
    )
)

# Plain NumPy arrays are what the vectorizer and model.fit consume below;
# dtype=object keeps the texts as ordinary Python strings.
texts = df["text"].to_numpy(dtype=object)
labels = df["label"].to_numpy()

print("Dataset loaded successfully!")
print(df.head())

# -------------------------------
# 2. Text Vectorization
# -------------------------------
# max_words caps the vocabulary size (it is also the Embedding input_dim in
# the model section); max_len is the fixed number of token ids per message.
max_words, max_len = 5000, 20

# Layer that turns raw strings into fixed-length integer token sequences.
vectorizer = tf.keras.layers.TextVectorization(
    output_mode="int",
    max_tokens=max_words,
    output_sequence_length=max_len,
)

# Build the vocabulary from the loaded message texts.
vectorizer.adapt(texts)

# -------------------------------
# 3. Build the Model
# -------------------------------
# Pipeline: raw string -> token ids (vectorizer) -> 16-d embeddings ->
# average over the sequence axis -> 16-unit ReLU layer -> single sigmoid unit.
keras_layers = tf.keras.layers

model_stack = [
    tf.keras.Input(shape=(1,), dtype=tf.string),
    vectorizer,
    keras_layers.Embedding(input_dim=max_words, output_dim=16),
    keras_layers.GlobalAveragePooling1D(),
    keras_layers.Dense(16, activation="relu"),
    keras_layers.Dense(1, activation="sigmoid"),
]
model = tf.keras.Sequential(model_stack)

# Binary target (spam = 1, ham = 0), hence binary cross-entropy on the
# sigmoid output.
compile_options = {
    "optimizer": "adam",
    "loss": "binary_crossentropy",
    "metrics": ["accuracy"],
}
model.compile(**compile_options)

model.summary()

# -------------------------------
# 4. Train the Model
# -------------------------------
# validation_split holds out the *last* 20% of the arrays exactly as passed
# (Keras slices them off before any shuffling). The rows are therefore
# permuted first, with a fixed seed, so the validation set is a reproducible
# random sample instead of whatever happens to sit at the end of the CSV.
# texts / labels themselves are left untouched.
shuffle_order = np.random.default_rng(42).permutation(len(texts))

history = model.fit(
    texts[shuffle_order],
    labels[shuffle_order],
    epochs=10,
    batch_size=32,
    validation_split=0.2
)

# -------------------------------
# 5. Test / Predict
# -------------------------------
# Two hand-written messages used as a quick sanity check of the trained model.
sample_texts = [
    "Get free money now",
    "Hey, are we meeting tomorrow?",
]

# dtype=object keeps the entries as Python strings, matching the training input.
test_messages = np.asarray(sample_texts, dtype=object)

# One row per message; column 0 is the sigmoid output read in the next section.
predictions = model.predict(test_messages)

# -------------------------------
# 6. Display Results
# -------------------------------
# Print each message with its model output and the verdict at a 0.5 cut-off.
for msg, prob in zip(test_messages, predictions):
    print(f"Message: {msg}")
    print(f"Output Probability: {prob[0]:.3f}")
    # prob[0] is the single sigmoid output for this message.
    if prob[0] > 0.5:
        print("Spam")
    else:
        print("Not Spam")
    print("-" * 40)


Dataset loaded successfully!
                                                text  label
0  Go until jurong point, crazy.. Available only ...      0
1                      Ok lar... Joking wif u oni...      0
2  Free entry in 2 a wkly comp to win FA Cup fina...      1
3  U dun say so early hor... U c already then say...      0
4  Nah I don't think he goes to usf, he lives aro...      0


Epoch 1/10
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.8605 - loss: 0.4459 - val_accuracy: 0.9013 - val_loss: 0.2288
Epoch 2/10
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9089 - loss: 0.2357 - val_accuracy: 0.9587 - val_loss: 0.1750
Epoch 3/10
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.9596 - loss: 0.1666 - val_accuracy: 0.9758 - val_loss: 0.1222
Epoch 4/10
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9773 - loss: 0.0988 - val_accuracy: 0.9785 - val_loss: 0.0861
Epoch 5/10
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9845 - loss: 0.0630 - val_accuracy: 0.9794 - val_loss: 0.0715
Epoch 6/10
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9882 - loss: 0.0463 - val_accuracy: 0.9830 - val_loss: 0.0629
Epoch 7/10
[1m140/140[0m 