<a href="https://colab.research.google.com/github/hosseinrezaie0/CI---T.A/blob/main/Neural%20Network/Perceptron/CI_NN_Perceptron_Spam_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

In [2]:
# Read the tab-separated SMS file; it has no header row, so the two
# column names are supplied explicitly.
data = pd.read_csv(
    '/content/SMSSpamCollection',
    sep='\t',
    header=None,
    names=['label', 'message'],
)

In [3]:
# Preview the first five rows of the loaded frame.
data.head(n=5)

Unnamed: 0,label,message
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


In [4]:
# Encode the text labels as integers: ham -> 0, spam -> 1.
# The identity entries for 0 and 1 keep this cell idempotent: without them,
# re-running the cell on already-encoded labels would turn every value into
# NaN, because Series.map yields NaN for any value missing from the dict.
data['label'] = data['label'].map({'ham': 0, 'spam': 1, 0: 0, 1: 1})

In [5]:
# Count how many messages carry each label value.
data.loc[:, 'label'].value_counts()

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
0,4825
1,747


In [6]:
# Preview the first five rows to check the label column after encoding.
data.head(n=5)

Unnamed: 0,label,message
0,0,"Go until jurong point, crazy.. Available only ..."
1,0,Ok lar... Joking wif u oni...
2,1,Free entry in 2 a wkly comp to win FA Cup fina...
3,0,U dun say so early hor... U c already then say...
4,0,"Nah I don't think he goes to usf, he lives aro..."


In [7]:
# Hold out 20% of the messages for testing; the fixed random_state makes
# the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    data['message'],
    data['label'],
    test_size=0.2,
    random_state=42,
)

In [8]:
# Show the shape tuple of the training messages.
np.shape(x_train)

(4457,)

In [9]:
# Preview the first five training messages.
x_train.head(n=5)

Unnamed: 0,message
1978,Reply to win £100 weekly! Where will the 2006 ...
3989,Hello. Sort of out in town already. That . So ...
3935,How come guoyang go n tell her? Then u told her?
4078,Hey sathya till now we dint meet not even a si...
4086,Orange brings you ringtones from all time Char...


In [10]:
# Preview the first five training labels.
y_train.head(5)

Unnamed: 0,label
1978,1
3989,0
3935,0
4078,0
4086,1


In [11]:
# Text preprocessing and vectorization
# Fit the TF-IDF vocabulary and weights on the training messages only
# (capped at 5000 features), then densify for Keras.
vectorizer = TfidfVectorizer(max_features=5000)
x_train_vec = vectorizer.fit_transform(x_train).toarray()
# Re-use the already-fitted vectorizer for the test messages. Calling
# fit_transform here would rebuild the vocabulary from the test set, so the
# test columns would no longer correspond to the features the model is
# trained on, and the column count could differ.
x_test_vec = vectorizer.transform(x_test).toarray()

# Both matrices must share one feature space.
assert x_test_vec.shape[1] == x_train_vec.shape[1]

In [12]:
# Display the dense TF-IDF training matrix (NumPy abbreviates the repr of large arrays).
x_train_vec

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [13]:
# Build the model
# Feed-forward binary classifier over the TF-IDF vectors: two ReLU hidden
# layers (128 and 64 units), each followed by 20% dropout, and a single
# sigmoid output unit.
model = Sequential(
    [
        # Declare the input width with an explicit Input object instead of
        # passing `input_shape` to the first Dense layer, which makes Keras
        # emit a UserWarning when used inside a Sequential model.
        tf.keras.Input(shape=(x_train_vec.shape[1],)),
        Dense(128, activation="relu"),
        Dropout(0.2),
        Dense(64, activation="relu"),
        Dropout(0.2),
        Dense(1, activation="sigmoid"),
    ]
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [14]:
# Configure training: Adam optimizer, binary cross-entropy loss, and
# accuracy reported as the metric.
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

In [15]:
# Train for 5 epochs in mini-batches of 32, with 10% of the training data
# used for validation; the value returned by fit is kept in `history`.
history = model.fit(
    x_train_vec,
    y_train,
    epochs=5,
    batch_size=32,
    validation_split=0.1,
)

Epoch 1/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 43ms/step - accuracy: 0.8522 - loss: 0.4583 - val_accuracy: 0.9552 - val_loss: 0.1463
Epoch 2/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9844 - loss: 0.0700 - val_accuracy: 0.9753 - val_loss: 0.0794
Epoch 3/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9966 - loss: 0.0109 - val_accuracy: 0.9753 - val_loss: 0.0769
Epoch 4/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9997 - loss: 0.0043 - val_accuracy: 0.9776 - val_loss: 0.0827
Epoch 5/5
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9999 - loss: 0.0017 - val_accuracy: 0.9753 - val_loss: 0.0918
