# **Spam Classification Using MLP with Backpropagation**

## Importing libraries

In [13]:
#importing necessary libraries
import numpy as np              #for performing numerical operations
import pandas as pd             #for handling structured data in the form of dataframes
from sklearn.model_selection import train_test_split   #Splits data into training and testing sets
from sklearn.feature_extraction.text import TfidfVectorizer   #converts text data into numerical form using TF-IDF

## Loading dataset

In [14]:
#loading dataset
url="/content/spam.csv"
df=pd.read_csv(url)
print(df.head)

<bound method NDFrame.head of      Category                                            Message
0         ham  Go until jurong point, crazy.. Available only ...
1         ham                      Ok lar... Joking wif u oni...
2        spam  Free entry in 2 a wkly comp to win FA Cup fina...
3         ham  U dun say so early hor... U c already then say...
4         ham  Nah I don't think he goes to usf, he lives aro...
...       ...                                                ...
5567     spam  This is the 2nd time we have tried 2 contact u...
5568      ham               Will ü b going to esplanade fr home?
5569      ham  Pity, * was in mood for that. So...any other s...
5570      ham  The guy did some bitching but I acted like i'd...
5571      ham                         Rofl. Its true to its name

[5572 rows x 2 columns]>


## Data preprocessing

In [15]:
# Convert labels to binary (ham=0, spam=1)
df['Category'] = df['Category'].map({'ham': 0, 'spam': 1})

# Convert text into numerical form using TF-IDF
vectorizer = TfidfVectorizer(stop_words='english', max_features=1000) #stop_words='english': removes common English words like ("the", "is", "a") that do not carry meaningful information
                                                                      #max_features=1000: Limits the vocabulary to the 1000 most important words across all messages

X = vectorizer.fit_transform(df['Message']).toarray()  #converts text messages into a numerical feature matrix, where each row represents a message and each column corresponds to a word’s importance based on TF-IDF scores
y = df['Category'].values.reshape(-1, 1)

## Splitting Dataset

In [16]:
# Split dataset (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## MLP Neural Network

In [17]:
# Define the MLP Neural Network
class MLP:
  #Defines network structure (input, hidden, output layers)
    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.learning_rate = learning_rate

        # Initialize weights and biases
        self.W1 = np.random.randn(self.input_size, self.hidden_size)
        self.b1 = np.zeros((1, self.hidden_size))
        self.W2 = np.random.randn(self.hidden_size, self.output_size)
        self.b2 = np.zeros((1, self.output_size))

#Uses sigmoid activation for introducing non-linearity and calculates its derivative for backpropagation
    def sigmoid(self, x):              #Converts input into probabilities (0 to 1)
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):   #Helps adjust weights during backpropagation
        return x * (1 - x)

    def forward(self, X):             #Computes output by applying weights, biases, and activations layer by layer
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = self.sigmoid(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = self.sigmoid(self.z2)
        return self.a2

    def backward(self, X, y):
        m = y.shape[0]

        # Compute error
        error = self.a2 - y
        d_output = error * self.sigmoid_derivative(self.a2)

        # Compute gradient for hidden layer
        error_hidden = np.dot(d_output, self.W2.T)
        d_hidden = error_hidden * self.sigmoid_derivative(self.a1)

        # Update weights and biases
        self.W2 -= self.learning_rate * np.dot(self.a1.T, d_output) / m
        self.b2 -= self.learning_rate * np.sum(d_output, axis=0, keepdims=True) / m
        self.W1 -= self.learning_rate * np.dot(X.T, d_hidden) / m
        self.b1 -= self.learning_rate * np.sum(d_hidden, axis=0, keepdims=True) / m

#Runs forward and backward propagation for a given number of epochs and prints loss every 100 iterations
    def train(self, X, y, epochs=500):
        for epoch in range(epochs):
            self.forward(X)
            self.backward(X, y)

            if epoch % 100 == 0:
                loss = np.mean((self.a2 - y) ** 2)
                print(f"Epoch {epoch}, Loss: {loss:.6f}")

#Performs forward propagation on new data
    def predict(self, X):
        return (self.forward(X) > 0.5).astype(int)  # Convert probabilities to binary classification(0 or 1)

## Training MLP

In [18]:
# Train the MLP
input_size = X_train.shape[1]  #number of input features from the training data which was transformed using TF-IDF
hidden_size = 6   # Reduced from 10 → 6 neurons
output_size = 1   # Binary classification (spam=1, ham=0)
learning_rate = 0.01   # Controls how fast weights are updated

#Creates an instance of the MLP class with the defined parameters
mlp = MLP(input_size, hidden_size, output_size, learning_rate)
mlp.train(X_train, y_train, epochs=500)   #Runs forward and backward propagation for 500 epochs


Epoch 0, Loss: 0.436880
Epoch 100, Loss: 0.373241
Epoch 200, Loss: 0.315451
Epoch 300, Loss: 0.267669
Epoch 400, Loss: 0.230799


## Predictions & Accuracy

In [19]:
# Predictions
predictions = mlp.predict(X_test)   #Uses the trained model to classify test messages

# Accuracy Calculation
accuracy = np.mean(predictions == y_test) * 100   #Compares predictions with actual labels and calculates percentage accuracy
print(f"Model Accuracy: {accuracy:.2f}%")      #Displays the model’s performance in percentage form


Model Accuracy: 70.58%


## Conclusion

This code uses backpropagation to create a basic Multi-Layer Perceptron (MLP) neural network that can distinguish between spam and ham SMS messages.  It starts by loading and preparing the dataset, then utilizing TF-IDF vectorization to turn text messages into numerical features.  Next, training and testing sets are created from the dataset.  Using forward and backward propagation and gradient descent to update weights, a neural network with one hidden layer (6 neurons) is started and learns to differentiate between spam and ham messages.  Following 500 epochs of training, the model predicts on the test set, and accuracy is used to assess its performance.