In [1]:
import numpy as np
import pandas as pd

SEED = 42
np.random.seed(SEED)

In [2]:
# activation function
def softmax(z):
    exp = np.exp(z)
    return exp / sum(exp)

### Dataset
a tiny dataset with 100 examples of SMSs with label either it is spam or ham. it is preprocessed version so that we can much directed on RNN.
<br>This 100 examples collected from [Kaggle - SMS Spam Collection Dataset](https://www.kaggle.com/datasets/uciml/sms-spam-collection-dataset), full dataset credits goes to them.

In [3]:
data = pd.read_pickle("datasets/preprocessed-spam-sms.pkl")
data

Unnamed: 0,label,text,encoded_text,ham,spam
0,ham,"Go until jurong point, crazy.. Available only ...","[[[1.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0...",1,0
1,ham,Ok lar... Joking wif u oni...,"[[[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0...",1,0
2,ham,U dun say so early hor... U c already then say...,"[[[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0...",1,0
3,ham,"Nah I don't think he goes to usf, he lives aro...","[[[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0...",1,0
4,ham,Even my brother is not like to speak with me. ...,"[[[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0...",1,0
...,...,...,...,...,...
95,spam,TheMob> Check out our newest selection of cont...,"[[[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0...",0,1
96,spam,Think ur smart ? Win å£200 this week in our we...,"[[[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0...",0,1
97,spam,December only! Had your mobile 11mths+? You ar...,"[[[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0...",0,1
98,spam,Call Germany for only 1 pence per minute! Call...,"[[[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0...",0,1


In [4]:
# splitting dataset into train and test sets
# train set with 90 samples and test set with 10 samples.
data = data.sample(frac=1).reset_index(drop=True)
train_data = data.head(90).copy().reset_index(drop=True)
test_data = data.tail(10).copy().reset_index(drop=True)

X_train = np.array(train_data["encoded_text"])
y_train = np.array(train_data[["ham","spam"]])

test_text = test_data["text"]
X_test = np.array(test_data["encoded_text"])
y_test = np.array(test_data[["ham","spam"]])

### Model: Recurrent Neural Networks (RNNs)

RNNs are a type of neural network designed for processing sequences of data. They use recurrent connections to maintain a hidden state, allowing them to capture dependencies within sequences. RNNs are used in tasks like natural language processing and time series analysis.

The architecture of a Recurrent Neural Network (RNN) consists of:

1. Input Layer: Receives sequential data.

2. Hidden State: Maintains information from previous time steps, allowing the network to capture temporal dependencies.

3. Weight Sharing: Reuses the same set of weights and biases across time steps.

4. Output Layer: Produces predictions or representations based on the processed sequence.

5. Training: RNNs are trained using backpropagation through time (BPTT), where gradients are computed and weights are updated.

RNNs can be further extended with more advanced variants like LSTM and GRU to address issues like vanishing gradients.


In [5]:
class RNN:
    
    input_size = 750
    output_size = 2
    hidden_size = 90
    lr = 0.001
    cr = 1
    
    def __init__(self):
        
        self.weights = { 'input-to-hidden' :np.random.randn(self.hidden_size, self.input_size), 
                         'hidden-to-hidden':np.random.randn(self.hidden_size, self.hidden_size),
                         'hidden-to-output':np.random.randn(self.output_size, self.hidden_size) }
        self.biases = { 'hidden':np.zeros((self.hidden_size, 1)),
                        'output':np.zeros((self.output_size, 1)) }
    
    def forward(self, inputs):
        
        hidden_activations = np.zeros((self.hidden_size, 1))
        # saving to history
        self.last_inputs = inputs
        self.states = { 0: hidden_activations }
        # passing each word into input and hidden layers
        for i, x in enumerate(inputs):
            input_z = self.weights['input-to-hidden'].dot(x)
            hidden_activations = np.tanh( input_z + self.weights['hidden-to-hidden'].dot(hidden_activations) + self.biases['hidden'])
            self.states[i + 1] = hidden_activations
        # passing through output layer
        outputs = softmax(self.weights['hidden-to-output'].dot(hidden_activations) + self.biases['output'])
        return outputs
    
    def backward(self, error):
        
        # intializing new weights and biases for input layer, hidden layer
        nw_input_to_hidden = np.zeros((self.hidden_size, self.input_size))
        nw_hidden_to_hidden = np.zeros((self.hidden_size, self.hidden_size))
        nb_hidden = np.zeros((self.hidden_size, 1))
        # Calculating outputlayer new weights and biases
        nw_hidden_to_output = error.dot(self.states[len(self.last_inputs)].T)
        nb_output = error
        # calculating hidden layer error with respect to output layer
        hidden_error = self.weights['hidden-to-output'].T.dot(error)
        
        # calculating new weights for hidden layers
        for ti in reversed(range(len(self.last_inputs))):
            
            temp = ((1 - self.states[ti + 1] ** 2) * hidden_error)
            # Calculating new hiddenlayer bias
            nb_hidden += temp
            # Calculating new hidden-to-hidden weights
            nw_hidden_to_hidden += temp.dot(self.states[ti].T)
            # Calculating new input-to-hidden weights
            nw_input_to_hidden += temp.dot(self.last_inputs[ti].T)
            # updating hidden layer error
            hidden_error = self.weights['hidden-to-hidden'].dot(temp)
        
        # Clipping all gradients
        for d in [nw_input_to_hidden, nw_hidden_to_hidden, nw_hidden_to_output, nb_hidden, nb_output]:
            np.clip(d, -self.cr, self.cr, out=d)
        
        # updating weights and biases
        self.weights['hidden-to-output'] -= self.lr * nw_hidden_to_output
        self.weights['hidden-to-hidden'] -= self.lr * nw_hidden_to_hidden
        self.weights['input-to-hidden'] -= self.lr * nw_input_to_hidden
        self.biases['output'] -= self.lr * nb_output
        self.biases['hidden'] -= self.lr * nb_hidden
    
    def fit(self, X, y, epochs=1):
        
        batch_size = X.shape[0]
        for epoch in range(epochs):
            
            batch_loss = []
            for i in range(batch_size):

                # forward
                out = self.forward(X[i])
                # loss
                batch_loss.append(-np.log(out[np.argmax(y[i])])[0])
                # backward
                out[np.argmax(y[i])] -= 1
                self.backward(out)
            
            if epoch%100==0: print('epoch ',(epoch+1),'\t... loss: ',sum(batch_loss)/batch_size)

    def predict(self, inputs):
        
        hidden_activations = np.zeros((self.hidden_size, 1))
        for _ , x in enumerate(inputs):
            hidden_activations = np.tanh( self.weights['input-to-hidden'].dot(x) + self.weights['hidden-to-hidden'].dot(hidden_activations) + self.biases['hidden'])
        
        return softmax(self.weights['hidden-to-output'].dot(hidden_activations) + self.biases['output'])


In [6]:
def random_test_prediction(model):
    label_names = ['ham', 'spam']
    tag = np.random.randint(0,10)
    print('\nTest prediction:',
          '\nSMS: ',test_text[tag] ,
          '\nactual    : ',label_names[np.argmax(y_test[tag])],
          '\nprediction: ',label_names[np.argmax(model.predict(X_test[tag]))])

### Training

In [7]:
rnn = RNN()
rnn.fit(X_train, y_train, epochs=1000)

epoch  1 	... loss:  2.828153920636377
epoch  101 	... loss:  2.0558761333003606
epoch  201 	... loss:  0.9623349563030441
epoch  301 	... loss:  0.6977277797023781
epoch  401 	... loss:  0.6664936161802586
epoch  501 	... loss:  0.6343851476026469
epoch  601 	... loss:  0.5513082546863726
epoch  701 	... loss:  0.5600874677127684
epoch  801 	... loss:  0.5985504138889881
epoch  901 	... loss:  0.5515569019164757


In [8]:
random_test_prediction(rnn)


Test prediction: 
SMS:  Todays Voda numbers ending 7548 are selected to receive a $350 award. If you have a match please call 08712300220 quoting claim code 4041 standard rates app 
actual    :  spam 
prediction:  spam


In [31]:
random_test_prediction(rnn)


Test prediction: 
SMS:  So Ì_ pay first lar... Then when is da stock comin... 
actual    :  ham 
prediction:  ham


#### Looks like model learned somethig!