In [53]:
import numpy as np

## many to one RNN

class RNN:

  def __init__(self, layerDim, activations, a0):
    self.a0 = a0
    self.acacheNN = []
    self.zcacheNN = []
    self.acacheRNN = []
    self.zcacheRNN = []

    self.layerDim = layerDim
    self.layers = len(layerDim)
    self.activations = activations
    self.t = 0

    self.W = []
    self.b = []
    self.dW = []
    self.db = []

    self.init_weights()

  def init_weights(self):
    self.W = []
    self.b = []
    for i in range(2, self.layers-1):
      # print(i)
      # print(self.layerDim[i])
      # print(self.layerDim[i-1])
      self.W.append(np.random.randn(self.layerDim[i], self.layerDim[i-1]) * np.sqrt(2/self.layerDim[i-1]))
      self.b.append(np.zeros((self.layerDim[i], 1)))

      self.dW.append(np.zeros((self.layerDim[i], self.layerDim[i-1])))
      self.db.append(np.zeros((self.layerDim[i], 1)))

    self.Wx = np.random.randn(self.layerDim[1], self.layerDim[0]) * np.sqrt(2/self.layerDim[0])
    self.Wa = np.random.randn(self.layerDim[1], self.layerDim[-2]) * np.sqrt(2/self.layerDim[1])
    self.ba = np.zeros((self.layerDim[1], 1))

    self.dWx = np.zeros((self.layerDim[1], self.layerDim[0]))
    self.dWa = np.zeros((self.layerDim[1], self.layerDim[-2]))
    self.dba = np.zeros((self.layerDim[1], 1))

    self.Wy = np.random.randn(self.layerDim[-1], self.layerDim[-2]) * np.sqrt(2/self.layerDim[-2])
    self.by = np.zeros((self.layerDim[-1], 1))

    self.dWy = np.zeros((self.layerDim[-1], self.layerDim[-2]))
    self.dby = np.zeros((self.layerDim[-1], 1))


  def sigmoid (self, Z):
    Z = np.clip(Z, -500, 500) 
    return 1/(1+np.exp(-Z))
  
  def relu (self, Z):
    return np.maximum(0, Z)
  
  def tanh (self, Z):
    return np.tanh(Z)
  
  def softmax (self, Z):
    expZ = np.exp(Z - np.max(Z))
    return expZ / expZ.sum(axis=0, keepdims=True)
  
  def activationsfunc (self, Z, activation):
    if activation == 'sigmoid':
      return self.sigmoid(Z)
    elif activation == 'relu':
      return self.relu(Z)
    elif activation == 'tanh':
      return self.tanh(Z)
    else:
      return Z
    
  def activationsDerivative(self, Z, activation):
    if activation == 'sigmoid':
      sig = self.sigmoid(Z)
      return sig * (1 - sig)
    elif activation == 'relu':
      return (Z > 0).astype(Z.dtype)
    elif activation == 'tanh':
      return 1 - np.power(self.tanh(Z), 2)
    else:
      return 1
    
  def singleForwardNN(self, a_prev, w, b, activation):
    z = w @ a_prev + b
    a = self.activationsfunc(z, activation)

    self.acacheNN[self.t].append(a)
    self.zcacheNN[self.t].append(z)

    return a
  
  def forwardNN(self, X):
    self.acacheNN.append([X])
    self.zcacheNN.append([])

    a = X
    for i in range(len(self.W)):
      a = self.singleForwardNN(a, self.W[i], self.b[i], self.activations[i+1])

    return a
  
  def singleForwardRNN(self, a_prev, x, activation):
    # print(x.shape)
    # print(a_prev.shape)
    # print(self.Wx.shape)
    # print(self.Wa.shape)
    # print(self.ba.shape)
    z = self.Wx @ x + self.Wa @ a_prev + self.ba
    a = self.activationsfunc(z, activation)

    self.acacheRNN.append(a)
    self.zcacheRNN.append(z)

    a = self.forwardNN(a)

    return a
  
  def forwardRNN(self, X):
    a = self.a0
    self.acacheRNN = [a]
    self.zcacheRNN = []
    self.acacheNN = []
    self.zcacheNN = []
    self.t = 0

    for i in range(X.shape[1]):
      a = self.singleForwardRNN(a, X[:, i:i+1], self.activations[0])
      self.t += 1
    
    return a
  
  def singleBackwardNN(self, da, w, b, z, a_prev, activation):
    dz = da * self.activationsDerivative(z, activation)
    dw = dz @ a_prev.T
    db = np.sum(dz, axis=1, keepdims=True)
    da_prev = w.T @ dz

    return da_prev, dw, db
  
  def backwardNN(self, da):

    for i in range(len(self.W)-1, -1, -1):
      da, dw, db = self.singleBackwardNN(da, self.W[i], self.b[i], self.zcacheNN[self.t][i], self.acacheNN[self.t][i], self.activations[i+1])
      self.dW[i] += dw
      self.db[i] += db

    return da
  
  def singleBackwardRNN(self, da,z, x, a_prev, activation):
    da = self.backwardNN(da)

    dz = da * self.activationsDerivative(z, activation)
    self.dWx += dz @ x.T
    self.dWa += dz @ a_prev.T
    self.dba += np.sum(dz, axis=1, keepdims=True)

    da_prev = self.Wa.T @ dz

    return da_prev
  
  def backwardRNN(self, lossDerivative, X):
    dz = lossDerivative
    # print(dz.shape)
    # print(self.acacheRNN[-1].shape)
    # print(self.dWy.shape)
    self.dWy += dz @ self.acacheNN[-1][-1].T
    self.dby += np.sum(dz, axis=1, keepdims=True)
    da = self.Wy.T @ dz


    for i in range(X.shape[1]-1, -1, -1):
      self.t -= 1
      da = self.singleBackwardRNN(da, self.zcacheRNN[i], X[:, i:i+1], self.acacheNN[self.t][-1], self.activations[0])

    return da
  
  def updateWeights(self, learning_rate):
    for i in range(len(self.W)):
      self.W[i] -= learning_rate * self.dW[i]
      self.b[i] -= learning_rate * self.db[i]

      self.dW[i] = np.zeros(self.dW[i].shape)
      self.db[i] = np.zeros(self.db[i].shape)
      
    self.Wx -= learning_rate * self.dWx
    self.Wa -= learning_rate * self.dWa
    self.ba -= learning_rate * self.dba

    self.Wy -= learning_rate * self.dWy
    self.by -= learning_rate * self.dby

    self.dWx = np.zeros(self.dWx.shape)
    self.dWa = np.zeros(self.dWa.shape)
    self.dba = np.zeros(self.dba.shape)
    self.dWy = np.zeros(self.dWy.shape)
    self.dby = np.zeros(self.dby.shape)
    self.dWx = np.zeros(self.dWx.shape)

  
  def predict(self, X):
    a = self.forwardRNN(X)
    y = self.Wy @ a + self.by
    y = self.softmax(y)

    return y.reshape(-1, 1)
  
  def loss(self, y, y_hat):
    y = np.clip(y, 1e-10, 1-1e-10)
    y_hat = np.clip(y_hat, 1e-10, 1-1e-10)

    return -np.sum(y * np.log(y_hat))
  
  def lossDerivativeSoftmax(self, y, y_hat):
    return  y_hat - y
  
  def train(self, X, Y, learning_rate, epochs):
    examples = len(X)

    for i in range(epochs):
      loss = 0
      for j in range(examples):
        y_hat = self.predict(X[j])
        loss += self.loss(Y[0,j:j+1], y_hat)
        lossDerivative = self.lossDerivativeSoftmax(Y[0,j:j+1], y_hat)
        self.backwardRNN(lossDerivative, X[j])
      self.updateWeights(learning_rate)
      
      if i % 100 == 0:
        print("Epoch: ", i, " Loss: ", loss)
      
      # print("Epoch: ", i, " Loss: ", loss)

  def accuracy (self, X, Y):
    # A = self.predict(X)
    A = np.zeros(Y.shape)
    for i in range(Y.shape[1]):
      A[:, i:i+1] = self.predict(X[i])
    return np.mean(np.argmax(Y, axis=0) == np.argmax(A, axis=0))

  def precision (self, X, Y):
    A = np.zeros(Y.shape)
    for i in range(Y.shape[1]):
      A[:, i:i+1] = self.predict(X[i])

    A = (A == A.max(axis=0, keepdims=1)).astype(int)
    true_positive = np.sum((Y == 1) & (A == 1))
    predicted_positive = np.sum(A == 1)
    return true_positive / predicted_positive if predicted_positive > 0 else 0
    
  def saveWeights(self, filename):
    np.savez(filename, W=self.W, b=self.b, Wx=self.Wx, Wa=self.Wa, ba=self.ba, Wy=self.Wy, by=self.by)

  def loadWeights(self, filename):
    data = np.load(filename)
    self.W = data['W']
    self.b = data['b']
    self.Wx = data['Wx']
    self.Wa = data['Wa']
    self.ba = data['ba']
    self.Wy = data['Wy']
    self.by = data['by']

  




In [54]:
import numpy as np

# Define a function to load GloVe vectors
def load_glove_vectors(filepath):
    word_to_vec = {}
    with open(filepath, 'r', encoding='utf-8') as file:
        for line in file:
            values = line.split()
            word = values[0]
            vector = np.asarray(values[1:], dtype='float32')
            word_to_vec[word] = vector

    word_to_vec['<eol>'] = np.zeros((100,))
    word_to_vec['<unk>'] = np.zeros((100,))
    return word_to_vec

# Load the vectors
glove_vectors = load_glove_vectors('Data/glove.6B/glove.6B.100d.txt')

# glove_vectors["don't"]


In [55]:
import pandas as pd
import re

def remove_punctuation(text):
    return re.sub(r'[^\w\s]', '', str(text).strip())

def remove_extra_spaces(text):
    return re.sub(' +', ' ', text)

df = pd.read_csv('Data/spam_or_not_spam/spam_or_not_spam.csv')
# df['Text'] = df['Message'] if not NaN else df['Subject']

# df['Text'] = df['Message'].fillna(df['Subject'])

# df = df.drop(columns=['Message', 'Subject', 'Message ID', 'Date'], axis=1)

df["Text"] = df["email"].apply(remove_punctuation)
df["Text"] = df["Text"].str.replace('\n', ' ')
df["Text"] = df["Text"].apply(remove_extra_spaces)
df["Text"] = df["Text"].str.strip()
df["Text"] = df["Text"].str.lower()
df["Text"] = df["Text"].str.split(' ')

df = df.drop(columns=['email'], axis=1)

df['Spam'] = df['label']
# df["Spam/Ham"] = df["Spam/Ham"].map({'spam': 1, 'ham': 0})

df

Unnamed: 0,label,Text,Spam
0,0,"[date, wed, number, aug, number, number, numbe...",0
1,0,"[martin, a, posted, tassos, papadopoulos, the,...",0
2,0,"[man, threatens, explosion, in, moscow, thursd...",0
3,0,"[klez, the, virus, that, won, t, die, already,...",0
4,0,"[in, adding, cream, to, spaghetti, carbonara, ...",0
...,...,...,...
2995,1,"[abc, s, good, morning, america, ranks, it, th...",1
2996,1,"[hyperlink, hyperlink, hyperlink, let, mortgag...",1
2997,1,"[thank, you, for, shopping, with, us, gifts, f...",1
2998,1,"[the, famous, ebay, marketing, e, course, lear...",1


In [56]:
X = df['Text'].values
Y = df['Spam'].values

X_vec = []
Y_vec = np.zeros((2, len(Y)))

for i in range(len(X)):
  vec = np.zeros((100, len(X[i])))
  for j in range(len(X[i])):
    if X[i][j] in glove_vectors:
      vec[:, j] = glove_vectors[X[i][j]]
    else:
      vec[:, j] = glove_vectors['<unk>']

  X_vec.append(vec)
  Y_vec[Y[i] - 1, i] = 1

random = np.random.permutation(len(X_vec))
theshold = int(0.8*len(random))
X_train = [X_vec[i] for i in random[0:theshold]]
Y_train = Y_vec[:, random[0:theshold]]

X_test = [X_vec[i] for i in random[theshold:]]
Y_test = Y_vec[:, random[theshold:]]
# print(X_vec[0])
# print(Y_vec[:, 0:1])
# Y_vec[:, 0:1]

In [57]:
model = RNN([100, 100, 50, 2], ['tanh', 'tanh', 'softmax'], np.zeros((50, 1)))

In [58]:
model.loadWeights('RnnModelTest1.npz')

In [59]:
print("Train accuracy:", model.accuracy(X_train, Y_train))
print("Test accuracy:", model.accuracy(X_test, Y_test))

print("Train precision:", model.precision(X_train, Y_train))
print("Test precision:", model.precision(X_test, Y_test))

Train accuracy: 0.7525
Test accuracy: 0.7516666666666667
Train precision: 0.7525
Test precision: 0.7516666666666667


In [60]:
print(model.predict(X_train[1]))
print(Y_train[:, 0:1])

[[0.49964397]
 [0.50035603]]
[[0.]
 [1.]]
