#Incorporating LS model to weight adjustment - experiment


In [1]:
from google.colab import drive
drive.mount('/content/drive')   

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


##Imports

In [2]:
import numpy as np
import pandas as pd
import nltk
import os
import time
import random
import copy

In [3]:
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer
from numpy import save, load
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from random import seed
from random import randint
from sklearn.utils import shuffle
from sklearn.model_selection import KFold

##Data preprocessing

In [51]:
#location to breast-cancer-wisconsin-w.csv dataset
fileLocation = '/content/drive/My Drive/Colab Notebooks/datasets/breast-cancer/breast-cancer-wisconsin-w.csv'

data = pd.read_csv(fileLocation) 
X = data.values[:, 1:10]
y = data.values[:, 10]

In [52]:
#encoding target class
le = LabelEncoder()
y = le.fit_transform(y)

## NN with LS model

In [60]:
class NeuralNetworkLSNN():
    def __init__(self, n_input, n_hidden=30,
                 epochs=100, alpha=0.5,
                 random_state=1, enhancement=0.1, enhancement_type="none"):
        np.random.seed(random_state)

        self.n_input = n_input
        self.n_hidden = n_hidden
        self.n_output = 1 #binary classifier

        self.w_h, self.w_o = self._initialize_weights()

        self.epochs = epochs+1
        self.alpha = alpha

        self.enhancement = enhancement

        self.enhancement_type = enhancement_type

        self.enhancement_functions = {
            "none": self._adjustment_enh_none,
            "ls_value": self._adjustment_ls_value,
            "ls_value_save": self._adjustment_ls_value_save,
            "save_node_difference": self._adjustment_save_node_difference,
            "value_node_difference": self._adjustment_value_node_difference,
            "save_node_unified": self._adjustment_save_node_unified,
            "value_node_unified": self._adjustment_value_node_unified,
            "save_node_unified_flattened": self._adjustment_save_node_unified_flattened,
            "value_node_unified_flattened": self._adjustment_value_node_unified_flattened
        }

    def fit_eval(self, X, y, X_eval, y_eval):
        # repeat
        start_time = time.time()
        
        error_per_epoch = np.zeros((self.epochs))
        accuracy_per_epoch = np.zeros((self.epochs))

        error_per_epoch_train = np.zeros((self.epochs))
        accuracy_per_epoch_train = np.zeros((self.epochs))
        
        for i in range(self.epochs):
            error, accuracy = self._eval(X_eval, y_eval)
            error_per_epoch[i] = error
            accuracy_per_epoch[i] = accuracy
            error_train, accuracy_train = self._eval(X_train, y_train)
            accuracy_per_epoch_train[i] = accuracy_train

            # iterate over training set
            self.w_h_old = self.w_h
            self.w_o_old = self.w_o
            for j in range(X_train.shape[0]):
                # target for actual input
                target = y_train[j]

                # trained input
                X = X_train[j]

                # activations
                a1, a2, a3, z2, z3 = self._feedforward(X)
                
                self.enhancement_functions[self.enhancement_type](a1, a2, a3, z2, z3, target)

            if (i % 50 == 0): #informative output
              print(self.enhancement_type, ': epoch: ', i, ' time: ', (time.time() - start_time), ' error: ', error, ' accuracy: ', accuracy, ' accuracy_train: ', accuracy_train)

        print(time.time() - start_time)
        return error_per_epoch, accuracy_per_epoch, accuracy_per_epoch_train

    def _adjustment_none_unified(self, a1, a2, a3, z2, z3, target, save):
        # backpropagation - adjusting output layer weights
        adj_o = np.zeros((self.n_hidden, self.n_output))

        delta_output = self._delta_output(a3, target)
        ls = self._loosely_symmetric(a2, z3)
        cmp = np.greater_equal(ls, a2)
        diff = np.abs(ls - a2)
        n_a2 = [a2[i] * (1.0 + self.enhancement)  if  cmp[i] else a2[i] * (1.0 - self.enhancement) for i in range (a2.shape[0])]
        if(save):
          a2 = n_a2
        adj_o = - self.alpha * delta_output * n_a2

        adj_h = np.zeros((self.n_hidden, self.n_input))

        #for each hidden node
        for index in range(self.n_hidden):
          delta_output_times_w_h = self.w_o[index] * self._delta_output(
                      a3, target)
          delta_hidden = self._delta_hidden(
                        delta_output_times_w_h, a2[index])
                  
          ls = self._loosely_symmetric(a1, z2[index])
          cmp = np.greater_equal(ls, a1)
          diff = np.abs(ls - a1)
          n_a1 = [a1[i] * (1.0 + self.enhancement) if  cmp[i] else a1[i] * (1.0 - self.enhancement) for i in range (a1.shape[0])]
          # if(save):
          #   a1 = np.asarray(n_a1)
          adj_h[index] = - self.alpha * delta_hidden * n_a1

        self.w_o += adj_o.reshape(self.w_o.shape)
        self.w_h += adj_h.T

    def _adjustment_none_unified_flattened(self, a1, a2, a3, z2, z3, target, save):
        # backpropagation - adjusting output layer weights
        adj_o = np.zeros((self.n_hidden, self.n_output))

        delta_output = self._delta_output(a3, target)
        ls = self._loosely_symmetric(a2, z3)
        cmp = np.greater_equal(ls, a2)
        diff = np.abs(ls - a2)
        n_a2 = [(a2[i] * (1.0 + self.enhancement) if a2[i] * (self.enhancement) < diff[i] else ls[i])  if  cmp[i] else (a2[i] * (1.0 - self.enhancement) if a2[i] * (self.enhancement) < diff[i] else ls[i])  for i in range (a2.shape[0])]
        if(save):
          a2 = n_a2
        adj_o = - self.alpha * delta_output * n_a2

        adj_h = np.zeros((self.n_hidden, self.n_input))

        #for each hidden node
        for index in range(self.n_hidden):
          delta_output_times_w_h = self.w_o[index] * self._delta_output(
                      a3, target)
          delta_hidden = self._delta_hidden(
                        delta_output_times_w_h, a2[index])
                  
          ls = self._loosely_symmetric(a1, z2[index])
          cmp = np.greater_equal(ls, a1)
          diff = np.abs(ls - a1)
          n_a1 = [(a1[i] * (1.0 + self.enhancement) if a1[i] * (self.enhancement) < diff[i] else ls[i])  if  cmp[i] else (a1[i] * (1.0 - self.enhancement) if a1[i] * (self.enhancement) < diff[i] else ls[i])  for i in range (a1.shape[0])]
          if(save):
            a1 = np.asarray(n_a1)
          adj_h[index] = - self.alpha * delta_hidden * n_a1

        self.w_o += adj_o.reshape(self.w_o.shape)
        self.w_h += adj_h.T

    def _adjustment_save_node_unified(self, a1, a2, a3, z2, z3, target):
      self._adjustment_none_unified(a1, a2, a3, z2, z3, target, True)

    def _adjustment_value_node_unified(self, a1, a2, a3, z2, z3, target):
      self._adjustment_none_unified(a1, a2, a3, z2, z3, target, False)

    def _adjustment_save_node_unified_flattened(self, a1, a2, a3, z2, z3, target):
      self._adjustment_none_unified_flattened(a1, a2, a3, z2, z3, target, True)

    def _adjustment_value_node_unified_flattened(self, a1, a2, a3, z2, z3, target):
      self._adjustment_none_unified_flattened(a1, a2, a3, z2, z3, target, False)

    def _adjustment_none_difference(self, a1, a2, a3, z2, z3, target, save):
        # backpropagation - adjusting output layer weights
        adj_o = np.zeros((self.n_hidden, self.n_output))

        delta_output = self._delta_output(a3, target)
        ls = self._loosely_symmetric(a2, z3)
        n_a2 = (ls-a2)*self.enhancement + a2
        if(save):
          a2 = n_a2
        adj_o = - self.alpha * delta_output * n_a2

        adj_h = np.zeros((self.n_hidden, self.n_input))

        #for each hidden node
        for index in range(self.n_hidden):
          delta_output_times_w_h = self.w_o[index] * self._delta_output(
                      a3, target)
          delta_hidden = self._delta_hidden(
                        delta_output_times_w_h, a2[index])
                  
          ls = self._loosely_symmetric(a1, z2[index])
          n_a1 = (ls-a1)*self.enhancement + a1
          if(save):
            a1 = n_a1
          adj_h[index] = - self.alpha * delta_hidden * n_a1

        self.w_o += adj_o.reshape(self.w_o.shape)
        self.w_h += adj_h.T

    def _adjustment_save_node_difference(self, a1, a2, a3, z2, z3, target):
        self._adjustment_none_difference(a1, a2, a3, z2, z3, target, True)

    def _adjustment_value_node_difference(self, a1, a2, a3, z2, z3, target):
        self._adjustment_none_difference(a1, a2, a3, z2, z3, target, False)     

    def _adjustment_ls_value(self, a1, a2, a3, z2, z3, target):
        # backpropagation - adjusting output layer weights
        adj_o = np.zeros((self.n_hidden, self.n_output))

        delta_output = self._delta_output(a3, target)
        ls = self._loosely_symmetric(a2, z3)
        adj_o = - self.alpha * delta_output * ls

        adj_h = np.zeros((self.n_hidden, self.n_input))

        #for each hidden node
        for index in range(self.n_hidden):
          delta_output_times_w_h = self.w_o[index] * self._delta_output(
                      a3, target)
          delta_hidden = self._delta_hidden(
                        delta_output_times_w_h, a2[index])
                  
          ls = self._loosely_symmetric(a1, z2[index])
          adj_h[index] = - self.alpha * delta_hidden * ls

        self.w_o += adj_o.reshape(self.w_o.shape)
        self.w_h += adj_h.T

    def _adjustment_ls_value_save(self, a1, a2, a3, z2, z3, target):
        # backpropagation - adjusting output layer weights
        adj_o = np.zeros((self.n_hidden, self.n_output))

        delta_output = self._delta_output(a3, target)
        a2 = self._loosely_symmetric(a2, z3)
        adj_o = - self.alpha * delta_output * a2

        adj_h = np.zeros((self.n_hidden, self.n_input))

        #for each hidden node
        for index in range(self.n_hidden):
          delta_output_times_w_h = self.w_o[index] * self._delta_output(
                      a3, target)
          delta_hidden = self._delta_hidden(
                        delta_output_times_w_h, a2[index])
                  
          a1 = self._loosely_symmetric(a1, z2[index])

          adj_h[index] = - self.alpha * delta_hidden * a1

        self.w_o += adj_o.reshape(self.w_o.shape)
        self.w_h += adj_h.T

    def _adjustment_enh_none(self, a1, a2, a3, z2, z3, target):
        # backpropagation - adjusting output layer weights
        adj_o = np.zeros((self.n_hidden, self.n_output))

        delta_output = self._delta_output(a3, target)
        adj_o = - self.alpha * delta_output * a2

        adj_h = np.zeros((self.n_hidden, self.n_input))

        #for each hidden node
        for index in range(self.n_hidden):
          delta_output_times_w_h = self.w_o[index] * self._delta_output(
                      a3, target)
          delta_hidden = self._delta_hidden(
                        delta_output_times_w_h, a2[index])
                  
          adj_h[index] = - self.alpha * delta_hidden * a1

        self.w_o += adj_o.reshape(self.w_o.shape)
        self.w_h += adj_h.T

    def _feedforward(self, X):
        # input
        a1 = X.astype(np.float)

        # wieghted sum - hidden layer
        z2 = a1.dot(self.w_h)
        a2 = self._sigmoid(z2.astype(np.float))

        # wieghted sum - output layer
        z3 = a2.dot(self.w_o)
        a3 = self._sigmoid(z3.astype(np.float))
        return a1, a2, a3, z2, z3

    def _delta_output(self, output, target):
        return -(target - output) * self._sigmoid_derivative(output)

    def _delta_hidden(self, delta_output_times_w_h, output):
        return self._sigmoid_derivative(output) * delta_output_times_w_h

    def _loosely_symmetric(self, a, d):
      b =  np.ones((a.shape)) - a
      c = 1 - d

      bd = (b*d) / (b+d)
      ac = (a*c) / (a+c)
      return (a + bd) / (1 + ac + bd)

    def _predict_value(self, X):
        a1, a2, a3, z2, z3 = self._feedforward(X)
        if a3 >= 0.5:
          return 1
        else:
          return 0

    def predict(self, X):
        result = []
        for x in X:
          result.append(self._predict_value(x))
        return result

    def _activation(self, weight, layer):
        weighted_sum = np.dot(weight, layer)
        return self._sigmoid(weighted_sum)

    def _initialize_weights(self):
        w1 = np.random.randn(self.n_input, self.n_hidden) / np.sqrt(self.n_hidden)
        w2 = np.random.randn(self.n_hidden, self.n_output) / np.sqrt(self.n_output)
        return w1, w2

    def _accuracy_metric(self, actual, predicted):
      correct = 0
      for i in range(len(actual)):
        if actual[i] == predicted[i]:
          correct += 1
      return correct / float(len(actual)) * 100.0

    def _error_squared(self, actual, predicted):
      error = 0
      for i in range(len(actual)):
        error += (predicted[i] - actual[i])**2
      return error * 0.5

    def _eval(self, X, y):
        result = []
        for x in X:
          result.append(self._predict_value(x))
        error = self._error_squared(y, result)
        accuracy = self._accuracy_metric(y, result)
        return error, accuracy

    def _sigmoid(self, z):
        return 1.0 / (1.0 + np.exp(-z))

    def _sigmoid_derivative(self, x):
        return x * (1 - x)

##K-fold cross validation 

In [61]:
accuracy_eval = {
            "none": [],
            "ls_value": [],
            "ls_value_save": [],
            "save_node_difference": [],
            "value_node_difference": [],
            "save_node_unified": [],
            "value_node_unified": [],
            "save_node_unified_flattened": [],
            "value_node_unified_flattened": [],
        }
accuracy_train = {
            "none": [],
            "ls_value": [],
            "ls_value_save": [],
            "save_node_difference": [],
            "value_node_difference": [],
            "save_node_unified": [],
            "value_node_unified": [],
            "save_node_unified_flattened": [],
            "value_node_unified_flattened": [],
        }

##Graph

In [62]:
types = ["none", "ls_value", "ls_value_save", "save_node_difference", "value_node_difference", "save_node_unified", "value_node_unified", "save_node_unified_flattened", "value_node_unified_flattened"]

In [63]:
seed(42)

In [64]:
X_new = copy.copy(X)
y_new = copy.copy(y)
X_new, y_new = shuffle(X_new, y_new)

In [65]:
kf = KFold(n_splits=10)

In [66]:
i = 1
for train_index, test_index in kf.split(X_new):
  print(i)
  X_train, X_test = X_new[train_index], X_new[test_index]
  y_train, y_test = y_new[train_index], y_new[test_index]
  for item in types:
    nn = NeuralNetworkLSNN(n_input=X_train.shape[1], n_hidden=30, epochs=100, enhancement=0.1, enhancement_type=item)
    loss, accuracy1, accuracy2 = nn.fit_eval(X_train, y_train, X_test, y_test)
    accuracy_eval[item].append(accuracy1)
    accuracy_train[item].append(accuracy2)
  i+= 1
  break #show only first run

1
none : epoch:  0  time:  0.28562402725219727  error:  27.0  accuracy:  21.73913043478261  accuracy_train:  29.47882736156352
none : epoch:  50  time:  14.084242820739746  error:  2.5  accuracy:  92.7536231884058  accuracy_train:  94.78827361563518
none : epoch:  100  time:  27.65171241760254  error:  4.0  accuracy:  88.40579710144928  accuracy_train:  92.18241042345277
27.65690040588379
ls_value : epoch:  0  time:  0.6153035163879395  error:  27.0  accuracy:  21.73913043478261  accuracy_train:  29.47882736156352
ls_value : epoch:  50  time:  30.550877332687378  error:  10.5  accuracy:  69.56521739130434  accuracy_train:  64.49511400651465
ls_value : epoch:  100  time:  60.345903396606445  error:  10.5  accuracy:  69.56521739130434  accuracy_train:  64.49511400651465
60.348490953445435




ls_value_save : epoch:  0  time:  0.6851861476898193  error:  27.0  accuracy:  21.73913043478261  accuracy_train:  29.47882736156352
ls_value_save : epoch:  50  time:  34.293734073638916  error:  10.5  accuracy:  69.56521739130434  accuracy_train:  64.49511400651465
ls_value_save : epoch:  100  time:  68.16679191589355  error:  10.5  accuracy:  69.56521739130434  accuracy_train:  64.49511400651465
68.16944193840027
save_node_difference : epoch:  0  time:  0.6602678298950195  error:  27.0  accuracy:  21.73913043478261  accuracy_train:  29.47882736156352
save_node_difference : epoch:  50  time:  33.91011834144592  error:  24.0  accuracy:  30.434782608695656  accuracy_train:  35.50488599348534
save_node_difference : epoch:  100  time:  67.36231970787048  error:  24.0  accuracy:  30.434782608695656  accuracy_train:  35.50488599348534
67.36616206169128
value_node_difference : epoch:  0  time:  0.6813650131225586  error:  27.0  accuracy:  21.73913043478261  accuracy_train:  29.47882736156352

In [None]:
def graphIt(train, eval, name, enh, big=True):
  import matplotlib.pyplot as plt
  dir = "/content/drive/My Drive/Colab Notebooks/datasets/SpamAssassin/train - test"
  fig, axes = plt.subplots(1, sharey=True, sharex=True, figsize=(12, 8))
  axes.set_xlabel("Epoch", fontsize=14)
  axes.set_ylabel("Accuracy", fontsize=14) 

  i = 0
  sum = 0
  sum_train = 0
  for x in range (len(train)):
    sum += eval[x]
    sum_train += train[x]
    i+=1

  axes.plot(sum/(i), 'k--', linewidth=1.0, label="avg eval")
  axes.plot(sum_train/(i), 'k', linewidth=1.0, label="avg train")

  if(big==False):
    plt.ylim([80, 100])
  else:
    plt.ylim([0, 100])
  plt.xlim([0, 100])
  plt.xticks(np.arange(0, 100, 5))
  plt.legend()
  plt.plot()

In [None]:
for item in types:
  graphIt(accuracy_train[item], accuracy_eval[item], item, '-0.1', True)
  graphIt(accuracy_train[item], accuracy_eval[item], item, '-0.1', False)