In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pickle as pkl

In [None]:
class Layer:
  def __init__(self):
    self.input = None
    self.output = None

  def forward(self, input):
    raise NotImplementedError

  def backward(self, updated_W_Goutput, lr_alpha):
    raise NotImplementedError

class Linear_Layer(Layer):
  def __init__(self, input_dimensions, no_of_nodes):
    super().__init__()
    np.random.seed(1234)
    self.weights = np.random.randn(input_dimensions, no_of_nodes)
    self.bias = np.random.randn(1, no_of_nodes)

  def forward(self, input):
    self.input = input
    self.output = np.dot(self.input, self.weights) + self.bias
    return self.output

  def backward(self, updated_W_Goutput, lr_alpha):
    local_derivatives = np.dot(updated_W_Goutput, self.weights.T)
    weights_derivatives = np.dot(self.input.T, updated_W_Goutput)
    self.weights -= lr_alpha * weights_derivatives
    self.bias -= lr_alpha * updated_W_Goutput
    return local_derivatives

class Sigmoid_Layer(Layer):
  def __init__(self):
    super().__init__()

  def forward(self, input):
    self.input = input
    self.output = 1 / (1 + np.exp(-input))
    return self.output

  def backward(self, updated_W_Goutput, lr_alpha):
    local_derivatives = self.output * (1 - self.output)
    lz_multiplication = updated_W_Goutput * local_derivatives
    return lz_multiplication


class HyperbolicTangent_Layer(Layer):
  def __init__(self):
    super().__init__()
    
  def forward(self, input):
    self.input = input
    self.output = np.tanh(input)
    return self.output
  
  def backward(self, updated_W_Goutput, lr_alpha):
    local_derivatives = 1 - np.power(self.output, 2)
    lz_multiplication = updated_W_Goutput * local_derivatives
    return lz_multiplication

class Softmax_Layer(Layer):
  def __init__(self):
    super().__init__()

  def forward(self, input):
    self.input = input
    tmp = np.exp(input)
    self.output = tmp / np.sum(tmp)
    return self.output

  def backward(self, updated_W_Goutput, lr_alpha):
    return updated_W_Goutput


class CrossEntropy_Layer(Layer):
  def __init__(self):
    super().__init__()

  def cross_entropy(self, pred_y, actual_y):
    loss = -np.sum(actual_y * np.log(pred_y))
    return loss

  def forward(self, input, actual_y):
    self.input = input
    self.actual_y = actual_y
    self.output = self.cross_entropy(input, actual_y)
    return self.output

  def backward(self, updated_W_Goutput, lr_alpha):
    local_derivatives = self.input - self.actual_y
    dl_dy = updated_W_Goutput * local_derivatives
    return dl_dy

In [None]:
def validation_split(X_train,Y_train):
  sample_num = X_train.shape[0]
  random_split = int(0.9 * sample_num)
  x_train = X_train[:random_split]
  x_val = X_train[random_split:]
  y_train = Y_train[:random_split]
  y_val =  Y_train[random_split:]

  return x_train,x_val,y_train,y_val


In [None]:
class Sequential_Layer(Layer):
  def __init__(self):
    self.layers = []
    self.error_list = []
    self.val_error_list = []
  
  def add(self, layer):
    self.layers.append(layer)

  def fit(self, X_train, Y_train, epochs, lr_alpha, patience):
    cr = CrossEntropy_Layer()

    x_train,x_val,y_train,y_val = validation_split(X_train,Y_train)

    # Intilization
    num_improvements = 0
    best_loss = float("inf")
    sample_n = x_train.shape[0]

    for epoch in range(epochs):
      training_error = 0
      val_error = 0
      for i in range(sample_n):
        output = x_train[i]
        # Forward propagation
        for layer in self.layers:
          output = layer.forward(output)
        training_error += cr.forward(output, y_train[i])

        # Backward propagation
        loss_derivative = cr.backward(1, lr_alpha)
        for layer in reversed(self.layers):
          loss_derivative = layer.backward(loss_derivative, lr_alpha)

        # Prediction using validation data
        y_val_predicted = self.predict(x_val)

        # Calculating loss
        if y_train.shape[1]>1:
          val_error += cr.forward(y_val_predicted,np.argmax(y_val, axis = 1))
          val_error = val_error/y_val_predicted.shape[0]
        else :
          val_error += cr.forward(y_val_predicted, y_val)
          val_error = val_error/y_val_predicted.shape[0]

        # Checking for patience
        if val_error <= best_loss:
          best_loss = val_error
          num_improvements = 0
        else:
          num_improvements +=1
        if num_improvements == patience:
          break

      # Average error for all samples per epoch
      training_error /= sample_n
      self.val_error_list.append(best_loss)
      self.error_list.append(training_error)

  def predict(self, input_data, prob=False):
    samples = input_data.shape[0]
    result = []
    y_pred = []
    for i in range(samples):
        output = input_data[i]
        for layer in self.layers:
            output = layer.forward(output)
        result.append(output)
    if prob:
        return result

    if len(result[0][0])==1:
        for i in result:
            if abs(i[0][0]) >= 0.5:
                y_pred.append(1)
            else:
                y_pred.append(0) 
        return np.array(y_pred)
    for i in result:
        y_pred.append(np.argmax(i[0], axis = 0))
    return np.array(y_pred)

  def accuracy(elf,y_pred, y_true):
    correct = 0
    for i in range(len(y_true)):
        if y_true[i] == y_pred[i]:
            correct += 1
    accuracy = correct / float(len(y_true)) * 100.0
    return accuracy

  def plot(self, lr):
    plt.figure(figsize=(8,8))
    plt.plot(self.error_list, 'b')
    plt.title("Loss vs Epochs", fontsize=18)
    plt.xlabel("Epochs",fontsize=18)
    plt.ylabel("loss", fontsize=18)
    plt.show()


In [None]:
# Testing model 1 on XOR
x_train = np.array([[[0,0]], [[0,1]], [[1,0]], [[1,1]], [[1,0]], [[0,0]], [[0,1]], [[1,0]]])
y_train = np.array([[[0]], [[1]], [[1]], [[0]], [[1]],[[0]], [[1]], [[1]]])

# Network
net = Sequential_Layer()
net.add(Linear_Layer(2, 5))
net.add(HyperbolicTangent_Layer())
net.add(Linear_Layer(5, 1))
net.add(HyperbolicTangent_Layer())

# Train the network
learning_rate = 0.1
net.fit(x_train, y_train, epochs=10, lr_alpha = learning_rate, patience=5)
y_pred = net.predict(x_train)
print("Prediction: ",y_pred)
print("Accuracy :", net.accuracy(y_pred, y_train),"%")

# Save Weights
np.set_printoptions(suppress=True)


my_file_name = "XOR_solved.w"
with open(my_file_name, 'wb') as myfile:
  pkl.dump(net, myfile)

# Load the model
with open(my_file_name, 'rb') as input_file:
  model1 = pkl.load(input_file)

net.plot(learning_rate)

In [None]:
# Testing model 2 on XOR
x_train = np.array([[[0,0]], [[0,1]], [[1,0]], [[1,1]], [[1,0]], [[0,0]], [[0,1]], [[1,0]]])
y_train = np.array([[[0]], [[1]], [[1]], [[0]], [[1]],[[0]], [[1]], [[1]]])

# Network
net = Sequential_Layer()
net.add(Linear_Layer(2, 4))
net.add(HyperbolicTangent_Layer())
net.add(Linear_Layer(4, 1))
net.add(HyperbolicTangent_Layer())

# Train the network
learning_rate = 0.01
net.fit(x_train, y_train, epochs=200, lr_alpha=learning_rate, patience=5)
y_pred = net.predict(x_train)
print("Prediction: ",y_pred)
print("Accuracy :", net.accuracy(y_pred, y_train),"%")

# Save Weights
np.set_printoptions(suppress=True)


my_file_name = "XOR_solved.w"
with open(my_file_name, 'wb') as myfile:
  pkl.dump(net, myfile)


net.plot(learning_rate)

In [None]:
from keras.datasets import mnist

def OneHotEncoder(target, num_classes):
  res = np.eye(num_classes)[np.array(target).reshape(-1)]
  return res.reshape(list(target.shape)+[num_classes])

(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.reshape(x_train.shape[0], 1, 784)
x_train = x_train.astype("float32")
x_train /= 255
y_train = OneHotEncoder(y_train.astype(int), 10)

x_test = x_test.reshape(x_test.shape[0], 1, 784)
x_test = x_test.astype("float32")
x_test /= 255
y_test = OneHotEncoder(y_test.astype(int), 10)


In [None]:
def testing_images(model, samples=10):
  for test_data, true in zip(x_test[:samples], y_test[:samples]):
    image = np.reshape(test_data, (28, 28))
    plt.imshow(image, cmap='binary')
    prediction = model.predict(test_data)
    index = np.argmax(true)
    plt.title('Prediction: %s, True: %d' % (prediction[0], index))
    plt.show()

In [9]:
# Model 1
net = Sequential_Layer()
net.add(Linear_Layer(784, 10))     
net.add(HyperbolicTangent_Layer()) 
net.add(Softmax_Layer())
lr = 0.001
net.fit(x_train, y_train, epochs=2, lr_alpha=lr, patience =5)
net.plot(learning_rate)

  loss = -np.sum(actual_y * np.log(pred_y))
  loss = -np.sum(actual_y * np.log(pred_y))


KeyboardInterrupt: ignored

In [None]:
# Model 2
net = Sequential_Layer()
net.add(Linear_Layer(784, 10))     
net.add(Sigmoid_Layer())
net.add(Softmax_Layer())
lr = 0.01
net.fit(x_train, y_train, epochs=5, lr_alpha=lr, patience =5)
net.plot(learning_rate)

In [None]:
# Model 3
net = Sequential_Layer()
net.add(Linear_Layer(784, 10))     
net.add(HyperbolicTangent_Layer())
net.add(Softmax_Layer())
lr = 0.1
net.fit(x_train, y_train, epochs=3, lr_alpha=lr, patience =5)
net.plot(learning_rate)