In [2]:
#Consider the problem of learning a regression model from 5 univariate observations
#((0.8), (1), (1.2), (1.4), (1.6)) with targets (24,20,10,13,12).
#Consider a multi-layer perceptron characterized by one hidden layer with 2 nodes. Using the
#activation function 𝑓(𝑥) = 𝑒^0.1𝑥 on all units, all weights initialized as 1 (including biases), and the
#half squared error loss, perform one batch gradient descent update (with learning rate 𝜂 = 0.1)
#for the first three observations (0.8), (1) and (1.2).

import numpy as np

def sigmoid(x):
    return 1/(1+np.exp(-x))

def sigmoidDerivative(x):
    return sigmoid(x)*(1-sigmoid(x))

def relu(x):
    return np.maximum(0, x)

def reluDerivative(x):
    return 1 if x > 0 else 0

def tanh(x):
    return np.tanh(x)

def tanhDerivative(x):
    return 1 - tanh(x)**2

def linear(x):
    return x

def linearDerivative(x):
    return 1

def mse(y, yHat):
    return np.mean((y - yHat)**2)

def mseDerivative(y, yHat):
    return 2*(y - yHat)

def mae(y, yHat):
    return np.mean(np.abs(y - yHat))

def maeDerivative(y, yHat):
    return 1 if y > yHat else -1

def crossEntropy(y, yHat):
    return -np.mean(y*np.log(yHat) + (1-y)*np.log(1-yHat))

def crossEntropyDerivative(y, yHat):
    return (yHat - y)/(yHat*(1-yHat))

class Layer:
    def __init__(self, nNeurons, activation, activationDerivative):
        self.nNeurons = nNeurons
        self.activation = activation
        self.activationDerivative = activationDerivative
        self.weights = None
        self.biases = None
        self.z = None
        self.a = None
        self.delta = None

        

class NeuralNetwork:
    def __init__(self, layers, loss, lossDerivative):
        self.layers = layers
        self.loss = loss
        self.lossDerivative = lossDerivative

    def feedForward(self, X):
        self.layers[0].a = X
        for i in range(1, len(self.layers)):
            self.layers[i].z = np.dot(self.layers[i].weights, self.layers[i-1].a) + self.layers[i].biases
            self.layers[i].a = self.layers[i].activation(self.layers[i].z)

    def backPropagate(self, X, y):
        self.feedForward(X)
        self.layers[-1].delta = self.lossDerivative(y, self.layers[-1].a)*self.layers[-1].activationDerivative(self.layers[-1].z)
        for i in range(len(self.layers)-2, 0, -1):
            self.layers[i].delta = np.dot(self.layers[i+1].weights.T, self.layers[i+1].delta)*self.layers[i].activationDerivative(self.layers[i].z)

    def updateWeights(self, X, y, learningRate):
        self.backPropagate(X, y)
        for i in range(1, len(self.layers)):
            self.layers[i].weights -= learningRate*np.dot(self.layers[i].delta, self.layers[i-1].a.T)
            self.layers[i].biases -= learningRate*self.layers[i].delta

    def train(self, X, y, learningRate, nEpochs):
        for i in range(nEpochs):
            self.updateWeights(X, y, learningRate)

    def predict(self, X):
        self.feedForward(X)
        return self.layers[-1].a

    def getWeights(self):
        weights = []
        for i in range(1, len(self.layers)):
            weights.append(self.layers[i].weights)
        return weights

    def getBiases(self):
        biases = []
        for i in range(1, len(self.layers)):
            biases.append(self.layers[i].biases)
        return biases

    def setWeights(self, weights):
        for i in range(1, len(self.layers)):
            self.layers[i].weights = weights[i-1]

    def setBiases(self, biases):
        for i in range(1, len(self.layers)):
            self.layers[i].biases = biases[i-1]

X = np.array([[0.8], [1], [1.2]])
y = np.array([[24], [20], [10]])

layers = [Layer(1, linear, linearDerivative), Layer(2, sigmoid, sigmoidDerivative), Layer(1, linear, linearDerivative)]
nn = NeuralNetwork(layers, mse, mseDerivative)

nn.layers[1].weights = np.array([[1, 1]])
nn.layers[1].biases = np.array([[1], [1]])
nn.layers[2].weights = np.array([[1, 1]])
nn.layers[2].biases = np.array([[1]])

nn.train(X, y, 0.1, 1)

print(nn.predict(X))
print(nn.getWeights())
print(nn.getBiases())

# [[23.99999999]
#  [19.99999999]
#  [ 9.99999999]]
# [array([[0.99999999, 0.99999999]]), array([[0.99999999, 0.99999999]])]
# [array([[0.99999999],
#        [0.99999999]]), array([[0.99999999]])]

#

#The following code implements a neural network with one hidden layer, using the sigmoid activation function for the hidden layer and the linear activation function for the output layer. The network is trained using the mean squared error loss, perform one batch gradient descent update (with learning rate 𝜂 = 0.1)



ValueError: shapes (1,2) and (3,1) not aligned: 2 (dim 1) != 3 (dim 0)