# Training a Perceptron to approximate logical disjunction

This notebook follows [Wikipedia's summary of the Perceptron learning algorithm](https://en.m.wikipedia.org/wiki/Perceptron#Learning_algorithm). For simplicity, vectors are represented as Arrays and all numbers as Doubles.

This network takes a vector of input features $\mathbf{x}_j$, a vector of weights $\mathbf{w}$ (one weight per feature), and an activation function $\phi$, where $x_{j,i}, \hat{y}_j \in \{0, 1\}$ and $w_{i} \in \mathbb{R}$:

$$\hat{y}_j = \phi(\mathbf{w} \cdot \mathbf{x}_j)$$

The dot product of two vectors is defined by $\mathbf{a} \cdot \mathbf{b} = \sum _{i=1}^{n} a_{i} b_{i}$:

In [1]:
/// Computes the dot product of two vectors.
///
/// Elements are paired by position. If the arrays differ in length, the
/// surplus elements of the longer one are ignored.
///
/// - Parameters:
///   - a: The first vector.
///   - b: The second vector.
/// - Returns: The sum of the products of the paired elements; `0` when no
///   pairs exist.
func dotProduct(_ a: [Double], _ b: [Double]) -> Double {
    var sum = 0.0
    for (left, right) in zip(a, b) {
        sum += left * right
    }
    return sum
}

The activation function is the Heaviside (unit step) function, which for $x \neq 0$ can be written as $H(x) = \frac{x + \left|x\right|}{2x}$. This formula is undefined at $x = 0$; the implementation below uses $H(0) = 0$:

In [2]:
/// The unit step activation function.
///
/// - Parameter x: The value to threshold.
/// - Returns: `1` when `x` is strictly greater than zero, otherwise `0`
///   (so an input of exactly zero maps to `0`).
func unitStep(_ x: Double) -> Double {
    guard x > 0 else {
        return 0
    }
    return 1
}

The predicted output of the Perceptron can be calculated by:

In [3]:
/// Computes the Perceptron's output for a single input vector.
///
/// - Parameters:
///   - inputs: The feature values of one sample.
///   - weights: The weights to pair with the features.
///   - activation: The function applied to the weighted sum.
/// - Returns: `activation` applied to the dot product of `weights` and `inputs`.
func predictedOutput(_ inputs: [Double], weights: [Double], activation: (Double) -> (Double)) -> Double {
    let weightedSum = dotProduct(weights, inputs)
    return activation(weightedSum)
}

Or, given an array of input values, by:

In [4]:
/// Computes the Perceptron's output for each input vector in a batch.
///
/// - Parameters:
///   - inputs: The input vectors, one per sample.
///   - weights: The weights shared by every prediction.
///   - activation: The function applied to each weighted sum.
/// - Returns: One predicted output per element of `inputs`, in the same order.
func predictedOutputs(_ inputs: [[Double]], weights: [Double], activation: (Double) -> (Double)) -> [Double] {
    var outputs: [Double] = []
    outputs.reserveCapacity(inputs.count)
    for sample in inputs {
        outputs.append(predictedOutput(sample, weights: weights, activation: activation))
    }
    return outputs
}

The Perceptron's error function (also called the cost, objective, or loss function) for sample $j$ is defined by $E(\mathbf{x}_j) = y_j - \hat{y}_j$:

In [5]:
/// Computes the signed error of a single prediction.
///
/// - Parameters:
///   - prediction: The output produced by the Perceptron.
///   - sample: The expected (correct) output.
/// - Returns: `sample - prediction`; positive when the prediction is too low,
///   negative when it is too high.
func error(prediction: Double, sample: Double) -> Double {
    let difference = sample - prediction
    return difference
}

The total error over all $m$ samples is defined by $\sum_{j=1}^{m} (y_{j} - \hat{y}_{j})$:

In [6]:
/// Sums the signed errors (`sample - prediction`) over all paired values.
///
/// Values are paired by position; if the arrays differ in length, the
/// surplus elements of the longer one are ignored.
///
/// - Parameters:
///   - predictions: The predicted outputs.
///   - samples: The expected outputs.
/// - Returns: The total signed error; `0` when no pairs exist.
func summedError(predictions: [Double], samples: [Double]) -> Double {
    var total = 0.0
    for (expected, predicted) in zip(samples, predictions) {
        total += expected - predicted
    }
    return total
}

The mean error is given by $\frac{1}{m} \sum_{j=1}^{m} (y_{j} - \hat{y}_{j})$, where $m$ is the number of samples:

In [7]:
/// Computes the mean signed error over a set of predictions.
///
/// Because the error is signed, predictions that are too high and too low
/// offset each other in the result.
///
/// - Parameters:
///   - predictions: The predicted outputs.
///   - samples: The expected outputs; their count is the divisor.
/// - Returns: The summed error scaled by `1 / samples.count`.
func meanError(predictions: [Double], samples: [Double]) -> Double {
    let total = summedError(predictions: predictions, samples: samples)
    let scale = 1 / Double(samples.count)
    return scale * total
}

The accuracy (proportion of predictions that are correct) can be calculated by:

In [8]:
/// Computes the fraction of predictions that exactly match their expected output.
///
/// Values are paired by position. The divisor is always `predictions.count`,
/// even if `samples` holds fewer elements than `predictions`.
///
/// - Parameters:
///   - predictions: The predicted outputs.
///   - samples: The expected outputs.
/// - Returns: The number of matching pairs divided by the number of predictions.
func predictionAccuracy(predictions: [Double], samples: [Double]) -> Double {
    var matches = 0.0
    for (prediction, sample) in zip(predictions, samples) where prediction == sample {
        matches += 1
    }
    return matches / Double(predictions.count)
}

The precision (proportion of positive identifications that were actually correct) can be calculated by:

In [9]:
/// Computes the precision of a set of predictions: true positives divided by
/// all positive predictions.
///
/// A pair counts as a true positive when both the prediction and the expected
/// output equal `1`. The divisor is the sum of all values in `predictions`,
/// so the result is not a finite number when nothing was predicted positive.
///
/// - Parameters:
///   - predictions: The predicted outputs.
///   - samples: The expected outputs.
/// - Returns: The true-positive count divided by the sum of `predictions`.
func predictionPrecision(predictions: [Double], samples: [Double]) -> Double {
    var truePositives = 0.0
    for (prediction, sample) in zip(predictions, samples) {
        if prediction == 1 && sample == 1 {
            truePositives += 1
        }
    }

    var predictedPositives = 0.0
    for prediction in predictions {
        predictedPositives += prediction
    }

    return truePositives / predictedPositives
}

TODO: recall and F-score are not implemented yet.

The training data is the truth table for $A \lor B$, structured as an array of tuples where the first value is a vector containing the input values and the second value is the correct output value:

In [10]:
// Truth table for A ∨ B. Each entry pairs an input vector [A, B] with the expected output.
let trainingSamples: [([Double], Double)] = [
    ([0, 0], 0),
    ([0, 1], 1),
    ([1, 0], 1),
    ([1, 1], 1),
]

// The same data split into parallel arrays, in the order of `trainingSamples`.
let trainingSampleInputs: [[Double]] = trainingSamples.map { sample in sample.0 }
let trainingSampleOutputs: [Double] = trainingSamples.map { sample in sample.1 }

Given an untrained Perceptron with an initial set of weights $\mathbf{w} = [0, 0]$, calculate the predicted outputs for the training inputs:

In [11]:
// Predict every training input using the untrained weights [0, 0] and the unit step activation.
predictedOutputs(trainingSampleInputs, weights: [0, 0], activation: unitStep)

▿ 4 elements
  - 0 : 0.0
  - 1 : 0.0
  - 2 : 0.0
  - 3 : 0.0


Calculate the mean error for the untrained Perceptron on the training inputs:

In [12]:
// Mean signed error of the untrained weights [0, 0] against the expected training outputs.
meanError(
    predictions: predictedOutputs(trainingSampleInputs, weights: [0, 0], activation: unitStep), 
    samples: trainingSampleOutputs
)

0.75


To update the weights during Perceptron training, modify each weight $w_i$ by adding $r E(x_j) x_{j,i}$ to it, where $r$ is the learning rate:

In [13]:
/// Applies one Perceptron weight update.
///
/// Each weight `w[i]` becomes `w[i] + (learningRate * error) * inputs[i]`.
///
/// - Parameters:
///   - oldWeights: The weights before the update.
///   - error: The signed error of the prediction made for `inputs`.
///   - inputs: The input vector that produced the error; it is indexed by
///     every position of `oldWeights`, so it must be at least as long.
///   - learningRate: The factor that scales the size of the update.
/// - Returns: A new array with one updated weight per element of `oldWeights`.
func updatedWeights(_ oldWeights: [Double], error: Double, inputs: [Double], learningRate: Double) -> [Double] {
    let step = learningRate * error
    var adjusted: [Double] = []
    adjusted.reserveCapacity(oldWeights.count)
    for index in oldWeights.indices {
        adjusted.append(oldWeights[index] + (step * inputs[index]))
    }
    return adjusted
}

In [14]:
/// Trains Perceptron weights on `samples` with the online Perceptron learning rule.
///
/// Every epoch visits each sample once, in random order, and adjusts the weights
/// after each sample by `learningRate * error * input`. Once an epoch is complete,
/// the mean error over all of `samples` is re-evaluated and training stops as soon
/// as it is below `0.25`. At least one epoch is always run. Metrics are reported
/// with `print` before training starts and after it ends.
///
/// - Note: The stopping criterion is the *signed* mean error, so predictions that
///   are too high and too low can offset each other. There is no cap on the number
///   of epochs: the function does not return if the mean error never drops below
///   the threshold.
///
/// - Parameters:
///   - startingWeights: The initial weights, one per input feature.
///   - samples: Training pairs of (input vector, expected output).
///   - learningRate: The factor that scales each weight update.
///   - activation: The activation function applied to each weighted sum.
/// - Returns: The weights in effect when training stopped.
func trainWeights(startingFrom startingWeights: [Double], samples: [([Double], Double)], learningRate: Double, activation: (Double) -> (Double)) -> [Double] {
    let sampledInputs = samples.map { $0.0 }
    let sampledOutputs = samples.map { $0.1 }
    var currentWeights = startingWeights
    var predictions = predictedOutputs(sampledInputs, weights: currentWeights, activation: activation)
    var averageError = meanError(predictions: predictions, samples: sampledOutputs)
    var iterations = 0
    var epochs = 0

    print("Starting weights: ", startingWeights)
    print("Predicted outputs: ", predictions)
    print("Mean error: ", averageError)
    print("Accuracy: ", predictionAccuracy(predictions: predictions, samples: sampledOutputs))
    print("Precision: ", predictionPrecision(predictions: predictions, samples: sampledOutputs))
    print("\n")

    repeat {
        epochs += 1

        for (currentInputs, currentOutput) in samples.shuffled() {
            iterations += 1

            let prediction = predictedOutput(currentInputs, weights: currentWeights, activation: activation)
            let currentError = error(prediction: prediction, sample: currentOutput)
            currentWeights = updatedWeights(currentWeights, error: currentError, inputs: currentInputs, learningRate: learningRate)
        }

        // Assign to the function-level variables (no `let`) so the loop condition
        // below sees the error of the weights produced by this epoch. Measure
        // against the outputs of the `samples` argument, not any global data set.
        predictions = predictedOutputs(sampledInputs, weights: currentWeights, activation: activation)
        averageError = meanError(predictions: predictions, samples: sampledOutputs)
    } while averageError >= 0.25

    // `predictions` and `averageError` already describe the final weights.
    let accuracy = predictionAccuracy(predictions: predictions, samples: sampledOutputs)
    let precision = predictionPrecision(predictions: predictions, samples: sampledOutputs)

    print("Epochs: ", epochs)
    print("Iterations :", iterations)
    print("Final weights: ", currentWeights)
    print("Predicted outputs: ", predictions)
    print("Mean error: ", averageError)
    print("Accuracy: ", accuracy)
    print("Precision: ", precision)

    return currentWeights
}

In [None]:
// Train on the A ∨ B truth table, starting from weights [1, -1] with a learning rate of 0.5.
trainWeights(startingFrom: [1, -1], samples: trainingSamples, learningRate: 0.5, activation: unitStep)

Starting weights:  [1.0, -1.0]
Predicted outputs:  [0.0, 0.0, 1.0, 0.0]
Mean error:  0.5
Accuracy:  0.5
Precision:  1.0


