In [1]:
import numpy as np

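First we define our activation function and its derivative. Note that `sigmoid_der` takes the sigmoid's *output* s (not the raw input) and returns s * (1 - s).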

In [2]:
def sigmoid(x):
    """Logistic sigmoid, 1 / (1 + e^(-x)), applied element-wise.

    Accepts a scalar or a NumPy array and returns a value of the same shape.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

In [3]:
def sigmoid_der(x):
    """Return x * (1 - x), element-wise.

    This equals the derivative of the sigmoid when ``x`` is already a sigmoid
    output (s' = s * (1 - s)); it is not the derivative evaluated at a raw
    pre-activation input.
    """
    complement = 1 - x
    return x * complement

The training function

In [4]:
def train_NN(x, y, n_iter=10000, learning_rate=1.0):
    """Train a single sigmoid neuron with full-batch updates.

    Parameters
    ----------
    x : numpy.ndarray, shape (n_samples, n_features)
        Input observations, one per row. No bias term is added here; append a
        constant column of ones to ``x`` if one is wanted.
    y : numpy.ndarray
        Target values, one per row; expected as a column of shape
        (n_samples, 1) so that it lines up with the neuron's output.
    n_iter : int, optional
        Number of update steps over the whole data set (default 10000).
    learning_rate : float, optional
        Factor applied to each weight update (default 1.0, i.e. the raw update).

    Returns
    -------
    numpy.ndarray, shape (n_features, 1)
        The learned weights.
    """
    # Every weight starts at 0.1. The builtin ``float`` replaces the ``np.float``
    # alias, which was removed in NumPy 1.24 and raises AttributeError there.
    weights = np.full((x.shape[1], 1), 0.1, dtype=float)

    for _ in range(n_iter):
        l1 = sigmoid(np.dot(x, weights))
        err = y - l1
        # sigmoid_der expects the sigmoid output, which is exactly what l1 is.
        delta = err * sigmoid_der(l1)
        weights += learning_rate * np.dot(x.T, delta)

    return weights


And the predict function

In [5]:
def predict(vals, weights):
    """Return the neuron's output for ``vals`` under the given ``weights``.

    The output is the sigmoid of the dot product of ``vals`` and ``weights``.
    """
    weighted_sum = np.dot(vals, weights)
    return sigmoid(weighted_sum)

Simple test

In [6]:
# Four observations with three features each.
x = np.array([
    [0.2, 0.4, 0.9],
    [0.9, 0.2, 0.6],
    [0.3, 0.8, 0.3],
    [0.4, 0.3, 0.1],
])

# One target per observation, stored as a column vector.
y = np.array([0.2, 0.4, 0.5, 0.5]).reshape(-1, 1)

w = train_NN(x, y)

In [7]:
# Display the weights returned by train_NN.
w

array([[ 0.54576717],
       [ 0.35881297],
       [-1.71705122]])

Test our weights

In [8]:
# Iterate over the rows of x directly so the loop follows the data
# instead of a hard-coded row count.
for row in x:
    print(predict(row, w))

[0.21540166]
[0.38526078]
[0.48392184]
[0.53848514]


In [9]:
# Predict for an observation that is not one of the training rows.
predict(vals=np.array([0.5, 0.7, 0.4]), weights=w)

array([0.45939765])

# OR operation

In [11]:
# Truth-table inputs for the two-input OR gate.
x1 = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])

# OR targets, stored as a column vector.
y1 = np.array([0, 1, 1, 1]).reshape(-1, 1)

w = train_NN(x1, y1)

In [12]:
# Display the weights returned by train_NN for the OR data.
w

array([[4.93068387],
       [4.93068387]])

In [13]:
# Iterate over the rows of x1 directly so the loop follows the data
# instead of a hard-coded row count.
for row in x1:
    print(predict(row, w))

[0.5]
[0.99283021]
[0.99283021]
[0.99994785]


We can see that the first observation has a large error: the prediction is 0.5 but the target is 0.<br>
The problem is that both inputs are 0, so:<br>
W1 * 0 + W2 * 0 = 0 for any Wi<br>
sigmoid(0) = 0.5<br>
Without a bias term, we can't train the net to fit that observation.

# Adding bias

In [14]:
# OR inputs again, with a constant third column of ones acting as a bias input.
x2 = np.array([
    [0, 0, 1],
    [0, 1, 1],
    [1, 0, 1],
    [1, 1, 1],
])

# OR targets, stored as a column vector.
y2 = np.array([0, 1, 1, 1]).reshape(-1, 1)

w = train_NN(x2, y2)

In [15]:
# Iterate over the rows of x2 directly so the loop follows the data
# instead of a hard-coded row count.
for row in x2:
    print(predict(row, w))

[0.01628075]
[0.9897395]
[0.9897395]
[0.99999822]


# XOR 

In [16]:
# Same inputs (with the bias column of ones) as the OR example; only the
# targets change. Note that this rebinds x2, y2 and w.
x2 = np.array([
    [0, 0, 1],
    [0, 1, 1],
    [1, 0, 1],
    [1, 1, 1],
])

# XOR targets, stored as a column vector.
y2 = np.array([0, 1, 1, 0]).reshape(-1, 1)

w = train_NN(x2, y2)

In [17]:
# Display the weights returned by train_NN for the XOR data.
w

array([[ 2.22044605e-16],
       [ 2.22044605e-16],
       [-3.05311332e-16]])

In [18]:
# Iterate over the rows of x2 directly so the loop follows the data
# instead of a hard-coded row count.
for row in x2:
    print(predict(row, w))

[0.5]
[0.5]
[0.5]
[0.5]


A single neuron can only learn a linear decision boundary, and XOR is not linearly separable, so we can't solve it without a hidden layer.