In [138]:
# Static network
import os

import dynet as dy

# multilayer perceptron with a single hidden layer
# input 2 nodes
# hidden layer: 8 nodes, activation: tanh
# output layer: 1 node
# σ(V(tanh(Wx+b)))
# x: 2x1
# W: 8x2
# b: 8 vector
# V: 1x8

# Define the trainable parameters of the network.
# The ParameterCollection `m` owns them; the trainer and m.save() further down
# are both given this same collection.
m = dy.ParameterCollection()
pW = m.add_parameters((8,2)) # W: hidden-layer weights, 8x2 (_dynet.Parameters)
pV = m.add_parameters((1,8)) # V: output-layer weights, 1x8
pb = m.add_parameters((8)) # b: hidden-layer bias; note that (8) is the plain int 8, not a tuple

In [139]:
dy.renew_cg() # start a new computation graph; the returned graph object is shown as the cell output

<_dynet.ComputationGraph at 0x10ab6bb40>

In [140]:
# Add the parameters to the computation graph.
# dy.parameter turns each _dynet.Parameters object held by the collection into
# a _dynet.Expression; the network below is built from these Expressions.
# The parameters are processed in the order W, V, b.
W, V, b = (dy.parameter(param) for param in (pW, pV, pb))

In [141]:
# Show the dimensions of the three parameter expressions, one per line.
print('\n'.join([
    'W.dimension: {}'.format(W.dim()),
    'b.dimension: {}'.format(b.dim()),
    'V.dimension: {}'.format(V.dim()),
]))

W.dimension: ((8, 2), 1)
b.dimension: ((8,), 1)
V.dimension: ((1, 8), 1)


In [142]:
# Build the network expression: output = σ(V · tanh(W·x + b))
x = dy.vecInput(2)  # input vector of size 2 (_dynet._vecInputExpression)
output = dy.logistic(
    V * dy.tanh(W * x + b)  # hidden layer feeds the single output node
)  # output is a _dynet.Expression

In [143]:
# Types of the input expression and of the network output, one per line.
print(type(x), type(output), sep='\n')

<class '_dynet._vecInputExpression'>
<class '_dynet.Expression'>


In [144]:
# Dimensions after each stage of the network, innermost expression first;
# the last line is the already-built `output` expression for comparison.
print(
    (W * x + b).dim(),
    dy.tanh(W * x + b).dim(),
    (V * dy.tanh(W * x + b)).dim(),
    dy.logistic(V * dy.tanh(W * x + b)).dim(),
    output.dim(),
    sep='\n',
)

((8,), 1)
((8,), 1)
((1,), 1)
((1,), 1)
((1,), 1)


In [145]:
# We can now query the (still untrained) network:
# set the input vector, then evaluate the output expression.
x.set([0,0])
output.value()

0.6489945650100708

In [146]:
# To define a loss we need an input expression that holds the target value.
y = dy.scalarInput(0) # holds the correct answer; starts at 0 and is changed later with y.set(...)
loss = dy.binary_log_loss(output, y) # binary log loss between the network output and the target y

In [147]:
# Types of the target input expression and of the loss expression, one per line.
print(type(y), type(loss), sep='\n')

<class '_dynet._inputExpression'>
<class '_dynet.Expression'>


In [148]:
# Choose the optimizer that will update the parameters in `m`.
# The commented-out lines are alternative trainers; uncomment one (and comment
# out the active line) to try a different optimizer.
# trainer = dy.SimpleSGDTrainer(m)
# trainer = dy.CyclicalSGDTrainer(m)
# trainer = dy.AdamTrainer(m)
# trainer = dy.AdagradTrainer(m)
trainer = dy.RMSPropTrainer(m)

In [149]:
# Confirm which trainer class is active.
type(trainer)

_dynet.RMSPropTrainer

In [150]:
# Learning rate currently set on the trainer (no value was passed when it was created).
trainer.learning_rate

0.0010000000474974513

In [151]:
# Single optimization/training step on one example, to show the mechanics.

# Feed one training instance: input (1,0) with target 1.
x.set([1,0])
y.set(1)
loss_value = loss.value() # this performs a forward pass through the network
print("the loss before step is:",loss_value)

# Now do one optimization step.
loss.backward()  # compute the gradients of the loss
trainer.update() # let the trainer update the parameters

# See how the step affected the loss on the same instance.
loss_value = loss.value(recalculate=True) # recalculate=True means "don't use the precomputed value"
print("the loss after step is:",loss_value)

the loss before step is: 0.6511291265487671
the loss after step is: 0.6370114088058472


In [152]:
# create training data set
def create_xor_instances(num_rounds=2000):
    """Build a repeated XOR truth table to use as training data.

    Every round contributes the four input pairs (0,0), (0,1), (1,0), (1,1),
    in that order, together with their XOR labels.

    Args:
        num_rounds: how many times the four-row truth table is repeated.

    Returns:
        A (questions, answers) pair of equal-length lists: questions holds
        (x1, x2) tuples and answers holds the matching 0/1 labels.
    """
    bits = (0, 1)
    # One pass over the truth table: ((x1, x2), label), label is 1 when the bits differ.
    truth_table = [((x1, x2), int(x1 != x2)) for x1 in bits for x2 in bits]
    questions = [pair for _round in range(num_rounds) for pair, _label in truth_table]
    answers = [label for _round in range(num_rounds) for _pair, label in truth_table]
    return questions, answers

# Build the data set with the default num_rounds=2000 (four instances per round).
questions, answers = create_xor_instances()

In [153]:
# Sanity-check the data set: both lengths, then the first four inputs and labels.
print(
    len(questions),
    len(answers),
    questions[:4],
    answers[:4],
    sep='\n',
)

8000
8000
[(0, 0), (0, 1), (1, 0), (1, 1)]
[0, 1, 1, 0]


In [158]:
# Training: one pass over the data set, updating the parameters after every
# instance and reporting the running average loss every 100 instances.
total_loss = 0
seen_instances = 0
for seen_instances, (question, answer) in enumerate(zip(questions, answers), start=1):
    x.set(question)
    y.set(answer)
    total_loss += loss.value()  # forward pass
    loss.backward()             # compute the gradients
    trainer.update()            # apply the optimizer step
    # seen_instances starts at 1, so any multiple of 100 is already greater than 1
    if seen_instances % 100 == 0:
        print("average loss is:",total_loss / seen_instances)

average loss is: 5.314767573509016e-05
average loss is: 5.154752720955002e-05
average loss is: 5.004935352189932e-05
average loss is: 4.8644935327502024e-05
average loss is: 4.732378857806907e-05
average loss is: 4.608022316157682e-05
average loss is: 4.490785400776076e-05
average loss is: 4.3800606749755386e-05
average loss is: 4.2753403744831705e-05
average loss is: 4.176138444745448e-05
average loss is: 4.082021563425025e-05
average loss is: 3.992632509834948e-05
average loss is: 3.9075942568160825e-05
average loss is: 3.826587290859607e-05
average loss is: 3.749331663736181e-05
average loss is: 3.675577444596456e-05
average loss is: 3.605085012114775e-05
average loss is: 3.537632465496346e-05
average loss is: 3.473029973256895e-05
average loss is: 3.411093204658755e-05
average loss is: 3.351655569314885e-05
average loss is: 3.2945518753628924e-05
average loss is: 3.239659095371557e-05
average loss is: 3.186850319404281e-05
average loss is: 3.1359918340240254e-05
average loss is: 3.

In [159]:
# Query the network on all four XOR inputs.
# Each entry pairs the printed label with the input vector fed to the network.
for label, point in (("0,1", [0,1]), ("1,0", [1,0]), ("0,0", [0,0]), ("1,1", [1,1])):
    x.set(point)
    print(label,output.value())

0,1 0.9999913573265076
1,0 0.9999917149543762
0,0 4.5227275222714525e-06
1,1 7.1626768658461515e-06


In [115]:
# Current values of the hidden-layer weight matrix W (8x2).
# NOTE(review): this cell's execution count is lower than the training cell's, so the
# saved output may come from an earlier run - re-run after training to refresh it.
W.value()

array([[ 4.07663822, -2.77607584],
       [ 2.17588735, -3.42900205],
       [ 3.63992882,  3.72946382],
       [ 0.06008036, -1.52189255],
       [-3.54680562,  2.17219925],
       [ 1.74713027, -2.989784  ],
       [-0.82024181, -0.7913931 ],
       [ 4.60517788,  4.43824816]])

In [96]:
# Current values of the output-layer weight matrix V (1x8).
# NOTE(review): this cell's execution count is lower than the training cell's, so the
# saved output may come from an earlier run - re-run after training to refresh it.
V.value()

array([[-1.26245904,  0.91464269,  1.09329867,  0.70665795,  1.29633462,
         1.35833979,  0.8188495 ,  1.72384453]])

In [97]:
# Current values of the hidden-layer bias vector b (8 entries).
# NOTE(review): this cell's execution count is lower than the training cell's, so the
# saved output may come from an earlier run - re-run after training to refresh it.
b.value()

[0.6605279445648193,
 -0.45908284187316895,
 -0.1739916205406189,
 0.2111283540725708,
 -0.35437655448913574,
 -0.33467215299606323,
 0.3537399470806122,
 -0.7486727237701416]

In [None]:
# Persist all parameters of the collection `m` to disk.
# The target path lives inside a 'models' directory; create it first so that
# saving does not fail on a fresh checkout where the directory is missing.
os.makedirs('models', exist_ok=True)
m.save('models/xor_static_model')