In the dynamic approach, a new network is created for each training example.
This is very convenient for networks whose structure is not fixed, such as recurrent or recursive networks.

In [2]:
import dynet as dy

In [3]:
# create training instances, as before
def create_xor_instances(num_rounds=2000):
    """Build a repeated XOR training set.

    Each round contributes the four input pairs (0,0), (0,1), (1,0), (1,1)
    in that order, together with their XOR labels.

    Args:
        num_rounds: how many times the four-case truth table is repeated.

    Returns:
        A ``(questions, answers)`` pair of parallel lists: ``questions`` holds
        ``(x1, x2)`` tuples and ``answers`` holds the matching 0/1 labels.
    """
    # The truth table never changes, so compute it once up front.
    truth_table = [((a, b), int(a != b)) for a in (0, 1) for b in (0, 1)]

    questions, answers = [], []
    for _ in range(num_rounds):
        for pair, label in truth_table:
            questions.append(pair)
            answers.append(label)
    return questions, answers

In [4]:
# create a network for the xor problem given input and output
def create_xor_network(pW, pV, pb, inputs, expected_answer):
    """Build a fresh computation graph for one XOR example and return its loss.

    The network is ``logistic(V * tanh(W * x + b))`` and the loss is the
    binary log loss between that output and ``expected_answer``.

    Args:
        pW: parameter used as the hidden-layer weight matrix.
        pV: parameter used as the output-layer weight matrix.
        pb: parameter used as the hidden-layer bias.
        inputs: sequence of input values; its length sets the input vector size.
        expected_answer: target value fed to the loss as a scalar input.

    Returns:
        The DyNet expression for the loss of this single example.
    """
    # Every call starts over with a brand-new graph (the "dynamic" approach).
    dy.renew_cg()

    # Bring the trainable parameters into the new graph as expressions.
    hidden_weights = dy.parameter(pW)
    output_weights = dy.parameter(pV)
    hidden_bias = dy.parameter(pb)

    # Input nodes: the question vector and the gold label.
    input_vec = dy.vecInput(len(inputs))
    input_vec.set(inputs)
    target = dy.scalarInput(expected_answer)

    # Forward pass: tanh hidden layer followed by a logistic output unit.
    hidden = dy.tanh((hidden_weights * input_vec) + hidden_bias)
    prediction = dy.logistic(output_weights * hidden)

    return dy.binary_log_loss(prediction, target)

In [5]:
# Parameter collection that owns the trainable weights of the XOR network.
m2 = dy.ParameterCollection()
# 8x2 matrix; passed to create_xor_network as the hidden-layer weights (W).
pW = m2.add_parameters((8,2))
# 1x8 matrix; passed to create_xor_network as the output-layer weights (V).
pV = m2.add_parameters((1,8))
# Hidden-layer bias (b). Note that (8) is a plain int, not a one-element tuple.
pb = m2.add_parameters((8))
# SGD trainer over the collection, constructed with no extra arguments.
trainer = dy.SimpleSGDTrainer(m2)
# generate training data (create_xor_instances defaults to 2000 rounds of the
# four XOR cases)
questions, answers = create_xor_instances()

In [6]:
# Online training: one freshly built graph and one parameter update per example.
seen_instances = 0  # stays 0 if there is no training data at all
total_loss = 0
for seen_instances, (question, answer) in enumerate(zip(questions, answers), start=1):
    loss = create_xor_network(pW, pV, pb, question, answer)
    # Read the loss value (forward pass) before back-propagating and updating.
    total_loss += loss.value()
    loss.backward()
    trainer.update()
    # Report the running average over everything seen so far, every 100 examples.
    if seen_instances % 100 == 0:
        print("average loss is:",total_loss / seen_instances)

average loss is: 0.705782232284546
average loss is: 0.6587769040465354
average loss is: 0.5938927568991978
average loss is: 0.5207816545665264
average loss is: 0.45379536636173723
average loss is: 0.3984465444770952
average loss is: 0.3538688406667539
average loss is: 0.3178111524367705
average loss is: 0.2882649597773949
average loss is: 0.2637010224517435
average loss is: 0.24299547191285953
average loss is: 0.225322909548413
average loss is: 0.2100705643112843
average loss is: 0.19677691215915338
average loss is: 0.18508905303291975
average loss is: 0.17473317301017233
average loss is: 0.1654938919825808
average loss is: 0.15719963926433897
average loss is: 0.14971214777397873
average loss is: 0.142918800272746
average loss is: 0.13672697506490208
average loss is: 0.13105981387219137
average loss is: 0.12585301618354963
average loss is: 0.12105238193587865
average loss is: 0.11661190433688462
average loss is: 0.11249228119250172
average loss is: 0.1086597370098483
average loss is: 0

In [9]:
# Start from a clean computation graph so this cell does not silently build on
# whatever graph the last training step left behind.
dy.renew_cg()
W = dy.parameter(pW) # add parameters to graph as expressions
V = dy.parameter(pV)
b = dy.parameter(pb)

# Same forward pass as in create_xor_network, without the loss node.
x = dy.vecInput(2)
output = dy.logistic(V*(dy.tanh((W*x)+b)))

# Query the trained network on all four XOR inputs. Outputs close to 1 mean
# "inputs differ", outputs close to 0 mean "inputs are equal".
for x1, x2 in [(0, 1), (1, 0), (0, 0), (1, 1)]:
    x.set([x1, x2])
    print("{},{}".format(x1, x2), output.value())

0,1 0.9974074959754944
1,0 0.9974489808082581
0,0 0.000981291988864541
1,1 0.0024815259966999292


In [10]:
# Inspect the learned hidden-layer weights (the 8x2 parameter pW).
W.value()

array([[ 2.35455251, -0.31278911],
       [ 0.02165096,  0.20400636],
       [ 0.20177996,  0.25507054],
       [-0.79492658,  1.63114166],
       [ 2.95638204, -2.04186893],
       [-0.91733837, -0.88101178],
       [ 2.75860119, -3.49698901],
       [-3.08252001, -3.11936283]])

In [11]:
# Inspect the learned output-layer weights (the 1x8 parameter pV).
V.value()

array([[-2.66608906, -0.36562508, -0.46586332, -1.13950741, -3.59327698,
         1.95217037,  4.9609127 , -5.6073904 ]])

In [12]:
# Inspect the learned hidden-layer bias values (the parameter pb).
b.value()

[-0.838267982006073,
 -0.33615848422050476,
 -0.4286726713180542,
 0.11290086805820465,
 0.7609386444091797,
 1.5590980052947998,
 -1.18702232837677,
 1.0107641220092773]

In [13]:
# Save the m2 parameter collection to disk at the given relative path.
# NOTE(review): presumably the models/ directory must already exist — confirm.
m2.save('models/xor_dynamic_model')