In [11]:
# Static network
import dynet as dy

# multilayer perceptron with two hidden layers
# input 2 nodes
# hidden layer 1: 8 nodes, activation: tanh
# hidden layer 2: 4 nodes, activation: tanh
# output layer: 1 node, activation: logistic (sigmoid)
# output = logistic(U(tanh(V(tanh(Wx+b))+c)))
# x: 2x1
# W: 8x2
# b: 8 vector
# V: 4x8
# c: 4 vector
# U: 1x4

# define the parameters
# Every trainable tensor is registered with a single ParameterCollection;
# each add_parameters call hands back a _dynet.Parameters handle.
# Registration order is W, b, V, c, U.
m = dy.ParameterCollection()
pW = m.add_parameters((8, 2))  # hidden layer 1 weights
pb = m.add_parameters((8,))    # hidden layer 1 bias (explicit 1-tuple)
pV = m.add_parameters((4, 8))  # hidden layer 2 weights
pc = m.add_parameters((4,))    # hidden layer 2 bias (explicit 1-tuple)
pU = m.add_parameters((1, 4))  # output layer weights

In [12]:
# Start a new computation graph before any expressions are built. The call
# returns the new _dynet.ComputationGraph object, which the notebook echoes
# as this cell's output.
dy.renew_cg()

<_dynet.ComputationGraph at 0x1097cbbd0>

In [13]:
# parameter to expression
# Turn each Parameters handle into a _dynet.Expression, keeping the
# W, b, V, c, U order.
# NOTE(review): newer DyNet releases reportedly accept Parameters directly
# and deprecate dy.parameter -- confirm against the installed version.
W, b, V, c, U = map(dy.parameter, (pW, pb, pV, pc, pU))

In [14]:
# Report the dimensions of every parameter expression. A single print call
# is used; sep='\n' places each entry on its own line.
print(
    'W.dimension: {}'.format(W.dim()),
    'b.dimension: {}'.format(b.dim()),
    'V.dimension: {}'.format(V.dim()),
    'c.dimension: {}'.format(c.dim()),
    'U.dimension: {}'.format(U.dim()),
    sep='\n',
)

W.dimension: ((8, 2), 1)
b.dimension: ((8,), 1)
V.dimension: ((4, 8), 1)
c.dimension: ((4,), 1)
U.dimension: ((1, 4), 1)


In [15]:
# create the network
# x is a settable input vector of size 2 (_dynet._vecInputExpression); its
# contents are supplied later through x.set(...).
x = dy.vecInput(2)
# Forward pass written one layer per line, so each intermediate activation
# has a name and can be inspected on its own.
h1 = dy.tanh(W * x + b)       # hidden layer 1
h2 = dy.tanh(V * h1 + c)      # hidden layer 2
output = dy.logistic(U * h2)  # output layer; output is _dynet.Expression

In [16]:
# we can now query our network
# Load the input (0, 0) into the placeholder x, then evaluate the output
# expression. The bare expression on the last line is what the notebook
# displays (a single scalar, see the cell output).
x.set([0,0])
output.value()

0.5899962186813354

In [17]:
# Target placeholder, created with the value 0; later cells overwrite it
# with y.set(...) before each evaluation.
y = dy.scalarInput(0) # this will hold the correct answer
# Binary log loss of the network's prediction `output` against the target
# `y`. This only builds the expression; loss.value() evaluates it.
loss = dy.binary_log_loss(output, y)

In [18]:
# Sanity-check the shape of the loss expression; the cell output reports a
# single element.
# NOTE(review): the trailing 1 in the result is presumably the batch size --
# confirm against the DyNet documentation.
loss.dim()

((1,), 1)

In [19]:
# Plain SGD trainer over the parameters registered in `m`. No learning rate
# is passed here, so the library's default is used.
trainer = dy.SimpleSGDTrainer(m)

In [20]:
# single step optimization/training
# One manual training step on the example (1, 0) -> 1. The loss is printed
# before and after the update so the effect of the step is visible.

x.set([1,0])
y.set(1)
loss_value = loss.value() # this performs a forward through the network.
print("the loss before step is:",loss_value)

# now do an optimization step
loss.backward()  # compute the gradients
trainer.update()  # let the trainer apply the update to the parameters

# see how it affected the loss:
# the parameters have just changed, so the value computed above is stale
# and a fresh forward pass is requested explicitly.
loss_value = loss.value(recalculate=True) # recalculate=True means "don't use precomputed value"
print("the loss after step is:",loss_value)

the loss before step is: 0.5648019313812256
the loss after step is: 0.48412010073661804


In [21]:
# create training data set
def create_xor_instances(num_rounds=2000):
    """Build an XOR training set by repeating the 4-row truth table.

    Args:
        num_rounds: how many times the truth table is repeated.

    Returns:
        A (questions, answers) pair of equal-length lists. `questions` holds
        (x1, x2) tuples cycling through (0,0), (0,1), (1,0), (1,1);
        `answers` holds the matching XOR label (0 or 1) for each tuple.
    """
    # One pass over the truth table, in x1-major order.
    truth_table = [((x1, x2), int(x1 != x2)) for x1 in (0, 1) for x2 in (0, 1)]
    # Repeat that single pass num_rounds times.
    questions = [inputs for inputs, _ in truth_table] * num_rounds
    answers = [label for _, label in truth_table] * num_rounds
    return questions, answers


questions, answers = create_xor_instances()

In [22]:
# training
# One SGD update per instance, in dataset order. total_loss accumulates the
# loss measured on each instance before its update, so the figure printed
# every 100 instances is the running average since the start of the loop.
total_loss = 0
seen_instances = 0  # remains 0 when there are no instances at all
for seen_instances, (question, answer) in enumerate(zip(questions, answers), start=1):
    x.set(question)
    y.set(answer)
    total_loss += loss.value()  # forward
    loss.backward()             # gradients
    trainer.update()            # parameter update
    if seen_instances % 100 == 0:
        print("average loss is:", total_loss / seen_instances)

average loss is: 0.7265449064970017
average loss is: 0.6984464120864868
average loss is: 0.6532946837941805
average loss is: 0.5971078788116574
average loss is: 0.5270029918029905
average loss is: 0.4615929839635889
average loss is: 0.4081690049943115
average loss is: 0.36513409022591076
average loss is: 0.33003903024622966
average loss is: 0.30092951175104826
average loss is: 0.2763945693336427
average loss is: 0.25544368294960196
average loss is: 0.23736714454009555
average loss is: 0.2216324415348936
average loss is: 0.20782698009690892
average loss is: 0.19562589112843853
average loss is: 0.18477075937121887
average loss is: 0.17505420053739928
average loss is: 0.16630833996790706
average loss is: 0.158396128852095
average loss is: 0.15120475837594963
average loss is: 0.1446406377083622
average loss is: 0.1386255380129138
average loss is: 0.1330936080806714
average loss is: 0.12798904782007448
average loss is: 0.12326427972432262
average loss is: 0.11887850037077442
average loss is

In [23]:
# inference
# Query the network on each of the four XOR inputs. The label printed in
# front of every prediction is the input pair itself.
for label, inputs in (
    ("0,1", [0, 1]),
    ("1,0", [1, 0]),
    ("0,0", [0, 0]),
    ("1,1", [1, 1]),
):
    x.set(inputs)
    print(label, output.value())

0,1 0.999601423740387
1,0 0.9984966516494751
0,0 0.0012946868082508445
1,1 0.0015207799151539803


In [24]:
# Inspect the current values of W (hidden layer 1 weights); the notebook
# displays them as an 8x2 array.
W.value()

array([[ 0.70821732, -0.86799341],
       [ 1.20668006, -0.3391819 ],
       [ 2.16869354, -1.97315001],
       [ 1.31976581,  0.75390619],
       [-0.84980679, -0.44870019],
       [ 1.06544614, -1.41449165],
       [ 0.17298394,  1.48912907],
       [-0.7218827 , -0.54132581]])

In [25]:
# Inspect the current values of V (hidden layer 2 weights); the notebook
# displays them as a 4x8 array.
V.value()

array([[-0.14111596, -1.06471193,  0.85738766, -1.65977442,  0.81961733,
        -0.63111877, -1.04056764,  0.88709134],
       [-0.99556535, -0.93962252,  0.28507039, -0.34893525,  0.16688518,
        -1.38976574,  0.59902334, -0.32455865],
       [ 0.50615299, -0.2314343 , -1.16259253, -1.12978232, -0.46192959,
         0.87956804, -1.67490542,  0.73403549],
       [-0.42158768,  0.20856887, -2.4908185 ,  0.01037794,  0.39839748,
        -0.83903939,  0.52883697,  0.72344977]])

In [26]:
# Inspect the current values of U (output layer weights); the notebook
# displays them as a 1x4 array.
U.value()

array([[-4.09640837, -2.86995101,  4.00715542,  4.03360462]])