In [9]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from environment import Environment
from actor_model import Actor
from copy import deepcopy

In [10]:
# --- Experiment configuration -------------------------------------------------
# Seed Python's `random`, NumPy and TensorFlow in one call so that a
# Restart & Run All reproduces the same actor initialisation.
# Presumably Environment draws its instance from one of these generators as
# well, which would make the problem instance reproducible too — TODO confirm.
seed = 42
tf.keras.utils.set_random_seed(seed)

n_epochs = 100     # number of gradient steps taken by the training cell
n_samples = 1      # NOTE: the training cell reads sample 0 only (env.locations[0, ...]
                   # and tf.argmax(...)[0]), so keep this at 1 unless that cell changes
n_locations = 5    # the training cell decodes 2 * n_locations steps per epoch
max_demand = 10    # forwarded to Environment; presumably the upper bound of a demand
max_capacity = 50  # forwarded to Environment; presumably the vehicle capacity

actor = Actor(n_locations)
# NOTE(review): 0.1 is 100x Adam's default learning rate (1e-3) and the recorded
# loss trace oscillates instead of decreasing — consider lowering it.
optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=0.1)

# Pristine problem instance; the training cell works on a deep copy of it each
# epoch, so every epoch starts from the same state.
env_org = Environment(n_samples, n_locations, max_demand, max_capacity)

In [11]:
# Train the actor by back-propagating through a "soft" route.
#
# Every epoch replays the same problem instance (a deep copy of `env_org`) for
# 2 * n_locations decoding steps. At each step two versions of the choice exist:
#   * soft: the softmax-weighted average of all location coordinates. It is
#     differentiable and is what the loss is computed from.
#   * hard: the argmax location. It is what actually advances the environment.
# Names read by later cells (`env`, `logits`, `logits_max`, `acts`, `real_acts`,
# `loss`, `grads_lst`, `losses`) are kept unchanged.
mask_penalty = 100000   # subtracted from the logits wherever env.mask is set
softmax_sharpness = 10  # logits multiplier; larger values make the softmax closer to one-hot

grads_lst = []
losses = []
for epoch in range(n_epochs):
    env = deepcopy(env_org)
    # A non-persistent tape is sufficient because gradient() is called exactly
    # once; its resources are released right after that call. Trainable
    # variables are watched automatically, so no explicit tape.watch() is needed.
    with tf.GradientTape() as tape:
        actions = []
        real_actions = []
        for node in range(2 * n_locations):
            # Assumes env.mask is 1 for entries that must not be chosen, so the
            # penalty drives their softmax weight to ~0 — TODO confirm.
            logits = actor(env, training=True) - env.mask * mask_penalty

            logits_max = tf.nn.softmax(logits * softmax_sharpness)

            # Soft choice: expected coordinates under the softmax weights
            # (the weights broadcast over the trailing coordinate axis).
            next_node = tf.reduce_sum(env.locations * tf.expand_dims(logits_max, -1), axis=1)

            # Hard choice for sample 0: read its coordinates first, then
            # advance the environment with the same index.
            chosen = tf.argmax(logits, 1)[0]
            real_next_node = env.locations[0, chosen]
            env.update(chosen)

            actions.append(next_node)  # because of softmax, next_node is not accurate
            real_actions.append(tf.reshape(real_next_node, shape=(1, 2)))

        acts = tf.convert_to_tensor(actions)  # shape [n_steps x n_samples x 2]
        real_acts = tf.convert_to_tensor(real_actions)

        # Pair every step with its predecessor; step 0 is paired with the last
        # step, so the closing leg of the route is included.
        acts_shifted = tf.roll(acts, shift=1, axis=0)
        # The 1e-12 keeps the sqrt gradient finite when two consecutive points coincide.
        distances = tf.math.sqrt(tf.reduce_sum(tf.math.square(acts_shifted - acts), -1) + 1e-12)

        # Add the demand penalty once. Previously the per-sample penalty was
        # broadcast against `distances` before summing, which counted it
        # 2 * n_locations times and tied its weight to the number of steps.
        # NOTE(review): env.demands is changed through env.update(argmax), so
        # this term presumably carries no gradient to the actor — confirm.
        tour_length = tf.reduce_sum(distances)
        unmet_demand_penalty = tf.reduce_sum(env.demands * 1000)
        loss = tour_length + unmet_demand_penalty

    grads = tape.gradient(loss, actor.trainable_variables)
    optimizer.apply_gradients(zip(grads, actor.trainable_variables))
    grads_lst.append(grads)
    losses.append(loss)
    print(loss)

    # plot results
    # plt.figure(figsize=(8, 8))
    # sample_id = 0
    # acts_to_plot = real_acts
    # plt.scatter(env.locations[sample_id, 0, 0], env.locations[sample_id, 0, 1])
    # plt.scatter(env.locations[sample_id, 1:, 0], env.locations[sample_id, 1:, 1])
    # for i in range(len(acts) - 1):
    #     plt.plot(
    #         [acts_to_plot[i, sample_id, 0], acts_to_plot[i + 1, sample_id, 0]],
    #         [acts_to_plot[i, sample_id, 1], acts_to_plot[i + 1, sample_id, 1]],
    #         c=(0.5, 0.5, 0.1)
    #     )
    # plt.title(f'epoch {epoch}, loss {loss}')
    # plt.show()
    # 
    # if len(losses) > 3:
    #     if losses[-1] == losses[-2] and losses[-2] == losses[-3]:
    #         print('Converged')
    #         break

tf.Tensor(12.960556, shape=(), dtype=float32)
tf.Tensor(20.008204, shape=(), dtype=float32)
tf.Tensor(25.227747, shape=(), dtype=float32)
tf.Tensor(26.718882, shape=(), dtype=float32)
tf.Tensor(22.26208, shape=(), dtype=float32)
tf.Tensor(26.699875, shape=(), dtype=float32)
tf.Tensor(27.066778, shape=(), dtype=float32)
tf.Tensor(25.689383, shape=(), dtype=float32)
tf.Tensor(23.42109, shape=(), dtype=float32)
tf.Tensor(23.337936, shape=(), dtype=float32)
tf.Tensor(22.094166, shape=(), dtype=float32)
tf.Tensor(24.043951, shape=(), dtype=float32)
tf.Tensor(23.582558, shape=(), dtype=float32)
tf.Tensor(17.486376, shape=(), dtype=float32)
tf.Tensor(24.352613, shape=(), dtype=float32)
tf.Tensor(26.270056, shape=(), dtype=float32)
tf.Tensor(23.343306, shape=(), dtype=float32)
tf.Tensor(23.855673, shape=(), dtype=float32)
tf.Tensor(22.822279, shape=(), dtype=float32)
tf.Tensor(21.184723, shape=(), dtype=float32)
tf.Tensor(22.141699, shape=(), dtype=float32)
tf.Tensor(21.599043, shape=(), dtype

KeyboardInterrupt: 

In [14]:
# Inspect the demands tensor of `env`, i.e. the environment copy that the most
# recent training epoch stepped through (not the pristine `env_org`).
env.demands

<tf.Tensor: shape=(1, 5), dtype=float32, numpy=array([[0., 0., 0., 0., 0.]], dtype=float32)>

In [None]:
# Inspect the mask of the current `env`; the training loop subtracts
# env.mask * 100000 from the actor's logits.
env.mask

In [None]:
# Re-evaluate the actor on the current `env` with a much larger mask penalty
# (10 ** 8 instead of the 100000 used in the training loop) and display the
# softmax of the result. The `* 1` leaves the logits unscaled, whereas the
# training loop multiplies them by 10 before the softmax.
logits2 = actor(env, training=True) - env.mask * 10 ** 8
tf.nn.softmax(logits2 * 1)

In [None]:
# Masked logits from the last decoding step the training loop executed.
logits

In [None]:
# Softmax of the last training-step logits with a factor of 1000 (the training
# loop uses 10), presumably to check how close the weights get to one-hot.
# NOTE: this rebinds `logits_max`, so later cells no longer see the value that
# the training loop computed.
logits_max = tf.nn.softmax(logits * 1000)
logits_max

In [None]:
# Mask of the current `env` again (same expression as an earlier cell).
env.mask

In [None]:
# Demands of the current `env` again (same expression as an earlier cell).
env.demands

In [None]:
# Gradients recorded for the first training epoch: the result of
# tape.gradient(loss, actor.trainable_variables) for that epoch.
grads_lst[0]

In [None]:
# Gradients recorded for the third training epoch (index 2).
grads_lst[2]

In [None]:
# Gradients recorded for the most recently completed training epoch.
grads_lst[-1]

In [None]:
# Current value of `logits_max`: whichever cell assigned it last (the training
# loop with factor 10, or the inspection cell that uses factor 1000).
logits_max

In [None]:
# Print the loss recorded for every completed epoch, one per line.
# The loop variable is deliberately not named `loss`: at notebook scope that
# would overwrite the `loss` tensor left behind by the training cell.
for epoch_loss in losses:
    print(epoch_loss)

In [None]:
# For each recorded epoch, print a header line followed by the sum of every
# gradient tensor in that epoch's gradient list.
for epoch_grads in grads_lst:
    print("new epoch")
    for grad_total in map(tf.reduce_sum, epoch_grads):
        print(grad_total)

In [None]:
# Print the soft (softmax-weighted) coordinates of every decoding step except
# the final one.
for step_coords in acts[:-1]:
    print(step_coords)

In [None]:
# Plot the route of the most recent training epoch for one sample.
sample_id = 0

# Choose what to draw: `real_acts` holds the coordinates of the argmax-selected
# locations, `acts` the softmax-weighted coordinates the loss is computed from.
# Every line below goes through `acts_to_plot`, so changing this one assignment
# is enough (previously the variable was assigned but `acts` was always drawn).
acts_to_plot = real_acts

plt.figure(figsize=(8, 8))
# Location 0 is drawn in its own colour (presumably the depot — TODO confirm),
# all remaining locations in a second colour.
plt.scatter(env.locations[sample_id, 0, 0], env.locations[sample_id, 0, 1])
plt.scatter(env.locations[sample_id, 1:, 0], env.locations[sample_id, 1:, 1])
for i in range(len(acts_to_plot) - 1):
    plt.plot(
        [acts_to_plot[i, sample_id, 0], acts_to_plot[i + 1, sample_id, 0]],
        [acts_to_plot[i, sample_id, 1], acts_to_plot[i + 1, sample_id, 1]],
        # The blue channel grows with the step index so later legs look
        # different; clamp it because matplotlib rejects RGB values above 1.
        c=(0.5, 0.5, min(1.0, 0.04 * i)),
    )
plt.title(f"Route of the last epoch (sample {sample_id})")
plt.xlabel("x")
plt.ylabel("y")
plt.show()

In [None]:
# Coordinates tensor of the current `env`; the plotting cell indexes it as
# [sample, location, coordinate].
env.locations

In [None]:
# Demands of the current `env` once more (same expression as earlier cells).
env.demands