In [1]:
from IPython.display import display, clear_output
import matplotlib.pyplot as plt
import pathlib
import sys
from time import sleep
import torch
sys.path.insert(0, str(pathlib.Path("../..").absolute()))

from environment import TreeEnv
from computation_sim.system import SystemDrawer
from agents.metrics import MovingAverage, MovingTotal
from agents.q_agent import DQNActor

# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")
print(f"Using device: {DEVICE}")


Using device: cuda


In [2]:
dt = 10  # Simulation step length; the timing comments below treat it as milliseconds

# All tunable settings for this experiment, grouped by the component that consumes them.
params = dict(
    # Keyword arguments for the environment.
    environment=dict(
        num_sensors=5,
        dt=dt,
        cost_input=0.01,
        cost_message_loss=1.0,
        cost_output_time=0.1,
    ),
    # Keyword arguments for the actor (durations are converted into step counts via dt).
    actor=dict(
        learn_period=int(100 / dt),  # Update the model every 100 ms
        memory_size=2 * 60 * int(1_000 / dt),  # Memory spans a period of 2 minutes
        tau=dt / 2000.0,  # Full model update after 2 s
        batch_size=1024,  # Number of samples used in one optimization
        gamma=0.9,  # Hyperparameter; not really used in a continuous task
        epsilon_start=0.1,
        epsilon_end=0.0,
        epsilon_decay=4 * 60 * int(1_000 / dt),  # Half-life of the epsilon decay: 4 minutes
        lr=1e-3,
    ),
    num_sim_steps=int(3_600_000 / dt),  # Train for 1 h
)

In [5]:
# Build the environment from the "environment" settings in `params`.
env = TreeEnv(**params["environment"])
# Create a drawer for the environment's node graph; `drawer.fw` is the figure shown
# below (the recorded cell output shows it is a FigureWidget).
drawer = SystemDrawer()
drawer.build(env.system.node_graph)
# Request a 400x400 layout for the figure.
drawer.fw.update_layout(autosize=True, width=400, height=400)
# wait=True defers clearing the cell output until new output is available, so the
# figure displayed next replaces whatever was shown before.
clear_output(wait=True)
display(drawer.fw)

FigureWidget({
    'data': [{'hoverinfo': 'none',
              'line': {'color': '#888', 'width': 1.0},
              'mode': 'lines',
              'showlegend': False,
              'type': 'scatter',
              'uid': 'f9317f19-b129-4d09-934f-81e045e648ab',
              'x': [0.4642857142857143, 0.9642857142857143, None,
                    0.4642857142857143, 0.9642857142857143, None,
                    -0.5357142857142857, -0.035714285714285726, None,
                    -0.035714285714285726, 0.4642857142857143, None,
                    -0.035714285714285726, 0.4642857142857143, None,
                    -0.5357142857142857, -0.035714285714285726, None,
                    -0.035714285714285726, 0.4642857142857143, None,
                    -0.035714285714285726, 0.4642857142857143, None,
                    -0.5357142857142857, -0.035714285714285726, None,
                    -0.035714285714285726, 0.4642857142857143, None,
                    -0.035714285714285726, 0.464

In [15]:
# Create the DQN agent: the state and action counts are read from the environment's
# observation and action spaces, everything else comes from the "actor" settings
# in `params`.
# NOTE(review): the recorded output of this cell is PyTorch's "Initializing
# zero-element tensors is a no-op" warning, which suggests some tensor inside the
# agent was created with a zero-sized dimension — confirm the layer sizes DQNActor
# derives from these arguments.
agent = DQNActor(
    num_states=env.observation_space.shape[0],
    num_actions=env.action_space.n,
    device=DEVICE,
    **params["actor"],
)


Initializing zero-element tensors is a no-op

