In [None]:
from environments.dm_control.utils import episode_data
import numpy as np
from dm_control.rl.control import Environment
from environments.dm_control import moving_coil2D
import matplotlib.pyplot as plt
import tempfile
from IPython.display import Image,display
from acme.wrappers.canonical_spec import CanonicalSpecWrapper
from acme.wrappers.single_precision import SinglePrecisionWrapper
from acme import specs


from absl import app
from absl import flags
from acme import specs
from acme import types
from acme.environment_loop import EnvironmentLoop
from acme.wrappers.single_precision import SinglePrecisionWrapper
from acme.wrappers.canonical_spec import CanonicalSpecWrapper
from acme.agents.tf import dmpo, d4pg, mpo
from acme.tf import networks
from acme.tf import utils as tf2_utils
from acme.utils import paths
import dm_env
import numpy as np
import sonnet as snt
import tensorflow as tf
from dm_control.rl.control import Environment
from environments.dm_control import tank, moving_coil, moving_coil2D
import shutil
import os

# Load IPython's autoreload extension so the %autoreload magic is available.
%load_ext autoreload

# Mode 2: re-import edited modules before each cell runs, so changes to the
# local `environments` package are picked up without restarting the kernel.
%autoreload 2

\begin{align}
    \vec{s} = [\vec{x}_{p}, \vec{v}_{p}] = [\vec{x}_{p}, \frac{d \vec{x}_{p}}{d t}]
\end{align}

\begin{align}
    \frac{\vec{F}}{l} = \sum_{i=0}^{N} \frac{\mu_0 I_p I_i}{2 \pi} \frac{\vec{x}_p - \vec{x}_{c,i}}{\| \vec{x}_p - \vec{x}_{c,i} \|_2^2}
\end{align}

\begin{equation}
\frac{d \vec{s}}{d t} = 
\begin{bmatrix}
\vec{v}_p \\
\frac{1}{m} \vec{F} ( \vec{x}_p, I_0, \dots, I_N; \vec{x}_{c,0}, \dots, \vec{x}_{c,N})
\end{bmatrix}
\end{equation}

In [None]:
# Open-loop run of the system: the environment is stepped with one fixed
# action until the episode ends, then the episode is rendered as a movie.

# Build the environment; the task is created with its debug flag switched on.
environment = Environment(
    moving_coil2D.physics.Physics(),
    moving_coil2D.tasks.Step(debug=True),
    time_limit=2.0,
)
timestep = environment.reset()

# Constant action: zero in every component except the first one, which is
# set to 1 ("pull the coil").
action = np.zeros(shape=environment.action_spec().shape, dtype=np.float32)
action[0] = 1.0

# Apply the same action until the environment reports the last timestep.
while not timestep.last():
    timestep = environment.step(action)

# Fetch sim data
data = episode_data.pack_datadict(environment.task.datadict)

moving_coil2D.make_movie.display_movie_ipynb(environment)


In [None]:
# The goal of this cell is just to be able to train an agent on the env.
# (The unused, auto-inserted `distutils.log` / `gym` imports were dropped:
# nothing here references them and `distutils` no longer exists on
# Python >= 3.12, which made the cell fail on a fresh kernel.)
from acme.utils.loggers import tf_summary

# Episode time limit handed to the dm_control Environment.
time_limit = 2.

physics = moving_coil2D.physics.Physics()
task = moving_coil2D.tasks.Step(t_step = 1.)

environment = Environment(physics, task, time_limit=time_limit)
# Clip actions by bounds
environment = CanonicalSpecWrapper(
    environment= environment,
    clip= True,
)
# Wrap to single precision
environment = SinglePrecisionWrapper(environment)
environment_spec = specs.make_environment_spec(environment)
action_spec = environment_spec.actions


# Hidden-layer widths of the policy and critic MLPs.
policy_layer_sizes= (64,64)
critic_layer_sizes= (64,64)


# Get total number of action dimensions from action spec.
num_dimensions = np.prod(action_spec.shape, dtype=int)

# Create the shared observation network; here simply a state-less operation.
observation_network = tf2_utils.batch_concat


# Create the policy network: an MLP torso followed by a diagonal Gaussian
# head with one dimension per action component.
policy_network = snt.Sequential([
    networks.LayerNormMLP(policy_layer_sizes),
    networks.MultivariateNormalDiagHead(num_dimensions),
])


# Create the critic network. The multiplexer concatenates the observations
# and actions before feeding them to the MLP.
# Hack dimension to conform to mpo implementation: append a final layer of
# width 1 so the critic MLP ends in a single output.
critic_layer_sizes = list(critic_layer_sizes) + [1]
critic_network = networks.CriticMultiplexer(
    critic_network=networks.LayerNormMLP(critic_layer_sizes))


agent = mpo.MPO(
    environment_spec= environment_spec,
    policy_network= policy_network,
    critic_network= critic_network,
    observation_network= observation_network,
    batch_size = 40,
    target_policy_update_period = 20,
    target_critic_update_period = 20,
    min_replay_size = 10,
)


outpath = '/content' # Destination of tensorboard log file
logger = tf_summary.TFSummaryLogger(logdir = outpath)

# Run the environment loop for 200 episodes, logging through `logger`.
loop = EnvironmentLoop(environment, agent, logger = logger)
loop.run(200)



In [None]:
#%load_ext tensorboard
%tensorboard --logdir /content

In [None]:
from acme.agents.tf import actors

# Build the evaluation policy the same way the agent was configured for
# training: observations go through `observation_network` before reaching
# the policy MLP. Feeding raw observations straight into `policy_network`
# skips that step and breaks as soon as the observation is a nested
# structure rather than a single flat array.
evaluation_network = snt.Sequential([
    observation_network,
    policy_network,
])

# Create the actor.
actor = actors.FeedForwardActor(
    policy_network=evaluation_network)

# Activate storing of data while looping. The flag is set before the reset
# so it is already on when the episode is initialised, matching the
# free-evolution cell where the task is constructed with debug = True.
environment.task.par_dict["debug"] = True

timestep = environment.reset()

# Roll out one episode with actions chosen by the trained policy.
while not timestep.last():
    action = actor.select_action(timestep.observation)
    timestep = environment.step(action)


In [None]:
# Fetch sim data: pack the datadict held by the environment's task into
# `data` for later inspection.
# NOTE(review): presumably the datadict holds the rollout recorded with the
# task's "debug" flag on; `data` is not used again in the visible cells.
data = episode_data.pack_datadict(environment.task.datadict)

# Hand the environment to the movie helper; judging by its name it renders
# the episode inline in the notebook. TODO confirm against make_movie.
moving_coil2D.make_movie.display_movie_ipynb(environment)