diff --git a/docs/src/define_controller.md b/docs/src/define_controller.md
index f8fc36239..554df8830 100644
--- a/docs/src/define_controller.md
+++ b/docs/src/define_controller.md
@@ -33,7 +33,7 @@ function controller!(mechanism, t)
     x = get_minimal_state(mechanism)
 
     ## Gains
-    K = [5.0 0.5] * 0.1
+    K = [75.0 0.5] * 0.1
 
     # Control inputs
     u = -K * (x - x_goal)
@@ -50,7 +50,7 @@ initialize!(mechanism, :pendulum,
 
-We simulate the system for 2 seconds using the `controller!`.
+We simulate the system for 20 seconds using the `controller!`.
 ```julia
-storage = simulate!(mechanism, 2.0, controller!,
+storage = simulate!(mechanism, 20.0, controller!,
     record=true, verbose=true);
 ```
 
diff --git a/docs/src/define_environment.md b/docs/src/define_environment.md
index b7b883ffd..6021b6aa5 100644
--- a/docs/src/define_environment.md
+++ b/docs/src/define_environment.md
@@ -2,6 +2,26 @@
 
 An [`Environment`](@ref) is a convienient object for applications like reinforcement learning and trajectory optimization.
 
+For this example we need the following setup:
+```julia
+# ## Setup
+using Dojo
+using DojoEnvironments
+using Random
+using LinearAlgebra
+
+# ## Define struct
+struct Ant end
+```
+
+```julia
+# ## Define Variables
+representation = :minimal #(:minimal, :maximal)
+seed = 0
+timestep = 0.01
+T = 2.0 # Total time
+```
+
 To demonstrate, we create the [`Dojo.Ant`](@ref) environment. First, we load (or [create](define_mechanism.md)) a mechanism:
 
 ```julia
@@ -41,6 +61,11 @@ Random number:
 rng = MersenneTwister(seed)
 ```
 
+Initialize info:
+```julia
+info = Dict()
+```
+
 Dynamics data:
 ```julia
 # state vector
@@ -77,7 +102,7 @@ opts_grad = SolverOptions()
 
 Environment:
 ```julia
-TYPES = [Ant, T, typeof(mechanism), typeof(aspace), typeof(ospace), typeof(info)]
+TYPES = [Ant, typeof(T), typeof(mechanism), typeof(aspace), typeof(ospace), typeof(info)]
 env = Environment{TYPES...}(
     mechanism,
     representation,
@@ -89,7 +114,7 @@ env = Environment{TYPES...}(
     info,
     [rng],
     vis,
-    opts_sim, opts_grad)
+    opts_step, opts_grad)
 ```
 
 With the environment instantiated, we can interact with it by overloading the following methods:
@@ -126,7 +151,7 @@ function step(env::Environment{Ant}, x, u;
     reward = cost(env, z1, u_scaled)
 
     # check for done
-    done = is_done(env, z1, u_scaled)
+    done = is_done(env, z1)
 
     # gradients
     if gradients
@@ -232,7 +257,8 @@ for t = 1:100
     step(env, env.state, randn(env.num_inputs))
     push!(y, copy(env.state))
 end
-visualize(env, y)
+open(vis)
+DojoEnvironments.visualize(env, y)
 ```
 
 The result should be something like this:
diff --git a/examples/trajectory_optimization/quadruped_min.jl b/examples/trajectory_optimization/quadruped_min.jl
index bfa8990e2..825437eca 100644
--- a/examples/trajectory_optimization/quadruped_min.jl
+++ b/examples/trajectory_optimization/quadruped_min.jl
@@ -2,10 +2,6 @@ using Pkg
 Pkg.activate(joinpath(@__DIR__, ".."))
 Pkg.instantiate()
 
-# ## visualizer
-vis = Visualizer()
-open(vis)
-
 # ## setup
 using Dojo
 using IterativeLQR
@@ -13,6 +9,10 @@ using LinearAlgebra
 using FiniteDiff
 using DojoEnvironments
 
+# ## visualizer
+vis = Visualizer()
+open(vis)
+
 # ## system
 gravity = -9.81
 timestep = 0.05
@@ -85,7 +85,7 @@ model = [dyn for t = 1:T-1]
 x1 = xref[1]
 ū = [u_control for t = 1:T-1]
 x̄ = IterativeLQR.rollout(model, x1, ū)
-visualize(env, x̄)
+DojoEnvironments.visualize(env, x̄)
 
 # ## objective
 qt = [0.3; 0.05; 0.05; 0.01 * ones(3); 0.01 * ones(3); 0.01 * ones(3); fill([0.2, 0.001], 12)...]
@@ -134,7 +134,7 @@ x_sol, u_sol = IterativeLQR.get_trajectory(s)
 vis= Visualizer()
 open(env.vis)
 x_view = [[x_sol[1] for t = 1:15]..., x_sol..., [x_sol[end] for t = 1:15]...]
-visualize(env, x_view)
+DojoEnvironments.visualize(env, x_view)
 
 set_camera!(env.vis,
     cam_pos=[0.0, -3.0, 2.0],
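
For reference, the snippets added and renamed in `docs/src/define_environment.md` above combine into roughly the following interaction loop. This is a minimal sketch, not part of the patch: it assumes `env` and `vis` were constructed as in the patched walkthrough (the `Environment{TYPES...}` call and the `Visualizer` setup), and the initialization of `y` is an assumption; the loop body and the namespaced `DojoEnvironments.visualize` call are taken from the hunks above.

```julia
# Minimal sketch (assumes `env::Environment{Ant}` and `vis` are built as in the
# patched define_environment.md walkthrough; the initialization of `y` is assumed).
using Random

y = [copy(env.state)]                           # record the initial state
for t = 1:100
    step(env, env.state, randn(env.num_inputs)) # apply a random control input
    push!(y, copy(env.state))                   # store the resulting state
end

open(vis)                                       # launch the visualizer
DojoEnvironments.visualize(env, y)              # namespaced call, as in this patch
```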