# Experiment 1: Single stage suborbital launcher
This experiment contains a single-stage lunar suborbital launcher whose goal is to reach the highest possible altitude. The launcher is oriented vertically with respect to the surface. The only actions the agent can take are to ignite and to turn off the engine.

In [3]:
%load_ext autoreload
%autoreload 2
%matplotlib widget

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
from dataclasses import dataclass, field
from math import radians
import time

import numpy as np
import matplotlib.pyplot as plt
from tqdm.auto import trange
import ipywidgets as widgets

from cw.context import time_it
from cw.simulation import Simulation, StatesBase, AB3Integrator, BatchLogger, Logging
from cw.filters import smooth_signal

from topone.sim_post_processing import sim_post_processing
from topone.dynamics_1 import Dynamics1, Stage
from topone.pid_agent import PIDAgent

from agent import Agent
from linear_softmax_agent import LinearSoftmaxAgent, State
from environment import Environment

## Simulation configuration

In [3]:
@dataclass
class States(StatesBase):
    """State container shared by the simulation modules.

    The integrated state vector has seven entries, laid out as
    ``[xii (2), vii (2), theta, mass, score]``; its time derivative is
    ``[vii (2), aii (2), theta_dot, mass_dot, reward]``. ``score`` is
    therefore the time integral of ``reward``.

    Array-valued fields are created per instance through
    ``default_factory``. A plain ``np.zeros(2)`` default would be one array
    object shared by every instance, and Python 3.11+ rejects such unhashable
    defaults with ``ValueError`` when the class is created.
    """

    t: float = 0
    command_engine_on: bool = False
    command_drop_stage: bool = False
    gii: np.ndarray = field(default_factory=lambda: np.zeros(2))
    xii: np.ndarray = field(default_factory=lambda: np.zeros(2))
    vii: np.ndarray = field(default_factory=lambda: np.zeros(2))
    aii: np.ndarray = field(default_factory=lambda: np.zeros(2))
    tci: np.ndarray = field(default_factory=lambda: np.eye(2))
    vic: np.ndarray = field(default_factory=lambda: np.zeros(2))
    fii_thrust: np.ndarray = field(default_factory=lambda: np.zeros(2))
    theta: float = 0.
    theta_dot: float = 0.
    mass: float = 0.
    mass_dot: float = 0.
    h: float = 0.
    engine_on: bool = False
    stage_state: int = 0
    stage_idx: int = 0
    gamma_i: float = 0.
    gamma_e: float = 0.
    latitude: float = 0.

    # Reinforcement-learning bookkeeping; `score` is integrated from `reward`
    # (see get_y / get_y_dot below).
    reward: float = 0.
    score: float = 0.
    done: bool = False

    delta_v: float = 0.

    def get_y_dot(self):
        """Return the derivative of the integrated state vector (length 7)."""
        y = np.empty(7)
        y[:2] = self.vii
        y[2:4] = self.aii
        y[4] = self.theta_dot
        y[5] = self.mass_dot
        y[6] = self.reward
        return y

    def get_y(self):
        """Return the integrated state vector (length 7)."""
        y = np.empty(7)
        y[:2] = self.xii
        y[2:4] = self.vii
        y[4] = self.theta
        y[5] = self.mass
        y[6] = self.score
        return y

    def set_t_y(self, t, y):
        """Write time ``t`` and the integrated state vector ``y`` back.

        ``y`` uses the same layout as the vector returned by ``get_y``.
        ``xii`` and ``vii`` are stored as slices of ``y`` without copying.
        """
        self.t = t
        self.xii = y[:2]
        self.vii = y[2:4]
        self.theta = y[4]
        self.mass = y[5]
        self.score = y[6]

In [4]:
# Alternative configuration kept for reference:
#   Agent(epsilon=0.1, alpha=0.9, gamma=0.7, path="./set_2")

# Agent used by every cell below (simulation module, batch runs, policy display).
agent = LinearSoftmaxAgent(alpha=0.01, gamma=0.99, path="./set_ls_4d")

In [5]:
# Assemble the simulation from the States container, an AB3 integrator and
# three modules: the launcher dynamics, the RL environment and the agent.
simulation = Simulation(
    states_class=States,
    integrator=AB3Integrator(
        h=0.01,  # presumably the integration step size; units not visible here
        rk4=False,
        fd_max_order=1),
    modules=[
        Dynamics1(
            # NOTE(review): 1737.4e3 m is the Moon's mean *radius*; confirm
            # whether `surface_diameter` really expects a diameter.
            surface_diameter=1737.4e3,
            # Matches the Moon's standard gravitational parameter in m^3/s^2
            # (presumably SI units throughout; verify against Dynamics1).
            mu=4.9048695e12,
            # Single-element tuple: the launcher has exactly one stage.
            stages=(
                Stage(
                    dry_mass=1,
                    propellant_mass=0.02,
                    specific_impulse=100,
                    thrust=2*1.7),
            ),
            initial_altitude=1000,
            initial_theta_e=radians(90),
            initial_latitude=radians(90),
        ),
        Environment(),
        agent
    ],
    logging=Logging(),
    initial_state_values=None,
)
# Separate logger that run_batch() swaps in as `simulation.logging` while a
# batch is running.
batch_logger = BatchLogger()
batch_logger.initialize(simulation)
# Stash the freshly built states; later cells call
# simulation.restore_states() before every run (presumably restoring this
# stash; confirm against cw.simulation).
simulation.stash_states()

## Batch run

In [6]:
def run_batch(n_episodes, backup_period=30, timeout=60):
    """Run a batch of episodes and return the batch logger's results.

    Each episode restores the stashed simulation states and calls
    ``simulation.run(1000)``. While the batch runs, ``batch_logger`` replaces
    the simulation's regular logger; the regular logger is always put back.

    Args:
        n_episodes: Maximum number of episodes to run.
        backup_period: Minimum number of seconds between two intermediate
            ``agent.save()`` calls (the greedy policy display is refreshed at
            the same time).
        timeout: Wall-clock budget in seconds; the loop stops after the first
            episode that finishes past it.

    Returns:
        Whatever ``batch_logger.finish_batch()`` returns.

    Raises:
        Exception: Any error raised inside an episode other than
            ``KeyboardInterrupt`` propagates to the caller after the agent
            has been saved and the logger restored. (A ``return`` inside the
            ``finally`` clause would silently discard such errors.)
    """
    batch_logger.reset_batch()

    # Swap in the batch logger; the original is restored in `finally`.
    original_logger = simulation.logging
    simulation.logging = batch_logger

    # Monotonic clock: interval measurements are immune to system clock changes.
    start_time = time.monotonic()
    last_backup_time = start_time

    out = widgets.Output(layout={})
    display(out)

    def show_policy():
        # Render the policy, then schedule a deferred clear (wait=True) so the
        # current rendering stays visible until the next one replaces it.
        with out:
            agent.display_greedy_policy()
        out.clear_output(wait=True)

    try:
        for i in trange(n_episodes):
            simulation.restore_states()
            simulation.run(1000)

            if i == 0:
                show_policy()

            if time.monotonic() - last_backup_time >= backup_period:
                agent.save()
                last_backup_time = time.monotonic()
                show_policy()

            if time.monotonic() - start_time >= timeout:
                break

    except KeyboardInterrupt:
        # A manual interrupt is an expected way to end a batch early; the
        # results gathered so far are still returned below.
        print("Batch cancelled")
    finally:
        # Cleanup only, no `return` here, so unexpected errors propagate.
        try:
            agent.save()
        finally:
            simulation.logging = original_logger

    batch_results = batch_logger.finish_batch()
    out.clear_output()
    with out:
        agent.display_greedy_policy()
    return batch_results

In [7]:
# Long training run: at most ten million episodes, an agent backup at most
# every 5 s, and a wall-clock cap of 14 hours.
batch_results = run_batch(10_000_000, backup_period=5, timeout=14 * 60 * 60)
display(batch_results)

Output()

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10000000.0), HTML(value='')))


Batch cancelled


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/opt/conda/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-7-2266b24cef0b>", line 1, in <module>
    batch_results = run_batch(int(1e7), backup_period=5, timeout=60 * 60 * 14)
  File "<ipython-input-6-f72c609a1a1b>", line 41, in run_batch
    batch_results = batch_logger.finish_batch()
  File "/home/jovyan/topone/external/cw/cw/simulation/logging.py", line 142, in finish_batch
    field_list[1][step_idx] = getattr(step_data, field_name)
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/conda/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2044, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'KeyboardInterrupt' object has no attribute '_render_traceback_'

During handling of the above exception, another e

TypeError: object of type 'NoneType' has no len()

In [10]:
# Show the agent's current greedy policy.
# NOTE(review): the recorded output below is NaN for UNFIRED, FIRING and
# FIRED, which suggests the agent's parameters diverged. TODO confirm.
agent.display_greedy_policy()

UNFIRED: [nan nan]
FIRING: [nan nan]
FIRED: [nan nan]


In [8]:
# NOTE(review): scratch cell; its execution count (In [8]) is lower than the
# cell above it (In [10]), so the notebook was not run top to bottom. What
# `clean(False)` resets is not visible here; see LinearSoftmaxAgent.
agent.clean(False)

In [31]:
# Plot `batch_results.score` after passing it through smooth_signal (wn=0.01).
plt.figure()
smooth_signal(batch_results.score, wn=0.01).plot()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x7f80c3e7c550>]

## Single simulation

In [26]:
# Single timed run with the regular logger; `result` is inspected by the
# plotting cells that follow.
simulation.restore_states()
with time_it("simulation run"):
    result = simulation.run(5)
result

self.s.stage_state=2 self.s.vic[1]=-0.015743986061696816
simulation run: 0.023163738995208405 [s]


In [27]:
# Plot stage_state, fii_thrust and vic from the single run against t
# (presumably all drawn onto the one figure created here; verify).
plt.figure()
result.stage_state.plot.line(x="t")
result.fii_thrust.plot.line(x="t")
result.vic.plot.line(x="t")

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x7fc7f954c400>,
 <matplotlib.lines.Line2D at 0x7fc7f94f7760>]

In [29]:
# Plot the reward logged during the single run against t.
plt.figure()
result.reward.plot.line(x="t")

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x7fc7f9402e50>]

In [17]:
# NOTE(review): scratch cell that overwrites two attributes on the live
# agent by hand; results after this point depend on it having been run.
agent.action_size = 2
agent.state_size = 3

In [21]:
# Scratch check of the agent's private `_x` helper. The recorded output is a
# length-6 vector with a single 1 in the last slot, presumably a one-hot
# encoding over state_size * action_size = 3 * 2 entries (verify).
agent._x(State(2), 1)

array([0., 0., 0., 0., 0., 1.])