# Experiment 1: Single stage suborbital launcher
This experiment contains a single-stage lunar suborbital launcher whose goal is to reach the highest possible altitude. The launcher is oriented vertically with respect to the surface. The only actions the agent can take are to ignite the engine and to turn it off.

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib widget

In [2]:
from dataclasses import dataclass, field
from math import radians
import time

import numpy as np
import matplotlib.pyplot as plt
from tqdm.auto import trange

from cw.context import time_it
from cw.simulation import Simulation, StatesBase, AB3Integrator, BatchLogger, Logging
from cw.filters import smooth_signal

from topone.sim_post_processing import sim_post_processing
from topone.dynamics_1 import Dynamics1, Stage
from topone.pid_agent import PIDAgent

from agent import Agent
from environment import Environment

## Simulation configuration

In [3]:
def _zero_vec2():
    """Return a fresh 2-element zero vector used as a per-instance default."""
    return np.zeros(2)


@dataclass
class States(StatesBase):
    """State record shared by the simulation modules.

    Seven scalars are exposed to the integrator through ``get_y`` /
    ``get_y_dot`` / ``set_t_y``: the two components of ``xii``, the two
    components of ``vii``, ``theta``, ``mass`` and ``score``.  Their time
    derivatives are, in the same order, ``vii``, ``aii``, ``theta_dot``,
    ``mass_dot`` and ``reward``.

    The array-valued fields use ``default_factory`` so that every instance
    owns its own arrays.  A bare ``np.zeros(2)`` default is shared between
    all instances and is rejected outright by ``@dataclass`` on
    Python 3.11+ because ``ndarray`` is unhashable.
    """

    t: float = 0

    # Commands written by the controlling module.
    command_engine_on: bool = False
    command_drop_stage: bool = False

    # 2-component vectors.  NOTE(review): the "ii" suffix presumably denotes
    # an inertial frame -- confirm against Dynamics1.
    gii: np.ndarray = field(default_factory=_zero_vec2)
    xii: np.ndarray = field(default_factory=_zero_vec2)  # integrated from vii
    vii: np.ndarray = field(default_factory=_zero_vec2)  # integrated from aii
    aii: np.ndarray = field(default_factory=_zero_vec2)
    fii_thrust: np.ndarray = field(default_factory=_zero_vec2)

    theta: float = 0.      # integrated from theta_dot
    theta_dot: float = 0.
    mass: float = 0.       # integrated from mass_dot
    mass_dot: float = 0.
    h: float = 0.
    engine_on: bool = False
    stage_state: int = 0
    stage_idx: int = 0
    gamma_i: float = 0.
    gamma_e: float = 0.
    latitude: float = 0.

    reward: float = 0.
    score: float = 0.      # integrated from reward
    done: bool = False

    delta_v: float = 0.

    def get_y_dot(self):
        """Return the derivative of ``get_y()``, element for element.

        Layout: ``[vii[0], vii[1], aii[0], aii[1], theta_dot, mass_dot, reward]``.
        """
        y = np.empty(7)
        y[:2] = self.vii
        y[2:4] = self.aii
        y[4] = self.theta_dot
        y[5] = self.mass_dot
        y[6] = self.reward
        return y

    def get_y(self):
        """Return the integrated state as a flat 7-element vector.

        Layout: ``[xii[0], xii[1], vii[0], vii[1], theta, mass, score]``.
        """
        y = np.empty(7)
        y[:2] = self.xii
        y[2:4] = self.vii
        y[4] = self.theta
        y[5] = self.mass
        y[6] = self.score
        return y

    def set_t_y(self, t, y):
        """Store time ``t`` and unpack ``y`` (same layout as ``get_y``).

        ``xii`` and ``vii`` are assigned as slices of ``y``; when ``y`` is a
        NumPy array these are views, not copies.
        """
        self.t = t
        self.xii = y[:2]
        self.vii = y[2:4]
        self.theta = y[4]
        self.mass = y[5]
        self.score = y[6]

In [4]:
# Learning agent; it is also registered as the last simulation module below.
agent = Agent(epsilon=0.1, alpha=0.9, gamma=0.7, path="./set_2")

# The components are created in the same order as they would be when written
# inline inside the Simulation(...) call.
integrator = AB3Integrator(h=0.1, rk4=False, fd_max_order=1)

# The launcher has exactly one stage.
launcher_stage = Stage(
    dry_mass=1,
    propellant_mass=0.02,
    specific_impulse=100,
    thrust=2 * 1.7,
)

# NOTE(review): 1737.4e3 and 4.9048695e12 look like the Moon's mean radius [m]
# and gravitational parameter [m^3/s^2].  The first is passed as
# `surface_diameter` -- confirm whether Dynamics1 expects a radius or a
# diameter.
dynamics = Dynamics1(
    surface_diameter=1737.4e3,
    mu=4.9048695e12,
    stages=(launcher_stage,),
    initial_altitude=1000,
    initial_theta_e=radians(90),
    initial_latitude=radians(90),
)

simulation = Simulation(
    states_class=States,
    integrator=integrator,
    modules=[dynamics, Environment(), agent],
    logging=Logging(),
    initial_state_values=None,
)

# Logger that is swapped in during batch runs; the stashed states are what
# `simulation.restore_states()` is called to bring back before each run.
batch_logger = BatchLogger()
batch_logger.initialize(simulation)
simulation.stash_states()

## Batch run

In [5]:
def run_batch(n_episodes, backup_period=30):
    """Run ``n_episodes`` simulations back to back and return the batch log.

    The simulation's regular logger is replaced by ``batch_logger`` for the
    duration of the batch and is always put back afterwards.  The agent is
    saved every ``backup_period`` seconds and once more when the batch ends.

    Interrupting the batch (``KeyboardInterrupt``) stops it early and still
    returns the results gathered so far.  Any other exception propagates to
    the caller after the logger has been restored and the agent saved; it is
    no longer silently discarded by a ``return`` inside ``finally``.

    Args:
        n_episodes: Number of episodes to run.
        backup_period: Minimum number of seconds between periodic
            ``agent.save()`` calls.

    Returns:
        Whatever ``batch_logger.finish_batch()`` returns.
    """
    batch_logger.reset_batch()

    # Backup original logger and swap with faster logger
    original_logger = simulation.logging
    simulation.logging = batch_logger

    # A monotonic clock keeps the backup interval immune to system clock
    # adjustments.
    last_backup_time = time.monotonic()

    try:
        for _ in trange(n_episodes):
            simulation.restore_states()
            simulation.run(100)
            if time.monotonic() - last_backup_time >= backup_period:
                agent.save()
                last_backup_time = time.monotonic()
    except KeyboardInterrupt:
        # Deliberate early stop: fall through and return the partial batch.
        pass
    finally:
        # Restore the logger first so that a failing save cannot leave the
        # simulation wired to the batch logger.
        simulation.logging = original_logger
        agent.save()

    return batch_logger.finish_batch()

In [6]:
# Run a batch of 10000 episodes, then show the agent's `values` array and the
# greedy policy it reports.
batch_results = run_batch(10000)
display(agent.values)
agent.display_greedy_policy()
# display(batch_results)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=10000.0), HTML(value='')))




array([[-475125.524887  , -445655.26714625],
       [-903228.25006405, -218715.0820443 ],
       [-600343.72138475, -743417.59828453]])

UNFIRED: 1 
FIRING: 1
FIRED: 0


In [7]:
# Plot the batch scores after passing them through `smooth_signal`.
# NOTE(review): `wn` is presumably the filter's cutoff parameter -- confirm in cw.filters.
plt.figure()
smooth_signal(batch_results.score, wn=0.01).plot()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x7f54f456d5e0>]

## Single simulation

In [8]:
# Single run with agent.epsilon forced to 0.0 (presumably this disables
# exploration so the agent acts greedily -- confirm in Agent).
simulation.restore_states()
previous_epsilon = agent.epsilon
agent.epsilon = 0.0
try:
    with time_it("simulation run"):
        result = simulation.run(100)
finally:
    # Put back whatever epsilon was configured, even when the run fails,
    # instead of assuming it was 0.1.
    agent.epsilon = previous_epsilon
result

simulation run: 0.011209629999939352 [s]


In [9]:
# Plot the logged `fii_thrust` of the single run against time `t`.
plt.figure()
result.fii_thrust.plot.line(x="t")

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x7f54f446abe0>,
 <matplotlib.lines.Line2D at 0x7f54f446a970>]

In [68]:
# Plot the logged `mass` of the single run against time `t`.
plt.figure()
result.mass.plot.line(x="t")

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x7f1f70e9b5e0>]

In [6]:
# Scratch check: show the type of an empty-tuple literal.
type(())

tuple