## RL Mars Lander from Lunar Lander

From Mars Lander - graphics:

In [12]:
import matplotlib.pyplot as plt
import numpy as np
from ipywidgets import interactive  # what is ipywidgets - slider
from matplotlib import rcParams 
from numpy.linalg import norm
from numpy.random import randint
from mpl_toolkits import mplot3d #added for 3D plotting
from scipy.optimize import minimize   # for minimising landing speed
from ipywidgets import interactive 
from matplotlib import rcParams  

rcParams['figure.figsize'] = (10, 8)

# add any imports necessary for RL of LL code?

In [13]:
def mars_surface():
    surfaceN = randint(5, 15)
    land = np.zeros((surfaceN, 2), dtype=int)
    
    # Ensure there's a flat landing site at least 1000m long
    landing_site = randint(1, surfaceN-1)
    land[landing_site, 0] = randint(2000, 5000)
    land[landing_site+1, 0] = min(land[landing_site, 0] + randint(1000, 2000), 6999)
    land[landing_site+1, 1] = land[landing_site, 1] = randint(1, 1500)
    
    # Fill in the rest of the terrain
    for i in range(landing_site):
        land[i, 0] = (land[landing_site, 0] / landing_site) * i
        land[i, 1] = randint(0, 1500)
    
    for i in range(landing_site + 2, surfaceN):
        land[i, 0] = (land[landing_site + 1, 0] + 
                      (7000 - land[landing_site + 1, 0]) / len(land[landing_site + 2:]) * 
                      (i - (landing_site + 1)))
        land[i, 1] = randint(0, 1500)
    
    # impose boundary conditions
    land[0, 0] = 0
    land[-1, 0] = 6999

    return land, landing_site

def plot_surface(land, landing_site):
    fig, ax = plt.subplots()
    ax.plot(land[:landing_site+1, 0], land[:landing_site+1, 1], 'k-')
    ax.plot(land[landing_site+1:, 0], land[landing_site+1:, 1], 'k-')
    ax.plot([land[landing_site, 0], land[landing_site+1, 0]], 
             [land[landing_site, 1], land[landing_site+1, 1]], 'k--')
    ax.set_xlim(0, 7000)
    ax.set_ylim(0, 16000) # HERE was 13000
    return ax

def plot_lander(land, landing_site, X, thrust=None, animate=False, step=10):
    if animate:
        def plot_frame(n=len(X)-1):
            ax = plot_surface(land, landing_site)
            ax.plot(X[:n, 0], X[:n, 1], 'b--')      # trajectory of lander
            ax.plot(X[n, 0], X[n, 1], 'k^', ms=20) # lander (color was b) , what is ^?
            if thrust is not None:
                ax.plot([X[n, 0], X[n, 0] - 100*thrust[n, 0]],
                        [X[n, 1] - 100., X[n, 1] - 100. - 100*thrust[n, 1]], 
                       'r-', lw=10)
        return interactive(plot_frame, n=(0, len(X), step)) #slider
    else:
        ax = plot_surface(land, landing_site) 
        ax.plot(X[:, 0], X[:, 1], 'b--')
        ax.plot(X[-1, 0], X[-1, 1], 'b^')
        return ax

def interpolate_surface(land, x):
    i,  = np.argwhere(land[:, 0] < x)[-1] # segment containing x is [i, i+1]
    m = (land[i+1, 1] - land[i, 1])/(land[i+1, 0] - land[i, 0]) # gradient
    x1, y1 = land[i, :] # point on line with eqn. y - y1 = m(x - x1) 
    return m*(x - x1) + y1

#np.random.seed(20) # seed random number generator for reproducible results
land, landing_site = mars_surface()
#plot_surface(land, landing_site);


def height(land, X):
    return X[1] - interpolate_surface(land, X[0]) #1 in X[1] points to the vertical position y of the lander

assert abs(height(land, [1, land[0, 1]])) < 100.0 # height when on surface left edge should be close to zero
assert abs(height(land, [6999, land[-1, 1]])) < 100.0 # height when on surface at right edge should be close to zero

_land, _landing_site = mars_surface()

def _height(_land, X):
    return X[1] - interpolate_surface(_land, X[0])

points = np.zeros((10, 2))
points[:, 0] = randint(0, 7000, size=10)
points[:, 1] = randint(0, 16000, size=10)
for i in range(10):
    assert abs(height(_land, points[i, :]) - _height(_land, points[i, :])) < 1e-6

## Reinforcement Learning code

In [136]:
# ML step adapted: 


def step(i,X,V,power,prev_shaping,total_reward):   # eliminated self - need to pass necessary variables  # elminated action ! not as ML and doesn't involve rewards
    
    # what args need to be given ? X, V
    
        """
    returns:
         s (list): The states
         r: the reward for that step
         done: True or False
    """
        #action = np.clip(action, -1, +1).astype(np.float32) # .clip(a, a_min, a_max) ; Given an interval, values outside the interval are clipped to the interval edges
 
        state = [
            X[0],              # s[0] is the horizontal coordinate
            X[1],              # s[1] is the vertical coordinate
            V[0],              # s[2] is the horizontal speed
            V[1],              # s[3] is the vertical speed
            #lander.angle, # s[4] is the angle              #DON'T HAVE THIS !! rotate is the angle that we SHOULD make
            #20.0 * lander.angularVelocity / FPS, # s[5] is the angular speed   #DON'T HAVE THIS !!
        ]
        assert len(state) == 4 # should be 6 with angle and angular speed
        
        ## add fuel !

        reward = 0
        shaping = (  # for rewards
            -100 * np.sqrt(state[0] * state[0] + state[1] * state[1]) # Magnitude of position vectors
            - 100 * np.sqrt(state[2] * state[2] + state[3] * state[3])  # Magnitude of velocity vectors
            #- 100 * abs(state[4])
        )  
         
        if prev_shaping is not None:# from reset function, need it? put it in demo_heurisic?
            reward = shaping - prev_shaping         
        prev_shaping = shaping

        reward -= (
            power * 0.30   # was m_power / add ML fuel consumption ?          # might need to choose more appropriate number  #power or thrust?
        )  # less fuel spent is better, about -30 for heuristic landing

        
        return np.array(state, dtype=np.float32), reward#, done, {}   
        #eliminated done (in simulate)

In [181]:
# ML heuristic adapted:  # REPLACE BY SIMULATE?


#def heuristic(s):    # REPLACE BY SIMULATE? (name it heuristic autopilot)? # eliminated environment from arg
"""
        env (erased): 
        s (list): The state. Attributes:
                  s[0] is the horizontal coordinate
                  s[1] is the vertical coordinate
                  s[2] is the horizontal speed
                  s[3] is the vertical speed
                  s[4] is the angle
                  s[5] is the angular speed 
                  Could add extra states: fuel remaining
    returns:
         a: The heuristic to be fed into the step function defined above to determine the next step and reward.
"""
    
    #return a


g = 3.711 # m/s^2 , gravity on Mars
TSFC = 0.0003 # kg/(N*s)
# fuel = 400 kg
Dc = 6.3525 # drag force as a function of velocity

def heuristic_simulation(X0, V0, land, landing_site, 
             fuel=400, dt=0.1, Nstep=1000, 
             autopilot=None, print_interval=100, parameters=None, parachute=None,prev_shaping = None,total_reward = 0):
    
    n = len(X0)       # number of degrees of freedom (2 here)
    X = X0.copy()     # current position
    V = np.array(V0).astype(float)  #was X0.copy()     # current velocity
    Xs = np.zeros((Nstep, n)) # position history (trajectory) 
    Vs = np.zeros((Nstep, n)) # velocity history
    thrust = np.zeros((Nstep, n)) # thrust history
    drag = np.zeros((Nstep, n)) # drag history
    
    angle = np.zeros((Nstep, n)) # should be 1 instead of n?
    
    success = False
    fuel_warning_printed = False
    rotate = randint(-90, 90)            # degrees, initial angle random (heading alignment phase)
    power = 0            # m/s^2, initial thrust power  
    
    e_prev = np.zeros(Nstep) # error history   

    for i in range(Nstep):
        Xs[i, :] = X     # Store positions
        Vs[i, :] = V     # Store velocities
        
        if autopilot is not None:
            
            rotate, power, parachute,total_reward = autopilot(i, X, V, fuel, rotate, power, parameters, parachute,dt,e_prev,Nstep,prev_shaping,total_reward)
            assert abs(rotate) <= 90
            assert 0 <= power <= 12000
        
            rotate_rad = rotate * np.pi / 180.0 # degrees to radians
            thrust[i, :] = power * np.array([np.sin(rotate_rad), 
                                             np.cos(rotate_rad)])
            if fuel <= 0: 
                if not fuel_warning_printed:
                    print("Fuel empty! Setting thrust to zero")
                    fuel_warning_printed = True
                thrust[i, :] = 0
            else:
                fuel -= TSFC * power * dt
                
        m = 2600 + fuel  #kg , Mass of Lander + Rover + fuel  # fuel Mass loss
        
        if parachute == 0:
            # no Drag                                               
            drag[i, :] = 0               
        else: # parachute == 1
            # Drag - Parachute deployed  
            drag[i, :] = -Dc*np.linalg.norm(V)*V                             
            # linalg.norm(x) => Matrix or vector norm
        
        A = np.array([0, -g]) + thrust[i, :]/m + drag[i, :]/m
                                   
        V += A * dt                          # update velocities
        X += V * dt                          # update positions
        # calculate angle (not the same as how much we want the lander to rotate)
        #angle = np.rad2deg(np.arctan(np.linalg.norm(V[:,0])/np.linalg.norm(V[:,1]))); # not sure at all
        
        
        
        if i % print_interval == 0: 
            print(f"i={i:03d} X=[{X[0]:8.3f} {X[1]:8.3f}] V=[{V[0]:8.3f} {V[1]:8.3f}]" # angle={angle:8.3f}
                  f" thrust=[{thrust[i, 0]:8.3f} {thrust[i, 1]:8.3f}] fuel={fuel:8.3f} rotate={rotate:8.3f} parachute={parachute:8.3f}") 
        
       
        done = False  #needed?
        # check for safe or crash landing
        
        
        if X[1] < interpolate_surface(land, X[0]):
            if not (land[landing_site, 0] <= X[0] and X[0] <= land[landing_site + 1, 0]):
                print("crash! did not land on flat ground!")
                reward = -100
                done = True
            elif rotate != 0:
                print("crash! did not land in a vertical position (tilt angle = 0 degrees)")
                reward = -100
                done = True
            elif abs(V[1]) >= 20: #was 40
                print("crash! vertical speed must be limited (<20m/s in absolute value), got ", abs(V[1]))
                reward = -100
                done = True
            elif abs(V[0]) >= 10: #was 20
                print("crash! horizontal speed must be limited (<10m/s in absolute value), got ", abs(V[0]))
                reward = -100
                done = True
            else:
                print("safe landing - well done!")
                success = True
                reward = +100
                done = True
            Nstep = i
            break # in demo_heuristic_lander function
                 
    
    return Xs[:Nstep,:], Vs[:Nstep,:], thrust[:Nstep,:], success, fuel, rotate, parachute,done, total_reward 

In [178]:
def proportional_autopilot(i, X, V, fuel, rotate, power,parameters, parachute, dt, e_prev, Nstep,prev_shaping,total_reward):
    K_v,K_p,K_h,K_i,K_d = parameters
    
    
    c_v = 10.0 # target landing speed, m/s #c vertical
    c_h = 0 #c horizontal and vertical #trade-off for rotation to go back to 0
    
    # Height from landing platform - CORRECTION -from landing platform , not irregular mars surface !
    h = X[1]-land[landing_site, 1]; # was: height(land, X)
    
    # Horizontal displacement
    Xtarget = (land[landing_site+1, 0] + land[landing_site, 0]) // 2 
    dist = (Xtarget-X[0])  #X[i,0], pass the history of X (same for V)

    rotate = np.rad2deg(np.arctan2(dist,h-2000))   
    
    # rough method to avoid 'did not land in a vertical position' error:
    if h<2000:
        rotate = 0
      
    # Combine vertical & horizontal errors
    v_target_vert = -(c_v + K_v*(h-2000));
    v_target_horz = abs(c_h+K_h*dist)
    v_err_vert = abs(v_target_vert - V[1])
    v_err_horz = abs(v_target_horz - V[0])
    e =  v_err_vert + v_err_horz;
    
    e_d = 0
    if i>0:
        e_d = K_d*((e - e_prev[i-1])/dt)
        
    e_prev[i] = e     # Store error
    
    Pout = K_p*(e + e_d + K_i*(e_prev.sum()*dt)) 
    
    power = min(max(Pout, 0.0), 12000.0)   # max thrust
    
    if h > 10000:
        parachute = 0 
    else:
        parachute = 1 #open parachute
    
    # call to funtion 2
    s, r = step(i,X,V,power,prev_shaping,total_reward)  #eliminated done, in simulation
    total_reward += r
    
    
    if i % 100 == 0:
        print(f'total_reward={total_reward:8.3f} e={e:8.3f} power={power:8.3f}')     #K_p={K_p:8.3f} K_h={K_h:8.3f} K_v={K_v:8.3f} K_i={K_i:8.3f} K_d={K_d:8.3f}
    return (rotate, power, parachute,total_reward)

In [179]:
# ML demo_heuristic adapted:


def demo_heuristic_lander(render=False): # testing equivalent , replace render
    
    total_reward = 0  #initialisation
    steps = 0
    s = None # ? (need to initialise states? or call step function) # bef: env.reset(seed=seed)
    #prev_shaping = None
    
    
    while True: # from testing: demo_heuristic_lander(LunarLander(), render=True)
    
        # call to funtion 1 - ML testing lines !!
        
        land, landing_site = mars_surface()
        K_p= 50.000 ; K_h= 0.001 ; K_v= 0.005 ; K_i= 0.005 ; K_d= 75.000

        iterations = 1
        for i in list(range(iterations)):
            X0 = [randint(2000, 5000), randint(15000, 16000)] 
            V0 = [randint(-50,50), randint(-500,-300)]
            try:
                Xs, Vs, thrust, success, fuel, rotate, parachute,done,total_reward = heuristic_simulation(X0, V0, land, landing_site, dt=0.1, Nstep=3500, 
                                                    autopilot=proportional_autopilot, fuel=400,parameters=[K_v,K_p,K_h,K_i,K_d],parachute=None,prev_shaping = None,total_reward = 0)
            except IndexError:
                print('Error: Out of bounds')
                continue
 
        # ^^REDUCE CLUSTER OF VARIBALES OUT
        
        
        #if steps % 20 == 0 or done:
            #print("observations:", " ".join([f"{x:+0.2f}" for x in s]))  # NEED TO RETURN S !
            #print(f"step {steps} total_reward {total_reward:+0.2f}")
        #steps += 1
        
        if done:
            print('The total reward is:',total_reward)
            break

    return total_reward



In [180]:
# running code:
#if __name__ == "__main__":
demo_heuristic_lander(render=True)

total_reward=-3600.000 e= 361.236 power=12000.000
Error: Out of bounds


UnboundLocalError: local variable 'done' referenced before assignment

In [None]:
V = 

angle = np.rad2deg(np.arctan(np.linalg.norm(V[:,0])/np.linalg.norm(V[:,1]))); # not sure at all

## Compare demo_heuristic and ML testing
## Keep MINIMUM for quick testing purposes !

#np.random.seed(122) # seed random number generator for reproducible results 

land, landing_site = mars_surface()

#1. K_p= 50.000 ; K_h= 0.010 ; K_v= 0.010 ; K_i= 0.005 ; K_d= 75.001 ;
K_p= 50.000 ; K_h= 0.001 ; K_v= 0.005 ; K_i= 0.005 ; K_d= 75.000

OutofBounds= 0
OutofFuel = 0
trials = 0
count = 0
iterations = 5
land_speed_results = np.zeros((iterations,2))
fuel_results = np.zeros((iterations))

for i in list(range(iterations)):
    X0 = [randint(2000, 5000), randint(15000, 16000)] 
    V0 = [randint(-50,50), randint(-500,-300)]  
    #V0[0] = 0 #when using seed() for better results and easier testing of parameters
    try:
        Xs, Vs, thrust, success,fuel_warning_printed, fuel, rotate, parachute = simulate(X0, V0, land, landing_site, dt=0.1, Nstep=3500, 
                                        autopilot=proportional_autopilot, fuel=400,parameters=[K_v,K_p,K_h,K_i,K_d],parachute=None)
    except IndexError:
        print('Error: Out of bounds')
        OutofBounds += 1
        land_speed_results[i, i] = np.inf
        fuel_results[i, i] = np.inf
        continue
    land_speed_results[i,:] = (i,(Vs[-1,0:1]))  #  Vs[-1,1] is vertical and Vs[-1,0] is horizontal terminal velocity
    fuel_results[i] = fuel
    count += success
    OutofFuel += fuel_warning_printed
    trials += 1

#assert count/trials > 0.95 # require 95% success rate 

print('Number of success (fuel remaining):',count)
print('Number of Out of fuel:', OutofFuel)
print('Number of Out of bound errors:', OutofBounds)
print('Number of trials that ran without error:',trials)  
print('Success percentage:',(count/(trials+OutofBounds))*100) #when not using seed

print(land_speed_results)
print(fuel_results)

plot_lander(land, landing_site, Xs, thrust, animate=True, step=10)
print('success rate:',(count/trials)*100)


## Description

    ML: Mars Lander
    LL: Lunar Lander

 ##### Action Space
    There are four discrete actions available: do nothing, fire left
    orientation engine, fire main engine, fire right orientation engine.rotate
    
    if self.continuous:
      Action is two floats [main engine, left-right engines] (only need 1 float !!)
      Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power. Engine can't work with less than 50% power.
     (Don't need this: Left-right:  -1.0..-0.5 fire left engine, +0.5..+1.0 fire right engine, -0.5..0.5 off)
 ##### Observation Space
    There are 8 states: the coordinates of the lander in `x` & `y`, its linear
    velocities in `x` & `y`, its angle, its angular velocity, and two boleans
    showing if each leg is in contact with the ground or not. more? parachute, fuel
 #### Rewards
    Reward for moving from the top of the screen to the landing pad and zero
    speed is about 100..140 points.
    If the lander moves away from the landing (pad) zone it loses reward.
    If the lander crashes, it receives an additional -100 points. If it comes
    to rest, it receives an additional +100 points. 
    Firing the main engine is -0.3 points each frame. Solved is 200 points.
 #### Episode Termination
     ....
 ### Notes
     - Can probably get rid of: step function, self variable and env variable, seed , render
     
     - What is env? env = gym.make("LunarLander-v2", continuous=True); if env.continuous: ; ... => ML always continous
     - Seed? seed: Optional[int] = None, ; demo_heuristic_lander(env, seed=None, render=False):
     - Info? def reset( ... return_info: bool = False, ; if not return_info:
            return self.step(np.array([0, 0]) if self.continuous else 0)[0] (in reset def)  => can get rid of it
     - Render? def render(self, mode="human"): ... pygame.init()..self.screen = pygame.display.set_mode((VIEWPORT_W, VIEWPORT_H)).. self.clock =    pygame.time.Clock() => so related to the environment, get rid of it
     - Self: no def, I think that self.[...] is a way of passing the variables between defs? + replace by 'lander' ?
     class
     - Action: can prob eliminate it, the action (power/thrust) is decided in the autopilot, and it's not directly related to the reward
     
Note about code adaptation: there is no middle ground; either I take some stuff from LL (rewards and states) and put them in ML, or I kind od adapt the LL with some aspects of the ML (keep LL graphics and env); 
     

## DRAFT of relevant functions:

# running code:
if __name__ == "__main__":
    demo_heuristic_lander(LunarLander(), render=True)

def step(self, action):
        #if self.continuous:
            action = np.clip(action, -1, +1).astype(np.float32)
        #else:
            #assert self.action_space.contains(
                #action
            #), f"{action!r} ({type(action)}) invalid "

        # Engines # don't use side engines! only main engine
        #tip = (math.sin(self.lander.angle), math.cos(self.lander.angle)) # what is tip??
        #side = (-tip[1], tip[0])
        #dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]  #graphics?

        m_power = 0.0       # from MAIN_ENGINE_POWER (= 13.0) at start of code # replace with ML equivalent
        if (self.continuous and action[0] > 0.0) or (
            not self.continuous and action == 2
        ):
            # Main engine
            #if self.continuous:  # assume that always continuous?
                m_power = (np.clip(action[0], 0.0, 1.0) + 1.0) * 0.5  # 0.5..1.0
                assert m_power >= 0.5 and m_power <= 1.0
            #else:
                #m_power = 1.0   
            ox = ( # movement in x direction ?
                tip[0] * (4 / SCALE + 2 * dispersion[0]) + side[0] * dispersion[1]
            )  # 4 is move a bit downwards, +-2 for randomness
            oy = -tip[1] * (4 / SCALE + 2 * dispersion[0]) - side[1] * dispersion[1]
            
            impulse_pos = (self.lander.position[0] + ox, self.lander.position[1] + oy) # thrust equivalent??

            self.lander.ApplyLinearImpulse(   #ApplyLinearImpulse from Box2D ; keep? only for graphics?
                (-ox * MAIN_ENGINE_POWER * m_power, -oy * MAIN_ENGINE_POWER * m_power),
                impulse_pos,
                True,
            )

        #s_power = 0.0   #from SIDE_ENGINE_POWER (= 0.6) so deleted everthing that was below this in LL code

        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30) # ?? graphics??
        
        # FPS: frames per second?
 
        pos = X # from ML simulate  #LL: self.lander.position
        vel = V # from ML simulate  #LL: self.lander.linearVelocity
        state = [
            (pos.x - VIEWPORT_W / SCALE / 2) / (VIEWPORT_W / SCALE / 2),
            (pos.y - (self.helipad_y + LEG_DOWN / SCALE)) / (VIEWPORT_H / SCALE / 2),
            vel.x * (VIEWPORT_W / SCALE / 2) / FPS,  # velocity from distance / time calculation ?!
            vel.y * (VIEWPORT_H / SCALE / 2) / FPS,
            self.lander.angle,
            20.0 * self.lander.angularVelocity / FPS,
            #1.0 if self.legs[0].ground_contact else 0.0,  #don't want legs state 6 & 7
            #1.0 if self.legs[1].ground_contact else 0.0,
        ]
        assert len(state) == 6  # was 8 in LL (but state 6 & 7 were ten points for legs contact)

        reward = 0
        shaping = (
            -100 * np.sqrt(state[0] * state[0] + state[1] * state[1]) # Magnitude of position vectors
            - 100 * np.sqrt(state[2] * state[2] + state[3] * state[3])  # Magnitude of velocity vectors
            - 100 * abs(state[4])
            #+ 10 * state[6]  # And ten points for legs contact
            #+ 10 * state[7]
        )  
     
        if self.prev_shaping is not None:
            reward = shaping - self.prev_shaping
        self.prev_shaping = shaping # ??

        reward -= (
            m_power * 0.30   # add ML fuel consumption               # replace from simulate
        )  # less fuel spent is better, about -30 for heuristic landing

        
        done = False  #needed?
        #if self.game_over or abs(state[0]) >= 1.0:
            #done = True
            reward = -100
        #if not self.lander.awake:   # check this means succesful landing ?
            #done = True
            reward = +100
        # ^^replace by simluation checks for crash or landind + attribute rewards from ML simulation:
        # check for safe or crash landing
        if X[1] < interpolate_surface(land, X[0]):
            if not (land[landing_site, 0] <= X[0] and X[0] <= land[landing_site + 1, 0]):
                print("crash! did not land on flat ground!")
                reward = -100
            elif rotate != 0:
                print("crash! did not land in a vertical position (tilt angle = 0 degrees)")
                reward = -100
            elif abs(V[1]) >= 20: #was 40
                print("crash! vertical speed must be limited (<20m/s in absolute value), got ", abs(V[1]))
                reward = -100
            elif abs(V[0]) >= 10: #was 20
                print("crash! horizontal speed must be limited (<10m/s in absolute value), got ", abs(V[0]))
                reward = -100
            else:
                print("safe landing - well done!")
                success = True
                reward = +100
            Nstep = i
            break # keep or replace by 'done' from LL?
        
        # LL return, adapt to ML:
        return np.array(state, dtype=np.float32), reward, done, {}
    
       

## LL: 

def heuristic(env, s):    # equivalent to autopilot? eliminate environment?
    """
    Args:
        env: 
        s (list): The state. Attributes:
                  s[0] is the horizontal coordinate
                  s[1] is the vertical coordinate
                  s[2] is the horizontal speed
                  s[3] is the vertical speed
                  s[4] is the angle
                  s[5] is the angular speed 
                  (s[6] 1 if first leg has contact, else 0   => get rid of & s[7] 1 if second leg has contact, else 0  => get rid of)
                  Could add extra states: fuel remaining
    returns:
         a: The heuristic to be fed into the step function defined above to determine the next step and reward.
    """

    #angle_targ = s[0] * 0.5 + s[2] * 1.0  # angle should point towards center
    #ML code:
    h = height(land, X)
    target = (land[landing_site+1, 0] + land[landing_site, 0]) // 2
    dist = Xtarget-X[0]  #X[i,0], pass the histry of X (same for V)
    rotate = np.rad2deg(np.arctan2(dist,h)) # change name to angle_targ ?

    
    if angle_targ > 0.4:
        angle_targ = 0.4  # more than 0.4 radians (22 degrees) is bad
    if angle_targ < -0.4:
        angle_targ = -0.4
    hover_targ = 0.55 * np.abs(     #related to vertical error??
        s[0]
    )  # target y should be proportional to horizontal offset

    angle_todo = (angle_targ - s[4]) * 0.5 - (s[5]) * 1.0
    hover_todo = (hover_targ - s[1]) * 0.5 - (s[3]) * 0.5

    # assume that only continous possibility?
    #if env.continuous:
        a = np.array([hover_todo * 20 - 1, -angle_todo * 20])
        a = np.clip(a, -1, +1) 
        #Action is two floats [main engine, left-right engines] (only need 1 float !!)
        # side engines in charge of rotating! change by rotate?
    
    #else: # discrete
        #a = 0      # action 1: do nothing
        #if hover_todo > np.abs(angle_todo) and hover_todo > 0.05:
            #a = 2  # action 3: fire main engine
        #elif angle_todo < -0.05:
            #a = 3  # action 4:fire right orientation engine
        #elif angle_todo > +0.05:
            #a = 1  # action 2:fire left orientation engine
    return a


def demo_heuristic_lander(env, seed=None, render=False): # testing equivalent
    total_reward = 0  #initialisation
    steps = 0
    #s = env.reset(seed=seed)  #Need to replace this by something from ML ?
    while True:
        a = heuristic(env, s)
        s, r, done, info = env.step(a) # ??
        total_reward += r

        #if render:
           # still_open = env.render()  #research what render is
            #if still_open == False:
               # break

        if steps % 20 == 0 or done:
            print("observations:", " ".join([f"{x:+0.2f}" for x in s]))
            print(f"step {steps} total_reward {total_reward:+0.2f}")
        steps += 1
        if done:
            break
    #if render:
        #env.close()
    return total_reward

#### Notes:

- 'def render' refers to the graphic environment and clock of the 'pygame'
- Env refers to continous or discrete? (From description: 'To use to the _continuous_ environment, you need to specify : env = gym.make("LunarLander-v2", continuous=True)')
- 'self.particles' & 'def _create_particle' : just a decoration, referes to small particles soming out of bottom of Lunar Lander in simulation
- Box2D: A 2D physics engine for games
