In [None]:
import gymnasium as gym
import numpy as np
import matplotlib.pyplot as plt
from time import sleep
from deap import base, creator, tools, algorithms
import random

In [None]:
def pid(state, params):
    """Compute a two-component control action with two PD loops.

    Args:
        state: indexable observation. Indices 0-5 are read as
            x, y, vx, vy, theta, vtheta; indices 6 and 7 are treated as
            leg-contact flags.
        params: indexable gains, read as
            [kp_altitude, kd_altitude, kp_angle, kd_angle].

    Returns:
        numpy array of length 2 (altitude command, angle command), each
        clipped to [-1, 1]; all zeros once either leg flag is truthy.
    """
    # Controller gains, in the order the optimizer supplies them.
    kp_alt, kd_alt = params[0], params[1]
    kp_ang, kd_ang = params[2], params[3]

    # Setpoints: the desired altitude grows with horizontal offset, and the
    # desired tilt depends on horizontal position plus horizontal velocity.
    target_altitude = np.abs(state[0])
    target_angle = (.25*np.pi)*(state[0]+state[2])

    # Proportional term acts on the setpoint error, derivative term on the
    # matching velocity component of the state.
    alt_cmd = kp_alt*(target_altitude - state[1]) + kd_alt*state[3]
    ang_cmd = kp_ang*(target_angle - state[4]) + kd_ang*state[5]

    # The environment expects every action component inside [-1, 1].
    action = np.clip(np.array([alt_cmd, ang_cmd]), -1, +1)

    # A leg touching the ground means we have landed: cut all thrust.
    if state[6] or state[7]:
        action = np.zeros_like(action)
    return action

In [None]:
def run(params, env, verbose=False):
    """Play one episode with the PID controller and record what happened.

    Args:
        params: PID gains forwarded unchanged to pid().
        env: environment exposing reset(), step(action) and render().
        verbose: when true, render after the reset and after every step,
            pausing 5 ms each time.

    Returns:
        (total, state_list): the summed reward of the episode and the list
        of observations, beginning with the one returned by reset().
    """
    def show_frame():
        # Draw the current frame and pause briefly so it can be watched.
        if verbose:
            env.render()
            sleep(.005)

    observation, _ = env.reset()
    show_frame()
    trajectory = [observation]
    episode_return = 0

    finished = False
    while not finished:
        action = pid(observation, params)
        observation, reward, terminated, truncated, _ = env.step(action)
        episode_return += reward
        show_frame()
        trajectory.append(observation)
        # The episode ends on either a terminal state or a truncation.
        finished = terminated or truncated
    return episode_return, trajectory

In [None]:
def evalParams(individual, env, n_trials=5):
    """Score a set of PID gains by its mean episode reward.

    Args:
        individual: PID gains, forwarded unchanged to run().
        env: environment in which the episodes are played.
        n_trials: number of episodes to average over (default 5, the value
            that used to be hard-coded).

    Returns:
        A one-element tuple ``(mean_score,)``; the trailing comma matters
        because the GA fitness is declared with a weights tuple.

    Raises:
        ValueError: if n_trials is smaller than 1, since the mean of zero
            episodes is undefined.
    """
    if n_trials < 1:
        raise ValueError("n_trials must be at least 1")
    # run() returns (total_reward, states); only the reward is needed here.
    scores = [run(individual, env)[0] for _ in range(n_trials)]
    mean_score = np.mean(scores)
    return mean_score,

def init_ga(environment=None):
    """Build the DEAP toolbox used to evolve the four PID gains.

    Args:
        environment: environment handed to evalParams for fitness
            evaluation. When omitted, the module-level ``env`` is used,
            which must then exist before this function is called.

    Returns:
        A deap ``base.Toolbox`` with ``individual``, ``population``,
        ``evaluate``, ``mate``, ``mutate`` and ``select`` registered.
    """
    # creator.create() registers classes globally inside deap.creator, so
    # calling it again (e.g. when the notebook cell is re-run) would re-create
    # them and trigger an overwrite warning. Only create what is missing.
    # NOTE: a pre-existing class with the same name is reused as-is.
    if not hasattr(creator, "FitnessMax"):
        creator.create("FitnessMax", base.Fitness, weights=(1.0,))
    if not hasattr(creator, "Individual"):
        creator.create("Individual", list, fitness=creator.FitnessMax)

    # Fall back to the notebook-level environment for backward compatibility.
    eval_env = env if environment is None else environment

    toolbox = base.Toolbox()
    # Each gene is one PID gain drawn uniformly from [-10, 10].
    toolbox.register("attr_float", random.uniform, a=-10., b=10.)
    toolbox.register("individual", tools.initRepeat, creator.Individual,
                     toolbox.attr_float, n=4)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
    toolbox.register("evaluate", evalParams, env=eval_env)
    toolbox.register("mate", tools.cxOnePoint)
    toolbox.register("mutate", tools.mutGaussian, mu=0., sigma=1., indpb=0.25)
    toolbox.register("select", tools.selBest)
    return toolbox


def optimize(NIND, NGEN, toolbox, cxpb=0.5, mutpb=0.2, verbose=True):
    """Optimize PID gains using a (mu + lambda) genetic algorithm.

    Args:
        NIND: population size; also used as both mu and lambda.
        NGEN: number of generations to evolve.
        toolbox: DEAP toolbox providing population, evaluate, mate, mutate
            and select.
        cxpb: probability that an offspring is produced by crossover.
        mutpb: probability that an offspring is produced by mutation.
        verbose: whether DEAP prints the per-generation statistics.

    Returns:
        The best individual seen during the whole run, with its fitness
        attached (a copy stored by the hall of fame).
    """
    pop = toolbox.population(n=NIND)
    hof = tools.HallOfFame(1)

    # Per-generation summary of the population fitness values.
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    for label, reducer in (("avg", np.mean), ("std", np.std),
                           ("min", np.min), ("max", np.max)):
        stats.register(label, reducer)

    algorithms.eaMuPlusLambda(pop, toolbox, mu=NIND, lambda_=NIND,
                              cxpb=cxpb, mutpb=mutpb, ngen=NGEN,
                              stats=stats, halloffame=hof, verbose=verbose)

    # The hall of fame tracks the best individual ever evaluated, so the
    # result no longer depends on the toolbox's select operator being elitist.
    return hof[0]

In [None]:
# Setup environment
# One seed for both sources of randomness: the environment and Python's
# `random` module, which supplies the GA's random draws. Seeding only the
# environment would leave the optimisation non-reproducible.
SEED = 42
random.seed(SEED)
env = gym.make("LunarLanderContinuous-v2")
try:
    state, info = env.reset(seed=SEED)

    #RUN GA
    NIND = 20  # individuals per generation
    NGEN = 30  # number of generations
    toolbox = init_ga()
    params = optimize(NIND, NGEN, toolbox)
finally:
    # Release the environment even if the optimisation fails or is interrupted.
    env.close()

print(f"The best parameters are {params}")
print(f"The best fitness is {params.fitness.values[0]}")
# Later cells only need the raw gain values, so keep them as a plain array.
params = np.array(params)

In [None]:
#Final runs with tuned PID
# Limit the episode length through gym.make's public `max_episode_steps`
# argument instead of overwriting the private `_max_episode_steps` attribute
# of whichever wrapper happens to be outermost.
env = gym.make("LunarLanderContinuous-v2", render_mode="human",
               max_episode_steps=300)
try:
    state, info = env.reset(seed=42)
    # Only the last episode's reward and trajectory are kept for plotting.
    for _ in range(5):
        total, state_list = run(params, env, verbose=True)
finally:
    # Always close the render window, even if an episode raises.
    env.close()

In [None]:
def graph(states):
    """Plot the first six state components against the step index.

    Args:
        states: sequence of per-step observations; it is converted to a
            2-D array with one row per step.
    """
    series = np.array(states).reshape(len(states), -1)

    # Column order of the components that get plotted.
    labels = ('x', 'y', 'vx', 'vy', 'theta', 'vtheta')
    for column, name in enumerate(labels):
        plt.plot(series[:, column], label=name)

    plt.legend()
    plt.grid()
    # Fixed y-range; values outside it are cut off in the figure.
    plt.ylim(-1.1, 1.1)
    plt.title('PID Control')
    plt.ylabel('Value')
    plt.xlabel('Steps')
    plt.show()

# Plot the trajectory of the last episode played in the final-runs cell
# (state_list is assigned there, so that cell must be executed first).
graph(state_list)
    
