In [1]:
import torch
import json
import math
import threading
import multiprocessing
import queue

from Simulation import Simulation
from SteeringModel import LanderSteeringModel

pygame 2.6.1 (SDL 2.28.4, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cpu


In [3]:
def load_settings():
    with open("settings.json") as file:
        settings = json.load(file)
    return settings

In [4]:
settings = load_settings()
tower_arm_settings = settings["terrain"]["tower"]["towerArm"]
target_catch_pin_position_x = (tower_arm_settings["xMax"] + tower_arm_settings["xMin"]) / 2
target_catch_pin_position_y = tower_arm_settings["yMax"] + settings["lander"]["catchPin"]["radius"]
target_catch_pin_position = (target_catch_pin_position_x, target_catch_pin_position_y)
target_angle = 0
target_velocity = (0, 0)
target_angular_velocity = 0


def loss_function(telemetry):
    catch_pin_position = telemetry['catch_pin_position']
    angle = telemetry['angle']
    velocity = telemetry['velocity']
    angular_velocity = telemetry['angular_velocity']

    position_loss = math.sqrt((catch_pin_position[0] - target_catch_pin_position[0])**2 + (catch_pin_position[1] - target_catch_pin_position[1])**2)
    angle_loss = abs(angle - target_angle)
    velocity_loss = math.sqrt((velocity[0] - target_velocity[0])**2 + (velocity[1] - target_velocity[1])**2)
    angular_velocity_loss = abs(angular_velocity - target_angular_velocity)

    # loss = 0
    # if angle_loss > math.pi / 2:
    #     loss += 80000
    #     loss += angle_loss * 10
    # elif angular_velocity_loss > 0.5:
    #     loss += 40000
    #     loss += angle_loss * 10
    #     loss += angular_velocity_loss * 10
    # elif velocity_loss > 10:
    #     loss += 20000
    #     loss += angle_loss * 10
    #     loss += angular_velocity_loss * 10
    #     loss += velocity_loss
    # elif position_loss > 25:
    #     loss += 10000
    #     loss += angle_loss * 10
    #     loss += angular_velocity_loss * 10
    #     loss += velocity_loss
    #     loss += position_loss
    # else:
    #     loss += angle_loss * 10
    #     loss += angular_velocity_loss * 10
    #     loss += velocity_loss
    #     loss += position_loss

    # loss = 0
    # loss += 10000 if angle_loss > math.pi / 2 else 0
    # loss += angle_loss * 10
    # loss += 10000 if angular_velocity_loss > 0.5 else 0
    # loss += angular_velocity_loss * 10
    # loss += 10000 if velocity_loss > 10 else 0
    # loss += velocity_loss
    # loss += 10000 if position_loss > 25 else 0
    # loss += position_loss

    position_loss = math.exp(min(position_loss, 25) / 10) -1 + position_loss / 10
    angle_loss = angle_loss * 10 + math.exp(angle_loss) - 1
    velocity_loss = math.exp(min(velocity_loss, 5) / 2.5) -1 + velocity_loss / 2.5
    angular_velocity_loss = angular_velocity_loss * 10 + math.exp(angular_velocity_loss) - 1
    loss = position_loss + angle_loss + velocity_loss + angular_velocity_loss
    # loss = angle_loss
    # loss = position_loss + angle_loss + velocity_loss + angular_velocity_loss
    return torch.tensor(loss, dtype=torch.float32)

In [5]:
def to_binary_steering(steeering_input):
    binary_steering = {}
    for key, value in steeering_input.items():
        binary_steering[key] = value > 0.5
    return binary_steering

In [6]:
lander_initial_position = settings["landerInitialPosition"]["x"], settings["landerInitialPosition"]["y"]
simulation_iterations_per_step = settings["simulationIterationsPerStep"]
simulation_steps_per_second = settings["simulationStepsPerSecond"]

In [7]:
def evaluate_population(population, lander_initial_position, lander_initial_angle, simulation_duration):
    def evaluate(models, indexes, lander_initial_position, queue):
        simulation = Simulation(settings, lander_initial_position, simulation_iterations_per_step, lander_initial_angle=lander_initial_angle)
        for index, model in zip(indexes, models):
            simulation.reset()
    
            running = True
            step_count = 0
            max_steps = simulation_duration * simulation_steps_per_second
            while running:
                step_count += 1
                result, telemetry = simulation.step(1 / simulation_steps_per_second)

                steering_input = model(telemetry)
                binary_steering = to_binary_steering(steering_input)
                simulation.set_steering_input(binary_steering)

                if result is not None or step_count > max_steps:
                    running = False

            loss = loss_function(telemetry).item()
            queue.put((index, loss))

    queue = multiprocessing.Queue()
    num_processes = 10
    processes = []
    for i in range(num_processes):
        index_range_start = i * (len(population) // num_processes)
        index_range_end = (i + 1) * (len(population) // num_processes) if i < num_processes - 1 else len(population)
        indexes = range(index_range_start, index_range_end)

        process = multiprocessing.Process(target=evaluate, args=(population[index_range_start:index_range_end], indexes, lander_initial_position, queue))
        process.start()
        processes.append(process)

    for process in processes:
        process.join()

    results = {}
    for i in range(len(population)):
        index, loss = queue.get()
        results[index] = loss
    
    losses = [results[i] for i in range(len(population))]
    return losses

In [8]:
def select_survivors(population):
    # Select the best 50% of the population, based on loss
    population_size = len(population)
    probabilities = [(population_size - i)**2 for i in range(population_size)]
    probabilities_sum = sum(probabilities)
    probabilities = [p / probabilities_sum for p in probabilities]

    selected_indexes = torch.multinomial(torch.tensor(probabilities, dtype=torch.float32), population_size // 2, replacement=False).tolist()
    selected_population = [population[i] for i in selected_indexes]
    return selected_population

def crossover(parent1, parent2):
    child = LanderSteeringModel(parent1.device, target_catch_pin_position).to(device)
    child.train()
    for child_param, parent1_param, parent2_param in zip(child.parameters(), parent1.parameters(), parent2.parameters()):
        # child_param.data = parent1_param.data if torch.rand(1) < 0.5 else parent2_param.data
        # child_param.data = (parent1_param.data + parent2_param.data) / 2
        child_param.data = parent1_param.data
    return child

def mutate(model, mutation_rate):
    for param in model.parameters():
        # mutate 10% of the weights and biases
        mutation_mask = torch.rand(param.size(), device=device) < 0.1
        mutation = (torch.rand(param.size(), device=device) - 0.5) * mutation_rate * 2
        param.data += mutation * mutation_mask
    return model


def train(initial_population, epochs, mutation_rate, simulation_duration, lander_initial_position, lander_initial_position_deviation, lander_initial_angle, lander_initial_angle_deviation):
    population = initial_population
    population_size = len(population)

    print(f"Epoch\t\tBest\t\t10%\t\t20%\t\t50%\t\t90%")
    for epoch in range(epochs):
        lander_initial_position_x = lander_initial_position[0] + (torch.rand(1).item() - 0.5) * 2 * lander_initial_position_deviation[0]
        lander_initial_position_y = lander_initial_position[1] + (torch.rand(1).item() - 0.5) * 2 * lander_initial_position_deviation[1]
        lander_initial_position = lander_initial_position_x, lander_initial_position_y
        lander_initial_angle = (torch.rand(1).item() - 0.5) * 2 * lander_initial_angle_deviation
        population_losses = evaluate_population(population, lander_initial_position, lander_initial_angle, simulation_duration)
    
        # Sort by loss
        population = [model for _, model in sorted(zip(population_losses, population), key=lambda pair: pair[0])]
        population_losses.sort()
    
        print(f"\r{epoch + 1}/{epochs}\t\t{population_losses[0]:.2f}\t\t{population_losses[int(0.1 * population_size)]:.2f}\t\t{population_losses[int(0.2 * population_size)]:.2f}\t\t{population_losses[int(0.5 * population_size)]:.2f}\t\t{population_losses[int(0.9 * population_size)]:.2f}", end="")
        if (epoch + 1) % 10 == 0:
            print()
    
        if epoch == epochs - 1:
            print()
            return population

        # Select the best 50% of the population
        survivor_population = select_survivors(population)
    
        # Crossover the selected population
        crossover_population = []
        for _ in range(population_size - len(survivor_population)):
            parent1 = survivor_population[torch.randint(len(survivor_population), (1,)).item()]
            parent2 = survivor_population[torch.randint(len(survivor_population), (1,)).item()]
            child = crossover(parent1, parent2)
            child = mutate(child, mutation_rate)
            crossover_population.append(child)
    
        # Replace population
        population = survivor_population + crossover_population

In [9]:
population_size = 300
population = [LanderSteeringModel(device, target_catch_pin_position) for _ in range(population_size)]
for i in range(population_size):
    for param in population[i].parameters():
        param.data = torch.randn(param.size(), device=device)
    population[i].train()

num_epochs = 200
mutation_rate = 0.05
simulation_duration = 10
lander_initial_position = (100, 150)
lander_initial_position_deviation = (10, 20)
lander_initial_angle = 0
lander_initial_angle_deviation = math.pi / 36
population = train(population, num_epochs, mutation_rate, simulation_duration, lander_initial_position, lander_initial_position_deviation, lander_initial_angle, lander_initial_angle_deviation)

num_epochs = 200
mutation_rate = 0.025
simulation_duration = 20
lander_initial_position = (150, 200)
lander_initial_position_deviation = (10, 20)
lander_initial_angle = 0
lander_initial_angle_deviation = math.pi / 36
population = train(population, num_epochs, mutation_rate, simulation_duration, lander_initial_position, lander_initial_position_deviation, lander_initial_angle, lander_initial_angle_deviation)

num_epochs = 200
mutation_rate = 0.025
simulation_duration = 30
lander_initial_position = (200, 300)
lander_initial_position_deviation = (10, 20)
lander_initial_angle = 0
lander_initial_angle_deviation = math.pi / 36
population = train(population, num_epochs, mutation_rate, simulation_duration, lander_initial_position, lander_initial_position_deviation, lander_initial_angle, lander_initial_angle_deviation)


# save the best model
best_model = population[0]
torch.save(best_model.state_dict(), "model.pth")

Epoch		Best		10%		20%		50%		90%
10/200		20.53		30.62		30.65		30.89		67.544
20/200		94.68		96.27		96.27		96.27		96.27
30/200		58.62		58.66		58.66		58.66		102.582.43
40/200		141.51		155.32		155.32		155.32		155.32
50/200		170.14		173.54		173.54		173.54		173.54
60/200		255.30		255.84		255.84		255.84		255.84
70/200		114.78		114.78		114.78		157.05		157.51
80/200		134.04		134.28		134.28		134.28		139.35
90/200		135.45		135.49		135.49		135.49		135.49
100/200		119.40		119.40		119.40		125.38		125.38
110/200		99.87		99.90		99.90		99.90		99.901.508
120/200		118.06		119.94		121.86		123.03		124.90
130/200		38.86		41.08		41.08		41.08		41.1045.60
140/200		51.01		66.82		66.91		66.91		66.9564.38
150/200		47.97		65.13		65.13		108.77		108.99.87
160/200		112.93		112.95		112.95		113.39		113.39
170/200		122.89		122.94		122.94		122.94		122.94
180/200		68.60		78.91		133.36		133.36		133.3635
190/200		100.05		108.80		108.80		108.80		108.80
200/200		102.89		104.31		104.31		104.31		104.31

Epoch		Best		10%		20%		50

In [10]:
torch.save(best_model.state_dict(), "model.pth")


In [11]:
test = [0.01 * i for i in range(314)]
for i in test:
    print(i,  10 * (math.exp(i) - 1))

0.0 0.0
0.01 0.10050167084167949
0.02 0.20201340026755776
0.03 0.3045453395351694
0.04 0.4081077419238821
0.05 0.5127109637602412
0.06 0.6183654654535964
0.07 0.7250818125421654
0.08 0.8328706767495864
0.09 0.9417428370521042
0.1 1.0517091807564771
0.11 1.1627807045887129
0.12 1.2749685157937574
0.13 1.3882838332462177
0.14 1.5027379885722736
0.15 1.6183424272828306
0.16 1.7351087099181028
0.17 1.8530485132036545
0.18 1.9721736312181015
0.19 2.092495976572515
0.2 2.2140275816016985
0.21 2.336780599567432
0.22 2.4607673058738078
0.23 2.5860000992947785
0.24 2.7124915032140473
0.25 2.840254166877414
0.26 2.969300866657718
0.27 3.099644507332473
0.28 3.2312981233743687
0.29 3.3642748802547207
0.3 3.498588075760032
0.31 3.634251141321778
0.32 3.771277643359572
0.33 3.9096812846378026
0.34 4.049475905635937
0.35000000000000003 4.190675485932573
0.36 4.333294145603402
0.37 4.477346146633245
0.38 4.622845894342245
0.39 4.7698079388264265
0.4 4.9182469764127035
0.41000000000000003 5.0681778511