In [102]:
import torch
import json
import math
import threading
import multiprocessing
import queue

from Simulation import Simulation
from SteeringModel import LanderSteeringModel

In [103]:
# Probe for a GPU first, then pin the notebook to the CPU regardless of the
# probe result: the second assignment always wins, so everything below runs on
# the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cpu


In [104]:
def load_settings(path="settings.json"):
    """Load the simulation settings from a JSON file.

    Args:
        path: Location of the settings file. Defaults to ``"settings.json"``,
            which is resolved against the current working directory.

    Returns:
        The decoded JSON document, exactly as ``json.load`` produces it.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(path, encoding="utf-8") as file:
        return json.load(file)

In [105]:
settings = load_settings()
tower_arm_settings = settings["terrain"]["tower"]["towerArm"]

# Target state the loss function scores against.
# NOTE(review): the x component is xMax - xMin (the arm's extent along x), not
# a coordinate such as its midpoint -- confirm this is the intended target.
target_catch_pin_position = (
    tower_arm_settings["xMax"] - tower_arm_settings["xMin"],
    tower_arm_settings["yMax"] + settings["lander"]["catchPin"]["radius"],
)
target_catch_pin_position_x, target_catch_pin_position_y = target_catch_pin_position
target_angle = 0
target_velocity = (0, 0)
target_angular_velocity = 0


def loss_function(telemetry):
    """Score a telemetry snapshot against the catch target (lower is better).

    Reads ``catch_pin_position``, ``angle``, ``velocity`` and
    ``angular_velocity`` from ``telemetry`` and compares them with the
    module-level ``target_*`` values.

    The score is built in three stages, accumulated in this order:

    1. Position: the Euclidean distance to the target pin position, plus a
       fixed penalty for every distance band (1000, 500, 200, 100) exceeded.
    2. Velocity: the speed error, plus extra multiples of it (x20, x5, x2)
       while the position error is above 100, 50 and 25 respectively.
    3. Angle: the absolute angle error, plus extra multiples of it (x10, x5,
       x2) while both the position and the velocity error exceed the paired
       limits.

    The angular-velocity error is computed but is not part of the score.

    Args:
        telemetry: Mapping with the four keys listed above. The position and
            velocity entries are indexed with ``[0]`` and ``[1]``.

    Returns:
        A ``torch.float32`` tensor holding the loss.
    """
    pin = telemetry['catch_pin_position']
    tilt = telemetry['angle']
    motion = telemetry['velocity']
    spin = telemetry['angular_velocity']

    offset_x = pin[0] - target_catch_pin_position[0]
    offset_y = pin[1] - target_catch_pin_position[1]
    position_loss = math.sqrt(offset_x**2 + offset_y**2)

    angle_loss = abs(tilt - target_angle)

    excess_vx = motion[0] - target_velocity[0]
    excess_vy = motion[1] - target_velocity[1]
    velocity_loss = math.sqrt(excess_vx**2 + excess_vy**2)

    # Evaluated alongside the other terms, but currently left out of the score.
    angular_velocity_loss = abs(spin - target_angular_velocity)

    loss = 0

    # Each distance band that is exceeded adds a flat penalty equal to its limit.
    for distance_limit in (1000, 500, 200, 100):
        if position_loss > distance_limit:
            loss += distance_limit
    loss += position_loss

    # Far from the target, speed is weighted more heavily.
    for weight, distance_limit in ((20, 100), (5, 50), (2, 25)):
        if position_loss > distance_limit:
            loss += velocity_loss * weight
    loss += velocity_loss

    # Far from the target and still moving fast, tilt is weighted more heavily.
    for weight, distance_limit, speed_limit in ((10, 100, 20), (5, 50, 10), (2, 25, 5)):
        if position_loss > distance_limit and velocity_loss > speed_limit:
            loss += angle_loss * weight
    loss += angle_loss

    return torch.tensor(loss, dtype=torch.float32)

In [106]:
def to_binary_steering(steeering_input):
    """Threshold every steering value at 0.5.

    Args:
        steeering_input: Mapping from control name to a numeric activation.

    Returns:
        A new dict with the same keys, where each value is the result of
        ``value > 0.5`` (so exactly 0.5 counts as "off").
    """
    return {control: level > 0.5 for control, level in steeering_input.items()}

In [107]:
# Start position and simulation timing, all read from the loaded settings.
lander_initial_position = (
    settings["landerInitialPosition"]["x"],
    settings["landerInitialPosition"]["y"],
)
simulation_iterations_per_step = settings["simulationIterationsPerStep"]
simulation_steps_per_second = settings["simulationStepsPerSecond"]

In [108]:
def evaluate_population(population, lander_initial_position, num_processes=10):
    """Simulate every model in ``population`` and return one loss per model.

    The population is cut into contiguous slices, one per worker process. Each
    worker builds its own ``Simulation``, runs its models one after another and
    reports ``(index, loss)`` pairs through a shared queue. The parent drains
    the queue *before* joining the workers: a process that has put items on a
    ``multiprocessing.Queue`` may not exit until they have been consumed, so
    joining first can deadlock.

    Args:
        population: Sequence of steering models. Each one is called as
            ``model(telemetry)`` and its result is passed to
            ``to_binary_steering``.
        lander_initial_position: Start position handed to ``Simulation``.
        num_processes: Upper bound on the number of worker processes. It is
            clamped to ``[1, len(population)]`` so no idle workers are started.

    Returns:
        A list of losses where ``losses[i]`` belongs to ``population[i]``.

    Raises:
        RuntimeError: If every worker has exited but some results never
            arrived (for example because a worker crashed).
    """
    # Imported locally: only the Empty exception of the stdlib module is needed.
    from queue import Empty

    # NOTE(review): `evaluate` is a local function, so it can only serve as a
    # Process target when the start method does not have to pickle it (fork).
    def evaluate(models, indexes, lander_initial_position, result_queue):
        """Worker entry point: run each model once and report ``(index, loss)``."""
        simulation = Simulation(settings, lander_initial_position, simulation_iterations_per_step)
        time_step = 1 / simulation_steps_per_second
        # Cap every run at roughly 20 simulated seconds.
        max_steps = 20 * simulation_steps_per_second

        for index, model in zip(indexes, models):
            simulation.reset()

            step_count = 0
            while True:
                step_count += 1
                result, telemetry = simulation.step(time_step)

                steering_input = model(telemetry)
                simulation.set_steering_input(to_binary_steering(steering_input))

                # Stop once the simulation reports a result or the cap is hit.
                if result is not None or step_count > max_steps:
                    break

            # The loss is taken from the telemetry of the final step.
            result_queue.put((index, loss_function(telemetry).item()))

    population_size = len(population)
    if population_size == 0:
        return []

    worker_count = max(1, min(num_processes, population_size))
    chunk_size = population_size // worker_count

    result_queue = multiprocessing.Queue()
    processes = []
    for worker in range(worker_count):
        start = worker * chunk_size
        # The last worker also takes the remainder of an uneven split.
        end = start + chunk_size if worker < worker_count - 1 else population_size

        process = multiprocessing.Process(
            target=evaluate,
            args=(population[start:end], range(start, end), lander_initial_position, result_queue),
        )
        process.start()
        processes.append(process)

    results = {}
    while len(results) < population_size:
        # Sample liveness BEFORE the get: if every worker had already exited,
        # all of their items were flushed, so an empty queue means lost results.
        any_worker_alive = any(process.is_alive() for process in processes)
        try:
            index, loss = result_queue.get(timeout=1.0)
        except Empty:
            if not any_worker_alive:
                raise RuntimeError(
                    f"Evaluation workers exited after reporting only {len(results)} "
                    f"of {population_size} results"
                )
            continue
        results[index] = loss

    for process in processes:
        process.join()

    return [results[i] for i in range(population_size)]

In [109]:
def select_best_models(population):
    """Randomly keep half of ``population``, favouring entries near the front.

    The entry at position ``i`` gets a sampling weight proportional to
    ``len(population) - i``; ``len(population) // 2`` distinct entries are then
    drawn with ``torch.multinomial``. The selection is a weighted lottery, not
    a strict cut: with the population sorted by ascending loss (as the
    training loop does), better models are more likely to survive, but none is
    guaranteed to.

    Args:
        population: Sequence of candidates, best first.

    Returns:
        A list of the drawn candidates, in the order they were drawn.
    """
    size = len(population)

    # Linearly decreasing rank weights: size, size - 1, ..., 1.
    rank_weights = list(range(size, 0, -1))
    weight_total = sum(rank_weights)
    distribution = torch.tensor(
        [weight / weight_total for weight in rank_weights],
        dtype=torch.float32,
    )

    drawn = torch.multinomial(distribution, size // 2, replacement=False)
    return [population[position] for position in drawn.tolist()]

def crossover(parent1, parent2):
    """Create a child whose parameters are the mean of its parents' parameters.

    A fresh ``LanderSteeringModel`` is constructed with ``parent1.device``,
    moved to the module-level ``device``, and each of its parameters is then
    replaced by the element-wise average of the corresponding parameters of
    ``parent1`` and ``parent2``.

    Args:
        parent1: First parent model; also supplies the constructor argument.
        parent2: Second parent model.

    Returns:
        The newly built child model.
    """
    child = LanderSteeringModel(parent1.device).to(device)
    parent_pairs = zip(parent1.parameters(), parent2.parameters())
    for target, (from_first, from_second) in zip(child.parameters(), parent_pairs):
        # Blend the parents instead of picking one of them per parameter.
        target.data = (from_first.data + from_second.data) / 2
    return child

def mutate(model, mutation_rate, mutation_probability=0.1):
    """Perturb a random subset of a model's parameters in place.

    Each parameter element is selected independently with probability
    ``mutation_probability``; selected elements receive uniform noise drawn
    from ``[-mutation_rate, mutation_rate)``. The noise is centred on zero so
    that repeated mutation does not push the weights in one direction
    (``torch.rand`` alone is uniform on ``[0, 1)`` and would only ever
    increase them).

    Args:
        model: Module whose ``parameters()`` are mutated in place.
        mutation_rate: Maximum absolute size of a single perturbation.
        mutation_probability: Chance that any given element is perturbed.
            Defaults to 0.1, i.e. about 10% of the weights and biases.

    Returns:
        The same ``model`` object, after mutation.
    """
    for param in model.parameters():
        # rand_like keeps the noise on the parameter's own device and dtype.
        noise = (torch.rand_like(param.data) * 2 - 1) * mutation_rate
        mutation_mask = torch.rand_like(param.data) < mutation_probability
        param.data += noise * mutation_mask
    return model

# Evolutionary training: every epoch evaluates the whole population, ranks it by
# loss, checkpoints the best model, then builds the next generation from a
# sampled half of the population plus mutated crossover children.
num_epochs = 1000
population_size = 500
population = [LanderSteeringModel(device) for _ in range(population_size)]
# Overwrite every parameter of every model with standard-normal values.
for i in range(population_size):
    for param in population[i].parameters():
        param.data = torch.randn(param.size(), device=device)

for epoch in range(num_epochs):
    # One start position per epoch, shared by the whole population: the
    # configured position plus standard-normal noise scaled by 200 on each axis.
    # This rebinds the module-level `lander_initial_position`.
    lander_initial_position = settings["landerInitialPosition"]["x"] + (torch.randn(1).item()) * 200, settings["landerInitialPosition"]["y"] + torch.randn(1).item() * 200
    polulation_losses = evaluate_population(population, lander_initial_position)

    # Sort models by ascending loss (the key avoids comparing models on ties),
    # then sort the losses the same way so both lists stay aligned.
    population = [model for _, model in sorted(zip(polulation_losses, population), key=lambda pair: pair[0])]
    polulation_losses.sort()

    # Best model of THIS epoch, i.e. for this epoch's start position.
    best_model = population[0]

    # Log line: best loss (printed twice), ranks 1-3, then the losses at the
    # 10%, 20%, 50% and 90% positions of the sorted population.
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {polulation_losses[0]:.2f}\t", end="")
    print(f"{polulation_losses[0]:.2f}\t{polulation_losses[1]:.2f}\t{polulation_losses[2]:.2f}\t{polulation_losses[3]:.2f}\t{polulation_losses[int(0.1 * population_size)]:.2f}\t{polulation_losses[int(0.2 * population_size)]:.2f}\t{polulation_losses[int(0.5 * population_size)]:.2f}\t{polulation_losses[int(0.9 * population_size)]:.2f}")
    # Checkpoint the current best model every 10 epochs.
    if (epoch + 1) % 10 == 0:
        print("Saving model")
        print()
        torch.save(best_model.state_dict(), f"model_{epoch + 1}.pth")


    # Keep half of the population. select_best_models samples with
    # rank-based weights, so survival of best_model is likely, not guaranteed.
    selected_population = select_best_models(population)

    # Refill the population with children: two parents drawn uniformly (with
    # replacement, so both may be the same model), crossed over, then mutated.
    mutation_rate = 0.02
    crossover_population = []
    for _ in range(population_size - len(selected_population)):
        parent1 = selected_population[torch.randint(len(selected_population), (1,)).item()]
        parent2 = selected_population[torch.randint(len(selected_population), (1,)).item()]
        child = crossover(parent1, parent2)
        child = mutate(child, mutation_rate)
        crossover_population.append(child)

    # Replace population
    population = selected_population + crossover_population


# Save the best model of the final epoch. `best_model` is only assigned inside
# the loop, so this line needs num_epochs >= 1.
torch.save(best_model.state_dict(), "model.pth")


Epoch [1/50], Loss: 124.19	124.19	137.41	188.63	192.15	1070.04	1793.70	3727.71	8067.24
Epoch [2/50], Loss: 30.19	30.19	34.26	35.02	40.30	137.67	373.64	1962.07	5794.92
Epoch [3/50], Loss: 89.76	89.76	97.83	99.85	112.23	640.88	1226.34	3091.04	5647.63
Epoch [4/50], Loss: 73.25	73.25	98.31	101.63	116.85	425.27	679.01	2429.39	5257.68
Epoch [5/50], Loss: 115.28	115.28	124.17	127.46	144.25	414.19	900.09	2884.95	5592.00
Epoch [6/50], Loss: 76.84	76.84	131.40	142.75	143.00	513.19	1087.49	3037.38	6308.05
Epoch [7/50], Loss: 110.29	110.29	124.49	218.67	283.91	722.34	1457.32	2582.76	5282.40
Epoch [8/50], Loss: 57.05	57.05	58.40	59.40	61.01	113.80	234.74	707.63	3486.48
Epoch [9/50], Loss: 117.21	117.21	329.97	343.16	481.00	968.20	1269.66	2045.00	3077.06
Epoch [10/50], Loss: 58.87	58.87	59.19	89.28	95.91	246.98	621.49	1770.38	3691.26
Saving model

Epoch [11/50], Loss: 80.98	80.98	83.23	93.20	94.25	144.12	364.82	980.19	3434.83
Epoch [12/50], Loss: 125.10	125.10	157.68	297.03	312.52	721.37	1071.76	179

In [110]:
# Manual re-save of the most recent best model. Relies on `best_model` still
# being defined in the kernel by the training cell.
torch.save(best_model.state_dict(), "model.pth")


In [111]:
# Scratch check: draw a single standard-normal sample as a Python float (the
# same call the training loop uses to jitter the start position).
torch.randn(1).item()


0.42031770944595337