In [1]:
import random
import numpy as np
# Constants
# Room grid dimensions; room_generator() builds arrays of shape (ROOM_HEIGHT, ROOM_WIDTH).
ROOM_WIDTH = 10
ROOM_HEIGHT = 10
# Number of random rooms created by each generate_rooms() call.
NUM_ROOMS = 200
# Number of strategies created by generate_population() and bred by select_parents().
POPULATION_SIZE = 200
# Number of generations iterated by get_best_strategy().
GENERATIONS = 1000
# Per-gene threshold that mutate() compares against np.random.rand().
MUTATION_RATE = 0.000_001
# Number of actions total_reward() lets a strategy perform in one room.
BATTERY_CAPACITY = 200
# NOTE(review): not referenced by any code visible in this part of the notebook - confirm it is still needed.
MAXSCORE = -np.inf

In [2]:
# create random room

def room_generator():
    """Return a random room as a (ROOM_HEIGHT, ROOM_WIDTH) integer array of 0s and 1s."""
    room_shape = (ROOM_HEIGHT, ROOM_WIDTH)
    return np.random.randint(low=0, high=2, size=room_shape)

# create a func to generate a random sequence of actions
def stratogy_generator():
    """Return a random strategy table.

    The table is a (3, 3, 3, 3, 3) integer array whose 243 entries are
    action codes drawn uniformly from 0..6.
    """
    table_shape = (3, 3, 3, 3, 3)
    genes = np.random.randint(0, 7, size=243)
    return genes.reshape(table_shape)


In [3]:

def get_view_and_action(matrix, stratogy, x, y):
    """Read the cell at (x, y) and its four neighbours, then look up the action.

    ``x`` indexes the first axis of ``matrix`` and ``y`` the second. A
    neighbour that would fall outside the grid is reported as 2.

    Returns:
        (status, action) where ``status`` is the tuple
        (current, up, down, left, right) and ``action`` is
        ``stratogy[status]``.
    """
    outside = 2  # value reported for a neighbour beyond the grid edge

    current = matrix[x, y]
    up = outside if x == 0 else matrix[x - 1, y]
    down = outside if x == len(matrix) - 1 else matrix[x + 1, y]
    left = outside if y == 0 else matrix[x, y - 1]
    right = outside if y == len(matrix[0]) - 1 else matrix[x, y + 1]

    status = (current, up, down, left, right)
    return status, stratogy[status]


In [4]:
# func to calculate the reward of the action
def reward(strategy, matrix, x, y):
    """Perform the strategy's action for the robot at (x, y) and score it.

    The action is looked up with ``get_view_and_action``. Action codes:
        0: move to y + 1        1: move to y - 1
        2: move to x - 1        3: move to x + 1
        4: clean the current cell
        5: do nothing
        6: perform one of the moves 0..3 chosen at random

    Scoring: moving off the grid costs 5 and leaves the position unchanged,
    a legal move scores 0, cleaning a cell that holds 1 scores 10 (the cell
    is set to 0 in ``matrix``), cleaning any other cell costs 1, and doing
    nothing scores 0.

    Args:
        strategy: action table indexed by the 5-tuple view.
        matrix: 2-D room array; modified in place when a cell is cleaned.
        x: robot index along the first axis of ``matrix``.
        y: robot index along the second axis of ``matrix``.

    Returns:
        (points, (x, y)) - the score of the action and the robot's position
        after it. Callers read elements 0 and 1 of the second item as the
        next position.

    Raises:
        ValueError: if the looked-up action is not one of 0..6.
    """
    # The view is only needed to pick the action. The position must NOT be
    # replaced by the view's cell contents, otherwise the robot teleports.
    _, action = get_view_and_action(matrix, strategy, x, y)
    if action == 6:
        action = np.random.randint(0, 4)

    last_row = len(matrix) - 1
    last_col = len(matrix[0]) - 1
    points = 0

    if action == 0:
        if y == last_col:
            points = -5
        else:
            y += 1
    elif action == 1:
        if y == 0:
            points = -5
        else:
            y -= 1
    elif action == 2:
        if x == 0:
            points = -5
        else:
            x -= 1
    elif action == 3:
        if x == last_row:
            points = -5
        else:
            x += 1
    elif action == 4:
        if matrix[x, y] == 1:
            matrix[x, y] = 0
            points = 10
        else:
            points = -1
    elif action == 5:
        points = 0
    else:
        raise ValueError(f"unknown action code: {action!r}")

    return points, (x, y)


In [5]:
def total_reward(strategy, matrix):
    """Run a strategy in one room for BATTERY_CAPACITY steps and sum the rewards.

    The robot starts at position (0, 0). Each step calls ``reward`` and takes
    elements 0 and 1 of its second return value as the next position.

    The simulation runs on a private copy of ``matrix``: ``reward`` clears
    cleaned cells in place, and the same room objects are reused for every
    strategy in the population, so mutating the caller's room would make
    later strategies see a room already cleaned by earlier ones.

    Args:
        strategy: action table passed through to ``reward``.
        matrix: 2-D room array; left unmodified.

    Returns:
        The sum of the per-step rewards.
    """
    room = np.array(matrix, copy=True)
    x = 0
    y = 0
    total = 0
    for _ in range(BATTERY_CAPACITY):
        step_points, position = reward(strategy, room, x, y)
        total += step_points
        x = position[0]
        y = position[1]
    return total

In [6]:
# create a func to calculate the mean reward of the action_seq on the rooms
def mean_reward(strategy, rooms):
    """Return the average of ``total_reward(strategy, room)`` over ``rooms``."""
    return sum(total_reward(strategy, room) for room in rooms) / len(rooms)

In [7]:
# generate population
def generate_population():
    """Return a list of POPULATION_SIZE strategies from ``stratogy_generator``."""
    return [stratogy_generator() for _ in range(POPULATION_SIZE)]
# generate rooms
def generate_rooms():
    """Return a list of NUM_ROOMS rooms from ``room_generator``."""
    return [room_generator() for _ in range(NUM_ROOMS)]

In [8]:
# evaluate the population
def evaluate_population(population, rooms):
    """Return a list with ``mean_reward(strategy, rooms)`` for each strategy, in order."""
    return [mean_reward(candidate, rooms) for candidate in population]

In [9]:
# CONVERT THE SCORES TO PROBABILITIES
def scores_to_prob(scores):
    """Convert raw scores into selection probabilities.

    Scores are shifted so the minimum becomes 0 and then divided by their
    sum, so the lowest-scoring entry gets probability 0 and the rest are
    proportional to their lead over it.

    When the shifted scores do not have a positive, finite sum (for example
    all scores are equal, or there is only one score) the division would
    yield NaN, which ``np.random.choice`` rejects; a uniform distribution is
    returned instead.

    Args:
        scores: non-empty sequence of numbers.

    Returns:
        1-D float array of the same length as ``scores``.

    Raises:
        ValueError: if ``scores`` is empty.
    """
    values = np.asarray(scores, dtype=float)
    zero_based_scores = values - values.min()
    total = zero_based_scores.sum()
    if not (np.isfinite(total) and total > 0):
        # No usable ranking information: every entry is equally likely.
        return np.full(values.shape, 1.0 / values.size)
    return zero_based_scores / total

In [10]:
# create a func to mutate the children based on the MUTATION_RATE
def mutate(children):
    """Randomly mutate genes of each child in place.

    Every individual gene is replaced, with probability MUTATION_RATE, by a
    fresh action code drawn uniformly from 0..6. Genes are addressed through
    the array's flat view, so a child of any shape (e.g. the
    (3, 3, 3, 3, 3) strategy table) is handled one gene at a time; indexing
    the first axis directly would overwrite whole sub-arrays and run out of
    bounds.

    Args:
        children: iterable of NumPy arrays; each array is modified in place.

    Returns:
        The same ``children`` object that was passed in.
    """
    num_actions = 7  # action codes are 0..6
    for child in children:
        hit = np.random.rand(child.size) < MUTATION_RATE
        gene_indices = np.flatnonzero(hit)
        if gene_indices.size:
            child.flat[gene_indices] = np.random.randint(
                0, num_actions, size=gene_indices.size
            )
    return children


In [11]:
# SELECT THE PARENTS and create the next generation - sexual reproduction
def select_parents(population):
    """Breed the next generation by fitness-proportional single-point crossover.

    The population is scored on a freshly generated set of rooms, the scores
    are turned into selection probabilities, and POPULATION_SIZE children are
    produced. Each child takes the first ``k`` genes of one sampled parent
    and the remaining genes of another, where the cut ``k`` is drawn over the
    *flattened* genome. (Slicing the multi-dimensional table before
    flattening would cut along its first axis only, so almost every child
    would be a plain copy of one parent.) The children are then passed to
    ``mutate``.

    Args:
        population: list of strategy arrays that all share one shape.

    Returns:
        List of POPULATION_SIZE new strategy arrays with the parents' shape.
    """
    prob = np.asarray(
        scores_to_prob(evaluate_population(population, generate_rooms())),
        dtype=float,
    )
    parent_count = len(population)
    if not np.all(np.isfinite(prob)):
        # Degenerate scores (e.g. all equal) give NaN probabilities, which
        # np.random.choice rejects; fall back to uniform selection.
        prob = np.full(parent_count, 1.0 / parent_count)

    # Flatten each parent once so the cut point addresses individual genes.
    genomes = [np.asarray(individual).reshape(-1) for individual in population]

    children = []
    for _ in range(POPULATION_SIZE):
        p1, p2 = np.random.choice(parent_count, size=2, p=prob)
        k = np.random.randint(0, genomes[p1].size)
        child_genes = np.concatenate((genomes[p1][:k], genomes[p2][k:]))
        children.append(child_genes.reshape(np.shape(population[p1])))

    mutate(children)

    return children

In [19]:
import time
# create a func to get the best strategy with the highest score in the population on number of generations

def get_best_strategy():
    """Evolve a population for GENERATIONS generations and return its best member.

    Each generation replaces the population with the offspring produced by
    ``select_parents``. Every tenth generation the population is scored on a
    fresh set of rooms and the scores are printed as a progress report.

    After the last generation the final population is scored once more and
    the winner of that evaluation is returned, so the result always refers to
    the population that actually exists at the end (and is defined even when
    GENERATIONS is 0), rather than to the most recent progress checkpoint.

    Returns:
        (best_strategy, best_score) - the highest-scoring strategy of the
        final population and its score in the final evaluation.
    """
    population = generate_population()
    for i in range(GENERATIONS):
        t1 = time.time()
        print(f"Generation {i}")
        population = select_parents(population)
        if i % 10 == 0:
            scores = evaluate_population(population, generate_rooms())
            print(scores)
            print(f"Best score: {max(scores)}","\n mean score: ",np.mean(scores))
        t2 = time.time()
        print(f"Time taken for generation {i}: {t2 - t1}")

    final_scores = evaluate_population(population, generate_rooms())
    best_index = int(np.argmax(final_scores))
    return population[best_index], final_scores[best_index]

In [20]:
# Run the evolutionary search and keep the strategy and score it returns.
best_strategy, max_score = get_best_strategy()

Generation 0
[-133.775, -64.3, -260.675, -170.775, -447.76, -100.075, -205.73, -157.94, -96.91, -102.135, -36.665, -49.06, -119.145, -89.12, -346.51, -53.81, -118.545, -391.69, -103.63, -38.82, -380.27, -8.925, -545.16, -76.145, -170.665, -108.395, -205.5, -97.4, -409.585, -503.315, -460.095, -235.555, -83.645, -14.3, -1.775, -109.295, -183.525, -0.55, -96.775, -86.16, -67.845, -117.74, -89.925, -202.105, -86.16, -0.525, -158.53, -104.16, -12.24, -67.79, -376.15, -40.925, -169.59, -416.86, -67.715, -235.005, -128.145, -6.55, -99.085, -17.365, -466.985, -231.835, -417.785, -199.685, -118.63, 0.0, -91.36, -484.775, -61.69, -74.36, -402.14, -22.755, -131.155, -17.295, -28.55, -92.7, -29.67, -46.95, -275.475, -74.31, -177.115, -209.35, -207.595, -89.375, -19.655, -107.75, -400.38, -50.32, -119.03, -40.85, 0.0, -40.65, -178.39, -110.565, -199.535, -770.775, -170.895, -402.04, -151.385, -94.76, -497.035, -98.17, -401.71, -177.215, -402.09, -27.795, -199.215, -178.19, -153.02, -33.83, -22.655