In [72]:
import random
import numpy as np
# Constants
ROOM_WIDTH = 10
ROOM_HEIGHT = 10
NUM_ROOMS = 200
POPULATION_SIZE = 200
GENERATIONS = 1000
MUTATION_RATE = 0.000_001
BATTERY_CAPACITY = 200
MAXSCORE = -np.inf
M = MUTATION_RATE * POPULATION_SIZE * 243

In [73]:
# create random room

def room_generator():
    return np.random.randint(0, 2, size=(ROOM_HEIGHT, ROOM_WIDTH))

# create a func to generate a random secoence of actions
def stratogy_generator():
    x = np.random.randint(0, 7, size=243).reshape((3, 3, 3, 3, 3))
    return x


In [74]:

def get_view_and_action(matrix, stratogy, x, y):
    current = matrix[x, y]
    if x == 0:
        up = 2
    else:
        up = matrix[x-1, y]
    if x == len(matrix)-1:
        down = 2
    else:
        down = matrix[x+1, y]
    if y == 0:
        left = 2
    else:
        left = matrix[x, y-1]
    if y == len(matrix[0])-1:
        right = 2
    else:
        right = matrix[x, y+1]
    status = (current, up, down, left, right)
    action = stratogy[status]
    return status, action


In [75]:
# func to calculate the reward of the action
def reward(strategy, matrix, x, y):
    status, action = get_view_and_action(matrix, strategy, x, y)
    x = status[0]
    y = status[1]
    if action == 6:
        action = np.random.randint(0, 4)
    if action == 0:
        if y == len(matrix[0])-1:
            reward = -5
        else:
            y += 1
            reward = 0
    if action == 1:
        if y == 0:
            reward = -5
        else:
            y -= 1
            reward = 0
    if action == 2:
        if x == 0:
            reward = -5
        else:
            x -= 1
            reward = 0
    if action == 3:
        if x == len(matrix)-1:
            reward = -5
        else:
            x += 1
            reward = 0
    if action == 4:
        if matrix[x, y] == 1:
            matrix[x, y] = 0
            reward = 10
        else:
            reward = -1
    if action == 5:
        reward = 0

    return reward, status


In [68]:
def total_reward(strategy, matrix):
    x = 0
    y = 0
    total = 0
    for _ in range(BATTERY_CAPACITY):
        r, s = reward(strategy, matrix, x, y)
        total += r
        x = s[0]
        y = s[1]
    return total

In [69]:
# create a func to calculate the mean reward of the action_seq on the rooms
def mean_reward(strategy, rooms):
    total = 0
    for room in rooms:
        total += total_reward(strategy, room)
    return total/len(rooms)

In [70]:
# generate pupulation 
def generate_population():
    population = []
    for _ in range(POPULATION_SIZE):
        population.append(stratogy_generator())
    return population
# generate rooms
def generate_rooms():
    rooms = []
    for _ in range(NUM_ROOMS):
        rooms.append(room_generator())
    return rooms    

In [71]:
# evaluate the population
def evaluate_population(population, rooms):
    scores = []
    for strategy in population:
        score = mean_reward(strategy, rooms)
        scores.append(score)
    return scores

In [39]:
# CONVERT THE SCORES TO PROBABILITIES
def scores_to_prob(scores):
    zero_based_scores = np.array(scores) - min(scores)
    prob = zero_based_scores/sum(zero_based_scores)
    return prob

In [60]:
# create a func to mutate the children based on the MUTATION_RATE
def mutate(children):
    if np.random.rand() < M:
        i = np.random.randint(0, POPULATION_SIZE)
        children[i][random.randint(0,2)]\
             [random.randint(0,2)]\
                 [random.randint(0,2)]\
                     [random.randint(0,2)]\
                         [random.randint(0,2)]\
                                = np.random.randint(0, 6)
        return children


In [61]:
# SELECT THE PARENTS and create the next generation - sexual reproduction
def select_parents(population):
    prob = scores_to_prob(evaluate_population(population, generate_rooms()))
    children = []
    for i in range(POPULATION_SIZE):
        k = np.random.randint(0, 243)
        # Use the probabilities directly in np.random.choice, solve a must be 1-dimensional error
        p1, p2 = np.random.choice(POPULATION_SIZE, size=2, p=prob)
        children.append(np.concatenate((population[p1][:k].flatten(), population[p2][k:].flatten())).reshape((3, 3, 3, 3, 3)))

    mutate(children)
        
    return children

In [62]:
import time
# create a func to get the best strategy with the highest score in the population on number of generations

def get_best_strategy():
    population = generate_population()
    for i in range(GENERATIONS):
        t1 = time.time()
        print(f"Generation {i}")
        population = select_parents(population)
        if i % 10 == 0:
            scores = evaluate_population(population, generate_rooms())
            print(scores)
            print(f"Best score: {max(scores)}","\n mean score: ",np.mean(scores))
            best_strategy = population[scores.index(max(scores))]
        t2 = time.time()
        t_total = 0
        t_total += t2-t1
        print(f"Time taken for generation {i}: {t_total}")
    return best_strategy, max(scores)

In [63]:
best_strategy, max_score = get_best_strategy()

Generation 0
[-47.05, -91.9, -169.75, -87.85, -156.85, -50.0, -350.6, -217.15, -261.0, -166.9, -165.65, -126.65, -158.9, -145.15, -161.65, -109.05, -99.95, -33.3, -232.15, -91.55]
Best score: -33.3 
 mean score:  -146.1525
Time taken for generation 0: 0.5629727840423584
Generation 1
Time taken for generation 1: 0.2594914436340332
Generation 2
Time taken for generation 2: 0.293689489364624
Generation 3
Time taken for generation 3: 0.26677608489990234
Generation 4
Time taken for generation 4: 0.27016282081604004
Generation 5
Time taken for generation 5: 0.3373219966888428
Generation 6
Time taken for generation 6: 0.23310446739196777
Generation 7
Time taken for generation 7: 0.25515151023864746
Generation 8
Time taken for generation 8: 0.2785913944244385
Generation 9
Time taken for generation 9: 0.22609615325927734
Generation 10
[-48.55, -51.15, -46.65, -44.15, -47.15, -47.4, -49.4, -43.4, -47.65, -47.15, -36.5, -46.65, -47.9, -51.15, -50.4, -45.65, -48.9, -45.9, -46.4, -48.65]
Best score

KeyboardInterrupt: 