In [1]:
import numpy as np
import random
import enviroment_no_visual as enviroment

In [2]:
def binary_vector_to_decimal(vector):    
    # Converti il vettore binario in una stringa binaria
    binary_string = ''.join(map(str, vector))
    decimal_value = int(binary_string, 2)
    return decimal_value

In [3]:
# Crea un array con tutte le combinazioni di 0 e 1 di lunghezza n
def generate_boolean_vectors(n):
    return np.array(np.meshgrid(*[np.arange(2)] * n)).T.reshape(-1, n)

def initialize_QValues():
    Q_table = np.zeros((2048, 3))
    all_states = generate_boolean_vectors(11)
    for s in all_states:
        if (s[3] + s[4] + s[5] + s[6]) != 1: # controlla che esista una sola direzione
            impossible_row = binary_vector_to_decimal(s)
            Q_table[impossible_row, :] = -np.inf
        if (s[7] + s[8]) > 1 or (s[9] + s[10]) > 1 or ((s[7] + s[8] + s[9] + s[10]) == 0): # controlla che il frutto non sia a destra e a sinistra o sopra e sotto
            impossible_row = binary_vector_to_decimal(s)
            Q_table[impossible_row, :] = -np.inf
    return Q_table

In [4]:
def random_policy():
    return np.random.randint(3)

In [5]:
def exploration_policy(Q_table, state, epsilon):
    rand = random.uniform(0,1)
    if rand < epsilon:
        return np.random.randint(3)
    else:
        row_Q_value = binary_vector_to_decimal(state)
        return np.argmax(Q_table[row_Q_value])

In [6]:
def step(env, action):
    final_move = [0,0,0]
    final_move[action] = 1
    new_state, reward, game_over, _ = env.play_step_QL(final_move)
    return new_state, reward, game_over

In [7]:
alpha = 0.7  # learning rate
gamma = 0.90  # discount factor
epsilon = 0.3

env = enviroment.SnakeGameAI(200, 200)
env.reset()
Q_table = initialize_QValues()
N_STEPS = 1_000_000
for iteration in range(N_STEPS):
    state = env.get_state()
    row_state = binary_vector_to_decimal(state)
    epsilon = max((N_STEPS - iteration)/N_STEPS, 0.1)
    action = exploration_policy(Q_table, state, epsilon)
    next_state, reward, game_over = step(env, action)
    row_next_state = binary_vector_to_decimal(next_state)
    next_value = Q_table[row_next_state].max()  # greedy policy at the next step

    Q_table[row_state, action] *= 1 - alpha
    Q_table[row_state, action] += alpha * (reward + gamma * next_value)
    if game_over:
        env.reset()


In [12]:
np.save('Q_table/Q_table.npy', Q_table)

Calcolati i Q-value effettua la scelta della prossima mossa in base all'azione con maggiore Q-value

In [9]:
Q_table = np.load('Q_table/Q_table.npy')
env = enviroment.SnakeGameAI()
cumulative_score = 0
MAX_N_GAMES = 100
max_score = 0

for n_game in range(MAX_N_GAMES):
    env.reset()
    game_over = False
    while not game_over:
        state= env.get_state()
        row_Q_value = binary_vector_to_decimal(state)
        action = np.argmax(Q_table[row_Q_value])
        final_move = [0,0,0]
        final_move[action] = 1
        new_state, reward, game_over, score = env.play_step_QL(final_move)
    if score > max_score:
        max_score = score
    cumulative_score += score

    
print(f"Mean score: {cumulative_score/ MAX_N_GAMES}\nMax score: {max_score}", end="")

Mean score: 19.09
Max score: 41

In [10]:
import enviroment as enviroment_visual
Q_table = np.load('Q_table/Q_table.npy')
env_visual = enviroment_visual.SnakeGameAI()
env_visual.reset()
game_over = False

while not game_over:
    state= env_visual.get_state()
    row_Q_value = binary_vector_to_decimal(state)
    action = np.argmax(Q_table[row_Q_value])
    final_move = [0,0,0]
    final_move[action] = 1
    new_state, reward, game_over, score = env_visual.play_step_QL(final_move)
    
print(f"Score: {score}", end="")

pygame 2.6.0 (SDL 2.28.4, Python 3.12.4)
Hello from the pygame community. https://www.pygame.org/contribute.html
Score: 16