In [3]:
import numpy as np

maze = np.array([
    [1., 0., 1., 1., 1., 1., 1., 1.],
    [1., 0., 1., 1., 1., 0., 1., 1.],
    [1., 1., 1., 1., 0., 1., 0., 1.],
    [1., 1., 1., 0., 1., 1., 1., 1.],
    [1., 1., 0., 1., 1., 1., 1., 1.],
    [1., 1., 1., 0., 1., 0., 0., 0.],
    [1., 1., 1., 0., 1., 1., 1., 1.],
    [1., 1., 1., 1., 0., 1., 1., 1.]
])



In [1]:
import numpy as np
import random
import datetime

# Deep Q-Learning training function
def qtrain(model, maze, **opt):
    from TreasureMaze import TreasureMaze, LEFT, UP, RIGHT, DOWN
    from GameExperience import GameExperience

    # Training parameters
    n_epoch = opt.get('n_epoch', 15000)
    max_memory = opt.get('max_memory', 1000)
    data_size = opt.get('data_size', 50)
    epsilon = opt.get('epsilon', 1.0)
    epsilon_min = opt.get('epsilon_min', 0.05)
    epsilon_decay = opt.get('epsilon_decay', 0.995)

    qmaze = TreasureMaze(maze)
    experience = GameExperience(model, max_memory=max_memory)

    win_history = []
    hsize = qmaze.maze.size // 2
    win_rate = 0.0
    start_time = datetime.datetime.now()

    for epoch in range(n_epoch):
        start_cell = random.choice(qmaze.free_cells)
        qmaze.reset(start_cell)
        envstate = qmaze.observe()
        game_over = False
        n_episodes = 0
        total_loss = 0.0

        while not game_over:
            # Explore or exploit
            if np.random.rand() < epsilon:
                action = random.choice([LEFT, UP, RIGHT, DOWN])
            else:
                q_values = model.predict(envstate)
                action = np.argmax(q_values[0])

            prev_envstate = envstate
            envstate, reward, status = qmaze.act(action)
            episode = (prev_envstate, action, reward, envstate, status in ('win', 'lose'))
            experience.remember(episode)

            # Only train if enough memory is stored
            if len(experience.memory) >= data_size:
                inputs, targets = experience.get_data(data_size)
                history = model.fit(inputs, targets, epochs=1, verbose=0)
                loss = history.history['loss'][0]
                total_loss += loss
                n_episodes += 1

            if status in ('win', 'lose'):
                game_over = True
                win_history.append(1 if status == 'win' else 0)

        # Decay epsilon
        if epsilon > epsilon_min:
            epsilon *= epsilon_decay

        # Update win rate
        if len(win_history) > hsize:
            recent_history = win_history[-hsize:]
            win_rate = sum(recent_history) / len(recent_history)

        dt = datetime.datetime.now() - start_time
        avg_loss = total_loss / n_episodes if n_episodes > 0 else 0
        print(f"Epoch: {epoch+1}/{n_epoch} | Loss: {avg_loss:.4f} | Episodes: {n_episodes} | Wins: {sum(win_history)} | Win rate: {win_rate:.3f} | Time: {format_time(dt.total_seconds())}")

        if win_rate > 0.9 and len(win_history) >= hsize:
            print(f"Reached {win_rate*100:.1f}% win rate, stopping training at epoch {epoch+1}")
            break

    total_time = datetime.datetime.now() - start_time
    print(f"Training completed in {format_time(total_time.total_seconds())}")
    return total_time.total_seconds()


def format_time(seconds):
    if seconds < 400:
        return f"{seconds:.1f} seconds"
    elif seconds < 4000:
        return f"{seconds/60:.2f} minutes"
    else:
        return f"{seconds/3600:.2f} hours"



In [2]:
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

def build_model(maze):
    input_size = maze.size
    hidden_size = 64  # You can adjust this
    output_size = 4   # LEFT, UP, RIGHT, DOWN

    model = Sequential()
    model.add(Dense(hidden_size, input_shape=(input_size,), activation='relu'))
    model.add(Dense(hidden_size, activation='relu'))
    model.add(Dense(output_size, activation='linear'))
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

    return model



Using TensorFlow backend.


In [None]:
model = build_model(maze)
qtrain(model, maze, epochs=1000, max_memory=512, data_size=32)


Epoch: 1/15000 | Loss: 0.0057 | Episodes: 113 | Wins: 0 | Win rate: 0.000 | Time: 12.8 seconds
Epoch: 2/15000 | Loss: 0.0124 | Episodes: 54 | Wins: 1 | Win rate: 0.000 | Time: 15.8 seconds
Epoch: 3/15000 | Loss: 0.0177 | Episodes: 139 | Wins: 1 | Win rate: 0.000 | Time: 23.3 seconds
Epoch: 4/15000 | Loss: 0.0158 | Episodes: 49 | Wins: 2 | Win rate: 0.000 | Time: 25.6 seconds
Epoch: 5/15000 | Loss: 0.0182 | Episodes: 147 | Wins: 2 | Win rate: 0.000 | Time: 33.8 seconds
Epoch: 6/15000 | Loss: 0.0240 | Episodes: 18 | Wins: 3 | Win rate: 0.000 | Time: 34.9 seconds
Epoch: 7/15000 | Loss: 0.0168 | Episodes: 150 | Wins: 3 | Win rate: 0.000 | Time: 43.9 seconds
Epoch: 8/15000 | Loss: 0.0205 | Episodes: 144 | Wins: 3 | Win rate: 0.000 | Time: 52.2 seconds
Epoch: 9/15000 | Loss: 0.0182 | Episodes: 149 | Wins: 3 | Win rate: 0.000 | Time: 59.8 seconds
Epoch: 10/15000 | Loss: 0.0200 | Episodes: 6 | Wins: 4 | Win rate: 0.000 | Time: 60.1 seconds
Epoch: 11/15000 | Loss: 0.0202 | Episodes: 147 | Wins:

Epoch: 86/15000 | Loss: 0.0186 | Episodes: 144 | Wins: 19 | Win rate: 0.312 | Time: 9.06 minutes
Epoch: 87/15000 | Loss: 0.0159 | Episodes: 150 | Wins: 19 | Win rate: 0.281 | Time: 9.18 minutes
Epoch: 88/15000 | Loss: 0.0175 | Episodes: 144 | Wins: 19 | Win rate: 0.281 | Time: 9.30 minutes
Epoch: 89/15000 | Loss: 0.0194 | Episodes: 141 | Wins: 19 | Win rate: 0.281 | Time: 9.41 minutes
Epoch: 90/15000 | Loss: 0.0181 | Episodes: 145 | Wins: 19 | Win rate: 0.281 | Time: 9.53 minutes
Epoch: 91/15000 | Loss: 0.0020 | Episodes: 1 | Wins: 20 | Win rate: 0.312 | Time: 9.53 minutes
Epoch: 92/15000 | Loss: 0.0246 | Episodes: 144 | Wins: 20 | Win rate: 0.312 | Time: 9.65 minutes
Epoch: 93/15000 | Loss: 0.0293 | Episodes: 144 | Wins: 20 | Win rate: 0.281 | Time: 9.77 minutes
Epoch: 94/15000 | Loss: 0.0242 | Episodes: 151 | Wins: 20 | Win rate: 0.281 | Time: 9.89 minutes
Epoch: 95/15000 | Loss: 0.0170 | Episodes: 152 | Wins: 20 | Win rate: 0.250 | Time: 10.02 minutes
Epoch: 96/15000 | Loss: 0.0162 

Epoch: 170/15000 | Loss: 0.0284 | Episodes: 24 | Wins: 33 | Win rate: 0.219 | Time: 18.07 minutes
Epoch: 171/15000 | Loss: 0.0331 | Episodes: 141 | Wins: 33 | Win rate: 0.219 | Time: 18.18 minutes
Epoch: 172/15000 | Loss: 0.0259 | Episodes: 145 | Wins: 33 | Win rate: 0.219 | Time: 18.30 minutes
Epoch: 173/15000 | Loss: 0.0284 | Episodes: 153 | Wins: 33 | Win rate: 0.219 | Time: 18.42 minutes
Epoch: 174/15000 | Loss: 0.0455 | Episodes: 3 | Wins: 34 | Win rate: 0.250 | Time: 18.42 minutes
Epoch: 175/15000 | Loss: 0.0257 | Episodes: 134 | Wins: 34 | Win rate: 0.250 | Time: 18.53 minutes
Epoch: 176/15000 | Loss: 0.0250 | Episodes: 139 | Wins: 34 | Win rate: 0.250 | Time: 18.65 minutes
Epoch: 177/15000 | Loss: 0.0268 | Episodes: 51 | Wins: 35 | Win rate: 0.281 | Time: 18.69 minutes
Epoch: 178/15000 | Loss: 0.0474 | Episodes: 23 | Wins: 36 | Win rate: 0.312 | Time: 18.71 minutes
Epoch: 179/15000 | Loss: 0.0294 | Episodes: 137 | Wins: 36 | Win rate: 0.312 | Time: 18.82 minutes
Epoch: 180/1500

Epoch: 253/15000 | Loss: 0.0326 | Episodes: 136 | Wins: 53 | Win rate: 0.188 | Time: 26.46 minutes
Epoch: 254/15000 | Loss: 0.0230 | Episodes: 144 | Wins: 53 | Win rate: 0.188 | Time: 26.58 minutes
Epoch: 255/15000 | Loss: 0.0197 | Episodes: 143 | Wins: 53 | Win rate: 0.188 | Time: 26.70 minutes
Epoch: 256/15000 | Loss: 0.0235 | Episodes: 143 | Wins: 53 | Win rate: 0.188 | Time: 26.81 minutes
Epoch: 257/15000 | Loss: 0.0194 | Episodes: 147 | Wins: 53 | Win rate: 0.188 | Time: 26.93 minutes
Epoch: 258/15000 | Loss: 0.0203 | Episodes: 68 | Wins: 54 | Win rate: 0.219 | Time: 26.99 minutes
Epoch: 259/15000 | Loss: 0.0192 | Episodes: 132 | Wins: 54 | Win rate: 0.219 | Time: 27.09 minutes
Epoch: 260/15000 | Loss: 0.0241 | Episodes: 139 | Wins: 54 | Win rate: 0.219 | Time: 27.21 minutes
Epoch: 261/15000 | Loss: 0.0224 | Episodes: 137 | Wins: 54 | Win rate: 0.219 | Time: 27.32 minutes
Epoch: 262/15000 | Loss: 0.0211 | Episodes: 135 | Wins: 54 | Win rate: 0.219 | Time: 27.43 minutes
Epoch: 263/

Epoch: 336/15000 | Loss: 0.0212 | Episodes: 142 | Wins: 67 | Win rate: 0.219 | Time: 35.01 minutes
Epoch: 337/15000 | Loss: 0.0189 | Episodes: 134 | Wins: 67 | Win rate: 0.219 | Time: 35.12 minutes
Epoch: 338/15000 | Loss: 0.0246 | Episodes: 4 | Wins: 68 | Win rate: 0.250 | Time: 35.12 minutes
Epoch: 339/15000 | Loss: 0.0307 | Episodes: 139 | Wins: 68 | Win rate: 0.250 | Time: 35.24 minutes
Epoch: 340/15000 | Loss: 0.0267 | Episodes: 141 | Wins: 68 | Win rate: 0.219 | Time: 35.36 minutes
Epoch: 341/15000 | Loss: 0.0230 | Episodes: 103 | Wins: 69 | Win rate: 0.250 | Time: 35.45 minutes
Epoch: 342/15000 | Loss: 0.0313 | Episodes: 132 | Wins: 69 | Win rate: 0.250 | Time: 35.56 minutes
Epoch: 343/15000 | Loss: 0.0259 | Episodes: 133 | Wins: 69 | Win rate: 0.250 | Time: 35.67 minutes
Epoch: 344/15000 | Loss: 0.0280 | Episodes: 139 | Wins: 69 | Win rate: 0.250 | Time: 35.78 minutes
Epoch: 345/15000 | Loss: 0.0169 | Episodes: 142 | Wins: 69 | Win rate: 0.250 | Time: 35.90 minutes
Epoch: 346/1

Epoch: 419/15000 | Loss: 0.0296 | Episodes: 140 | Wins: 79 | Win rate: 0.219 | Time: 43.48 minutes
Epoch: 420/15000 | Loss: 0.0312 | Episodes: 31 | Wins: 80 | Win rate: 0.250 | Time: 43.51 minutes
Epoch: 421/15000 | Loss: 0.0243 | Episodes: 136 | Wins: 80 | Win rate: 0.219 | Time: 43.62 minutes
Epoch: 422/15000 | Loss: 0.0269 | Episodes: 144 | Wins: 80 | Win rate: 0.219 | Time: 43.73 minutes
Epoch: 423/15000 | Loss: 0.0222 | Episodes: 147 | Wins: 80 | Win rate: 0.188 | Time: 43.85 minutes
Epoch: 424/15000 | Loss: 0.0227 | Episodes: 150 | Wins: 80 | Win rate: 0.188 | Time: 43.97 minutes
Epoch: 425/15000 | Loss: 0.0211 | Episodes: 144 | Wins: 80 | Win rate: 0.156 | Time: 44.09 minutes
Epoch: 426/15000 | Loss: 0.0202 | Episodes: 140 | Wins: 80 | Win rate: 0.125 | Time: 44.20 minutes
Epoch: 427/15000 | Loss: 0.0227 | Episodes: 139 | Wins: 80 | Win rate: 0.094 | Time: 44.32 minutes
Epoch: 428/15000 | Loss: 0.0210 | Episodes: 136 | Wins: 80 | Win rate: 0.094 | Time: 44.42 minutes
Epoch: 429/

Epoch: 502/15000 | Loss: 0.0265 | Episodes: 137 | Wins: 90 | Win rate: 0.094 | Time: 52.93 minutes
Epoch: 503/15000 | Loss: 0.0323 | Episodes: 141 | Wins: 90 | Win rate: 0.094 | Time: 53.04 minutes
Epoch: 504/15000 | Loss: 0.0236 | Episodes: 137 | Wins: 90 | Win rate: 0.094 | Time: 53.16 minutes
Epoch: 505/15000 | Loss: 0.0175 | Episodes: 133 | Wins: 90 | Win rate: 0.094 | Time: 53.27 minutes
Epoch: 506/15000 | Loss: 0.0239 | Episodes: 144 | Wins: 90 | Win rate: 0.094 | Time: 53.39 minutes
Epoch: 507/15000 | Loss: 0.0233 | Episodes: 119 | Wins: 91 | Win rate: 0.125 | Time: 53.51 minutes
Epoch: 508/15000 | Loss: 0.0237 | Episodes: 134 | Wins: 91 | Win rate: 0.125 | Time: 53.63 minutes
Epoch: 509/15000 | Loss: 0.0236 | Episodes: 137 | Wins: 91 | Win rate: 0.125 | Time: 53.75 minutes
Epoch: 510/15000 | Loss: 0.0305 | Episodes: 137 | Wins: 91 | Win rate: 0.125 | Time: 53.89 minutes
Epoch: 511/15000 | Loss: 0.0279 | Episodes: 139 | Wins: 91 | Win rate: 0.125 | Time: 54.02 minutes
Epoch: 512