In [1]:
import gym
import numpy as np
import random
from keras.models import Sequential
from keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

from collections import deque

In [2]:
class DQN:
    """Deep Q-learning agent with an online network and a target network.

    The online network (``self.model``) chooses actions and is fitted on
    replayed transitions. The target network (``self.target_model``) supplies
    the bootstrap values and is blended toward the online weights by
    ``target_train``.
    """

    def __init__(self, env):
        """Store the environment, set hyperparameters and build both networks.

        Args:
            env: Environment object. This class reads
                ``env.observation_space`` (must support ``len``),
                ``env.action_space.n`` and ``env.action_space.sample()``.
        """
        self.env = env
        # Replay buffer; once 2000 transitions are stored the oldest drop out.
        self.memory = deque(maxlen=2000)

        self.gamma = 0.85           # discount applied to the bootstrapped future value
        self.epsilon = 1.0          # current probability of taking a random action
        self.epsilon_min = 0.01     # floor for epsilon
        self.epsilon_decay = 0.995  # multiplicative decay applied on every act() call
        self.learning_rate = 0.005  # Adam step size
        self.tau = .125             # share of the online weights in each soft target update

        self.model = self.create_model()
        self.target_model = self.create_model()

    def create_model(self):
        """Build and compile a small fully connected Q-network.

        The input width is ``len(self.env.observation_space)`` and the output
        width is ``self.env.action_space.n``.

        Returns:
            A compiled Keras ``Sequential`` model (MSE loss, Adam optimizer).
        """
        model = Sequential()
        # Use the agent's own environment, not a notebook-level ``env`` global,
        # so the class works for whichever environment it was constructed with.
        state_shape = len(self.env.observation_space)
        model.add(Dense(24, input_dim=state_shape, activation="relu"))
        model.add(Dense(48, activation="relu"))
        model.add(Dense(24, activation="relu"))
        model.add(Dense(self.env.action_space.n))
        # ``learning_rate`` is the supported keyword; ``lr`` is deprecated.
        model.compile(loss="mean_squared_error",
                      optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def act(self, state):
        """Choose an action for ``state`` with an epsilon-greedy policy.

        Epsilon is decayed (down to ``epsilon_min``) on every call, before the
        random/greedy decision is made.

        Args:
            state: Network input for a single observation; it is passed
                unchanged to ``self.model.predict``.

        Returns:
            Either ``self.env.action_space.sample()`` (exploration) or a list
            with one 0/1 entry per network output, where 1 marks outputs whose
            predicted value is positive.
        """
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
        if np.random.random() < self.epsilon:
            return self.env.action_space.sample()
        prediction = self.model.predict(state, verbose=0)[0]
        # TODO: the original author left an open question here about using a
        # softmax instead of thresholding each output at zero.
        return [1 if value > 0 else 0 for value in prediction]

    def remember(self, state, action, reward, new_state, done):
        """Append one transition to the replay buffer."""
        self.memory.append([state, action, reward, new_state, done])

    @staticmethod
    def _action_index(action):
        """Turn a stored action into an index into one row of Q-values.

        Scalar actions are returned unchanged. Vector actions (such as the 0/1
        list produced by ``act``) become a boolean mask. Without this, NumPy
        would read the 0/1 entries as integer positions 0 and 1.
        """
        if np.ndim(action) == 0:
            return action
        return np.asarray(action, dtype=bool)

    def replay(self, batch_size=32):
        """Fit the online network on a random batch of remembered transitions.

        Does nothing until at least ``batch_size`` transitions are stored.
        Each sampled transition is fitted individually: the target network's
        prediction for the state is used as the regression target, with the
        entries selected by the action replaced by ``reward`` (terminal) or
        ``reward + gamma * max(Q_target(new_state))`` (non-terminal).

        Args:
            batch_size: Number of transitions to sample; defaults to 32.
        """
        if len(self.memory) < batch_size:
            return

        samples = random.sample(self.memory, batch_size)
        for state, action, reward, new_state, done in samples:
            target = self.target_model.predict(state, verbose=0)
            if done:
                updated_value = reward
            else:
                q_future = max(self.target_model.predict(new_state, verbose=0)[0])
                updated_value = reward + q_future * self.gamma
            target[0][self._action_index(action)] = updated_value
            self.model.fit(state, target, epochs=1, verbose=0)

    def target_train(self):
        """Blend the online weights into the target network (soft update).

        Each target weight becomes
        ``tau * online + (1 - tau) * target``.
        """
        blended = [
            online * self.tau + target * (1 - self.tau)
            for online, target in zip(self.model.get_weights(),
                                      self.target_model.get_weights())
        ]
        self.target_model.set_weights(blended)

    def save_model(self, fn):
        """Save the online network to ``fn`` via ``self.model.save``."""
        self.model.save(fn)


In [3]:
from CarEnvironment import CarEnvironment
from car import Car
import pygame

pygame 2.1.0 (SDL 2.0.16, Python 3.9.10)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [4]:
# Game geometry. GAME_SIZE is used for both entries of GAME_DIM and to place
# the start position 20 units in from two edges.
GAME_SIZE = 500
START_POS = (20, GAME_SIZE - 20)
BLOCK_SIZE = 2
GAME_DIM = (GAME_SIZE,) * 2
bg = None  # no background object is handed to Car

# Track layout and waypoints for the "curved" board, stored as .npy arrays.
board = np.load("./boards/curved/curved_board.npy")
waypoints = np.load("./boards/curved/curved_waypoints.npy")

# pygame is initialised before the Car is constructed.
pygame.init()
c = Car(
    0.2,
    0.2,
    START_POS,
    board,
    waypoints,
    bg,
    BLOCK_SIZE,
    GAME_DIM,
    user="AI",
)
env = CarEnvironment(c)

In [5]:
from tqdm import tqdm

In [7]:
# NOTE(review): `gamma` and `epsilon` are not passed to the agent below; DQN
# sets its own values in __init__. Confirm whether these are still needed.
gamma = 0.9
epsilon = .95

trials = 10       # number of episodes to run
trial_len = 100   # maximum number of steps per episode

# updateTargetNetwork = 1000
dqn_agent = DQN(env=env)
steps = []  # NOTE(review): never appended to in this cell
for trial in range(trials):
    cur_state = env.reset().reshape(1, 6)
    for step in range(trial_len):
        action = dqn_agent.act(cur_state)
        observation, reward, done, _ = env.step(action)

        # Reshape to a single-row batch, matching cur_state.
        new_state = observation.reshape(1, 6)
        dqn_agent.remember(cur_state, action, reward, new_state, done)

        dqn_agent.replay()        # fits the online (prediction) model on a sampled batch
        dqn_agent.target_train()  # blends the online weights into the target model

        cur_state = new_state
        if done:
            break
    # One saved model per episode, named by episode index.
    dqn_agent.save_model(f"model-iter{trial}")

INFO:tensorflow:Assets written to: model-iter0\assets
INFO:tensorflow:Assets written to: model-iter1\assets
INFO:tensorflow:Assets written to: model-iter2\assets
INFO:tensorflow:Assets written to: model-iter3\assets
INFO:tensorflow:Assets written to: model-iter4\assets
INFO:tensorflow:Assets written to: model-iter5\assets
INFO:tensorflow:Assets written to: model-iter6\assets
INFO:tensorflow:Assets written to: model-iter7\assets
INFO:tensorflow:Assets written to: model-iter8\assets
INFO:tensorflow:Assets written to: model-iter9\assets
