In [None]:
!pip install pygame

In [1]:
from models.DeepCNNModel import DeepCNNModel
from models.RandomModel import RandomModel
from models.HeuristicModel import HeuristicModel
from train_cnn import train_deepcnn_model
import fsutils as fs

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


In [6]:
# --- Training-loop schedule (forwarded to train_deepcnn_model) ---
MAX_EPS = 250
MAX_STEPS_PER_EP = 1000
FRAME_SKIP = 4
UPDATE_FREQ = FRAME_SKIP  # update cadence is tied to the frame-skip value
TARGET_NET_SYNC_FREQ = 1000

# --- DeepCNNModel hyperparameters ---
TAU = 4
GAMMA = 0.95
LR = 1e-3
BATCH_SIZE = 32
DOWNSAMPLE_SIZE = (112, 112)

# Exploration schedule. Epsilon deliberately starts low: the agent should keep
# exploring a little, but must not throw away what it has already learned.
# NOTE(review): EPS_START equals EPS_END, so EPS_DECAY_WINDOW presumably has no
# visible effect on this run -- confirm against DeepCNNModel's decay logic.
EPS_START = 0.05
EPS_END = 0.05
EPS_DECAY_WINDOW = 50

# Replay buffer sizing.
REPLAY_BUF_CAPACITY = 10000
REPLAY_BUF_PREFILL_AMT = 2000

# --- Adversaries: how many of each model type to instantiate ---
NUM_RAND_ADVERSARIES = 3
NUM_HEURISTIC_ADVERSARIES = 3

In [7]:
# Agent to be trained: a DeepCNNModel configured entirely from the
# hyperparameter constants defined in the configuration cell.
cnn_model = DeepCNNModel(
    tau=TAU,
    gamma=GAMMA,
    eps_start=EPS_START,
    eps_end=EPS_END,
    eps_decay_window=EPS_DECAY_WINDOW,
    replay_buf_capacity=REPLAY_BUF_CAPACITY,
    replay_buf_prefill_amt=REPLAY_BUF_PREFILL_AMT,
    lr=LR,
    downsample_size=DOWNSAMPLE_SIZE,
    batch_size=BATCH_SIZE,
)
# Name handed to train_deepcnn_model for this run.
cnn_model_name = 'dqn_cnn_add_enemies_to_trained_food_next_250ep'

# Opponent pool: all RandomModel instances first, followed by the HeuristicModel
# instances. Every entry is a distinct object.
adversary_models = [
    RandomModel(min_steps=5, max_steps=10) for _ in range(NUM_RAND_ADVERSARIES)
] + [
    HeuristicModel() for _ in range(NUM_HEURISTIC_ADVERSARIES)
]

In [8]:
# Replace the freshly constructed model with the result of loading the earlier
# 'dqn_cnn_add_enemies_to_trained_food_250ep' run onto the CPU.
# NOTE(review): presumably this restores saved network weights so training
# continues from that run -- confirm against fsutils.load_deep_cnn_from_device.
cnn_model = fs.load_deep_cnn_from_device(
    cnn_model,
    'dqn_cnn_add_enemies_to_trained_food_250ep',
    'cpu',
)

In [None]:
# Launch training of cnn_model against the adversary pool using the schedule
# constants from the configuration cell.
# prefill_buffer=False: the run log reports "Replay buffer prefill disabled."
train_deepcnn_model(
    cnn_model,
    cnn_model_name,
    adversary_models,
    frame_skip=FRAME_SKIP,
    update_freq=UPDATE_FREQ,
    target_net_sync_freq=TARGET_NET_SYNC_FREQ,
    max_eps=MAX_EPS,
    max_steps_per_ep=MAX_STEPS_PER_EP,
    prefill_buffer=False,
)

Replay buffer prefill disabled.
Beginning training...
=== Starting Episode 0 ===
Step 0
Step 250
Step 500
Step 750
Ep Score: 97.5734 | Mean Score: 97.5734 | Steps Survived: 1000 | Mean Steps Survived: 1000.00
Mean Ep Loss: 11.8752 | Ep Reward: 93.7321 | Mean Reward: 97.5734
Model has been training for 4.7177 minutes.
=== Starting Episode 1 ===
Step 1000
Step 1250
Step 1500
Step 1750
Ep Score: 109.5385 | Mean Score: 103.5560 | Steps Survived: 1000 | Mean Steps Survived: 1000.00
Mean Ep Loss: 7.7912 | Ep Reward: 10.6047 | Mean Reward: 103.5560
Model has been training for 9.9081 minutes.
=== Starting Episode 2 ===
Step 2000
Ep Score: 1.9165 | Mean Score: 69.6762 | Steps Survived: 27 | Mean Steps Survived: 675.67
Mean Ep Loss: 2.0962 | Ep Reward: 1.8012 | Mean Reward: 69.6762
Model has been training for 10.0477 minutes.
=== Starting Episode 3 ===
Ep Score: 102.7588 | Mean Score: 77.9468 | Steps Survived: 46 | Mean Steps Survived: 518.25
Mean Ep Loss: 10.9685 | Ep Reward: -71.4699 | Mean Re