In [1]:
!pip install pygame

Collecting pygame
[?25l  Downloading https://files.pythonhosted.org/packages/8e/24/ede6428359f913ed9cd1643dd5533aefeb5a2699cc95bea089de50ead586/pygame-1.9.6-cp36-cp36m-manylinux1_x86_64.whl (11.4MB)
[K    100% |████████████████████████████████| 11.4MB 3.8MB/s eta 0:00:01
[31mfastai 1.0.60 requires nvidia-ml-py3, which is not installed.[0m
[?25hInstalling collected packages: pygame
Successfully installed pygame-1.9.6
[33mYou are using pip version 10.0.1, however version 20.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [14]:
from models.DeepCNNModel import DeepCNNModel
from models.RandomModel import RandomModel
from train_cnn import train_deepcnn_model
import fsutils as fs

In [17]:
# --- arguments forwarded to train_deepcnn_model(...) ---
FRAME_SKIP = 4
UPDATE_FREQ = FRAME_SKIP  # deliberately tied to FRAME_SKIP
TARGET_NET_SYNC_FREQ = 1_000
MAX_EPS = 250
MAX_STEPS_PER_EP = 1_000

# --- keyword arguments for the DeepCNNModel constructor ---
TAU = 4
GAMMA = 0.95
EPS_START, EPS_END = 0.25, 0.05
EPS_DECAY_WINDOW = 50
REPLAY_BUF_CAPACITY = 10_000
REPLAY_BUF_PREFILL_AMT = 5_000
LR = 1e-3
DOWNSAMPLE_SIZE = (112, 112)
BATCH_SIZE = 32

# --- how many RandomModel adversaries get created in the model cell ---
NUM_ADVERSARIES = 7

In [18]:
# Build the agent from the hyperparameters set in the config cell.
cnn_model = DeepCNNModel(
    tau=TAU,
    gamma=GAMMA,
    eps_start=EPS_START,
    eps_end=EPS_END,
    eps_decay_window=EPS_DECAY_WINDOW,
    replay_buf_capacity=REPLAY_BUF_CAPACITY,
    replay_buf_prefill_amt=REPLAY_BUF_PREFILL_AMT,
    lr=LR,
    downsample_size=DOWNSAMPLE_SIZE,
    batch_size=BATCH_SIZE,
)
cnn_model_name = 'dqn_cnn_with_enemies_first250ep'

# Start from the previously trained 'dqn_cnn_500ep_v2' network only. Its replay
# buffer is intentionally not reused, because that run had no enemies in it.
saved_net = fs.load_net_from_device(cnn_model.net, 'dqn_cnn_500ep_v2', 'cpu')
cnn_model.net = saved_net
cnn_model.sync_target_net()

# One independent RandomModel per adversary.
adversary_models = [
    RandomModel(min_steps=5, max_steps=10) for _ in range(NUM_ADVERSARIES)
]

In [None]:
# Train the agent against the adversaries; prefill_buffer=True asks the trainer
# to fill the replay buffer before training begins (see the log output below).
train_deepcnn_model(
    cnn_model,
    cnn_model_name,
    adversary_models,
    frame_skip=FRAME_SKIP,
    update_freq=UPDATE_FREQ,
    target_net_sync_freq=TARGET_NET_SYNC_FREQ,
    max_eps=MAX_EPS,
    max_steps_per_ep=MAX_STEPS_PER_EP,
    prefill_buffer=True,
)

Filling replay buffer to 50.0% capacity...
Replay buffer filled with 5000 samples!
Beginning training...
=== Starting Episode 0 ===
Step 0
Mean Episode Loss: 0.0133 | Episode Reward: -99.3241 | Mean Reward: -99.3241
Model has been training for 17.5427 minutes.
=== Starting Episode 1 ===
Step 250
Step 500
Step 750
Step 1000
Mean Episode Loss: 2.3567 | Episode Reward: 66.0690 | Mean Reward: -16.6275
Model has been training for 22.9179 minutes.
=== Starting Episode 2 ===
Step 1250
Step 1500
Step 1750
Step 2000
Mean Episode Loss: 3.0372 | Episode Reward: 25.9716 | Mean Reward: -2.4278
Model has been training for 28.0182 minutes.
=== Starting Episode 3 ===
Step 2250
Step 2500
Step 2750
Step 3000
Mean Episode Loss: 1.3011 | Episode Reward: 29.4748 | Mean Reward: 5.5478
Model has been training for 32.9982 minutes.
=== Starting Episode 4 ===
Step 3250
Step 3500
Step 3750
Step 4000
Mean Episode Loss: 0.0341 | Episode Reward: 159.2683 | Mean Reward: 36.2919
Model has been training for 38.0665 mi

=== Starting Episode 45 ===
Step 35000
Step 35250
Mean Episode Loss: 2.6512 | Episode Reward: -88.8509 | Mean Reward: 72.4356
Model has been training for 190.5098 minutes.
=== Starting Episode 46 ===
Step 35500
Mean Episode Loss: 9.3712 | Episode Reward: -87.6470 | Mean Reward: 61.3247
Model has been training for 192.0613 minutes.
=== Starting Episode 47 ===
Step 35750
Step 36000
Step 36250
Step 36500
Mean Episode Loss: 9.9446 | Episode Reward: -2.2162 | Mean Reward: 40.3322
Model has been training for 196.6267 minutes.
=== Starting Episode 48 ===
Step 36750
Step 37000
Step 37250
Step 37500
Mean Episode Loss: 16.1863 | Episode Reward: 298.0953 | Mean Reward: 69.3763
Model has been training for 201.3933 minutes.
=== Starting Episode 49 ===
Mean Episode Loss: 1.8379 | Episode Reward: -98.2074 | Mean Reward: 58.4649
Model has been training for 201.5367 minutes.
=== Starting Episode 50 ===
Step 37750
Step 38000
Step 38250
Step 38500
Mean Episode Loss: 15.0365 | Episode Reward: 23.6597 | Me

Step 68500
Step 68750
Step 69000
Step 69250
Mean Episode Loss: 4.7903 | Episode Reward: 14.9132 | Mean Reward: -16.4349
Model has been training for 359.1634 minutes.
=== Starting Episode 91 ===
Step 69500
Step 69750
Step 70000
Step 70250
Mean Episode Loss: 3.7699 | Episode Reward: 55.3287 | Mean Reward: -1.4423
Model has been training for 364.1493 minutes.
=== Starting Episode 92 ===
Step 70500
Step 70750
Step 71000
Step 71250
Mean Episode Loss: 4.8998 | Episode Reward: 4.0549 | Mean Reward: 8.8399
Model has been training for 368.8758 minutes.
=== Starting Episode 93 ===
Step 71500
Step 71750
Step 72000
Step 72250
Mean Episode Loss: 2.2200 | Episode Reward: 28.1175 | Mean Reward: 10.8309
Model has been training for 373.7542 minutes.
=== Starting Episode 94 ===
Step 72500
Mean Episode Loss: 11.0329 | Episode Reward: -98.9048 | Mean Reward: 9.7795
Model has been training for 374.5264 minutes.
=== Starting Episode 95 ===
Step 72750
Step 73000
Mean Episode Loss: 6.6710 | Episode Reward: -1

Mean Episode Loss: 0.2670 | Episode Reward: -99.1370 | Mean Reward: -32.5200
Model has been training for 527.5964 minutes.
=== Starting Episode 135 ===
Step 105750
Step 106000
Step 106250
Step 106500
Mean Episode Loss: 7.7216 | Episode Reward: 7.9166 | Mean Reward: -50.1656
Model has been training for 532.0082 minutes.
=== Starting Episode 136 ===
Step 106750
Step 107000
Step 107250
Step 107500
Mean Episode Loss: 9.9745 | Episode Reward: 8.6796 | Mean Reward: -39.7289
Model has been training for 536.4213 minutes.
=== Starting Episode 137 ===
Step 107750
Step 108000
Step 108250
Step 108500
Mean Episode Loss: 8.2325 | Episode Reward: 8.3602 | Mean Reward: -39.0012
Model has been training for 540.7837 minutes.
=== Starting Episode 138 ===
Step 108750
Step 109000
Step 109250
Step 109500
Mean Episode Loss: 6.0933 | Episode Reward: 5.4544 | Mean Reward: -39.8679
Model has been training for 545.2116 minutes.
=== Starting Episode 139 ===
Step 109750
Step 110000
Step 110250
Step 110500
Mean Epi

In [None]:
# ===== Second run: train the loaded 'dqn_cnn_500ep_v2' model with no adversaries =====

In [None]:
# NOTE: this cell rebinds the same module-level names as the first config cell,
# so whichever of the two cells ran last determines the values seen downstream.

# --- train_deepcnn_model(...) arguments for the loaded-model run ---
FRAME_SKIP = 4
UPDATE_FREQ = FRAME_SKIP  # deliberately tied to FRAME_SKIP
TARGET_NET_SYNC_FREQ = 1_000
MAX_EPS = 250
MAX_STEPS_PER_EP = 500

# --- DeepCNNModel constructor arguments for the loaded-model run ---
TAU = 4
GAMMA = 0.95
EPS_START, EPS_END = 0.05, 0.05  # start and end are equal for this run
EPS_DECAY_WINDOW = 50
REPLAY_BUF_CAPACITY = 10_000
REPLAY_BUF_PREFILL_AMT = 1_000
LR = 1e-3
DOWNSAMPLE_SIZE = (112, 112)
BATCH_SIZE = 32

# Construct a model with the hyperparameters above, then hand it to the loader
# together with the 'dqn_cnn_500ep_v2' checkpoint name; the loader's return
# value replaces the freshly built instance.
loaded_model = DeepCNNModel(
    tau=TAU,
    gamma=GAMMA,
    eps_start=EPS_START,
    eps_end=EPS_END,
    eps_decay_window=EPS_DECAY_WINDOW,
    replay_buf_capacity=REPLAY_BUF_CAPACITY,
    replay_buf_prefill_amt=REPLAY_BUF_PREFILL_AMT,
    lr=LR,
    downsample_size=DOWNSAMPLE_SIZE,
    batch_size=BATCH_SIZE,
)
loaded_model_name = 'dqn_loaded_next_250_step'
loaded_model = fs.load_deep_cnn_from_device(loaded_model, 'dqn_cnn_500ep_v2', 'cpu')

# This run uses no adversaries.
adversary_models = list()

In [None]:
# Train the loaded model; prefill_buffer=False, so no buffer pre-fill is requested.
train_deepcnn_model(
    loaded_model,
    loaded_model_name,
    adversary_models,
    frame_skip=FRAME_SKIP,
    update_freq=UPDATE_FREQ,
    target_net_sync_freq=TARGET_NET_SYNC_FREQ,
    max_eps=MAX_EPS,
    max_steps_per_ep=MAX_STEPS_PER_EP,
    prefill_buffer=False,
)