In [3]:
%load_ext autoreload
%autoreload 2

import torch
from alpha_connect import (
    AlphaZeroModelConnect4,
    Trainer,
    AlphaZeroModelChineseCheckers,
    GameChoice,
    AlphaZeroModelBounce,
)
import os
from game import BounceState

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
# Pick the compute device. Apple's Metal backend ("mps") stays the first
# choice, as in the original cell, but fall back to CUDA and then CPU so the
# notebook still runs after Restart & Run All on a machine without MPS.
# NOTE(review): this assumes Trainer follows the device the model lives on
# rather than hard-coding one itself; confirm in alpha_connect.
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Hyper-parameters and file locations handed to Trainer as a plain dict.
args = {
    "batch_size": 128,
    "numIters": 500,  # Total number of training iterations
    "num_simulations": 500,  # Total number of MCTS simulations to run when deciding on a move to play
    "max_game_length": 200,  # Maximum number of moves in a game (used to avoid infinite games)
    "numEps": 70,  # Number of full games (episodes) to run during each iteration for each thread
    "numThreads": 1,  # Number of threads running simulations
    "numItersForTrainExamplesHistory": 20,  # presumably how many past iterations of examples are kept; confirm in Trainer
    "epochs": 3,  # Number of epochs of training per iteration
    "checkpoint_path": "../data/latest_bounce.pth",  # location to save latest set of weights
    "loss_history_path": "../data/loss_history_bounce.csv",  # location to save loss history
    "lr": 0.0005,  # learning rate
    "lr_decay": 0.995,  # learning rate decay
    "temperature": 1,  # presumably the move-sampling temperature used during self-play; confirm in Trainer
}

# Select the game implementation before constructing the model and trainer.
GameChoice.set_game(BounceState)

model = AlphaZeroModelBounce()
if os.path.exists(args["checkpoint_path"]):
    # Load onto CPU regardless of the device the checkpoint was saved from;
    # without map_location, restoring fails when that device is unavailable
    # on this machine. The model is still on CPU here and is moved to
    # `device` right after.
    state_dict = torch.load(args["checkpoint_path"], map_location="cpu")
    model.load_state_dict(state_dict)
    print("Loaded model from checkpoint")

model.to(device)

trainer = Trainer(model, args)
trainer.learn()

Loaded model from checkpoint
Resuming training from iteration 173 with best loss 1.1896456206838288
173/500 with lr 0.0004975



  0%|          | 0/70 [00:00<?, ?it/s]
(Game length:0):   0%|          | 0/70 [00:00<?, ?it/s]
(Game length:1):   0%|          | 0/70 [03:31<?, ?it/s]
(Game length:2):   0%|          | 0/70 [06:31<?, ?it/s]
(Game length:3):   0%|          | 0/70 [09:18<?, ?it/s]
(Game length:3):   1%|▏         | 1/70 [11:54<13:41:50, 714.65s/it]
(Game length:4):   1%|▏         | 1/70 [11:54<13:41:50, 714.65s/it]
(Game length:4):   3%|▎         | 2/70 [14:11<7:04:55, 374.93s/it] 
(Game length:5):   3%|▎         | 2/70 [14:11<7:04:55, 374.93s/it]
(Game length:5):   4%|▍         | 3/70 [16:14<4:49:58, 259.68s/it]
(Game length:6):  11%|█▏        | 8/70 [16:14<4:28:20, 259.68s/it]
(Game length:6):  13%|█▎        | 9/70 [17:56<1:07:29, 66.39s/it] 
(Game length:7):  14%|█▍        | 10/70 [17:56<1:06:23, 66.39s/it]
(Game length:7):  16%|█▌        | 11/70 [19:32<1:00:22, 61.39s/it]
(Game length:8):  29%|██▊       | 20/70 [19:32<51:09, 61.39s/it]  
(Game length:8):  30%|███       | 21/70 [20:57<21:29, 26.31s/it


Loss: 1.481891137858232
Policy Loss 0.6339475736021996
Value Loss 0.8479435642560323
6 batches processed
Examples:
tensor([0.0117, 0.1204, 0.8679, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000])
tensor([0.0140, 0.9459, 0.0401, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0


  0%|          | 0/70 [00:00<?, ?it/s]
(Game length:0):   0%|          | 0/70 [00:00<?, ?it/s]
(Game length:1):   0%|          | 0/70 [03:15<?, ?it/s]
(Game length:2):   0%|          | 0/70 [06:14<?, ?it/s]
(Game length:3):   0%|          | 0/70 [09:01<?, ?it/s]
(Game length:3):   1%|▏         | 1/70 [11:37<13:22:07, 697.50s/it]
(Game length:4):   1%|▏         | 1/70 [11:37<13:22:07, 697.50s/it]
(Game length:4):   3%|▎         | 2/70 [13:58<6:59:10, 369.86s/it] 
(Game length:5):   3%|▎         | 2/70 [13:58<6:59:10, 369.86s/it]
(Game length:5):   4%|▍         | 3/70 [16:05<4:49:29, 259.24s/it]
(Game length:6):  10%|█         | 7/70 [16:05<4:32:12, 259.24s/it]
(Game length:6):  11%|█▏        | 8/70 [17:52<1:19:46, 77.20s/it] 
(Game length:7):  17%|█▋        | 12/70 [17:52<1:14:37, 77.20s/it]
(Game length:7):  19%|█▊        | 13/70 [19:25<44:13, 46.55s/it]  
(Game length:8):  31%|███▏      | 22/70 [19:25<37:14, 46.55s/it]
(Game length:8):  33%|███▎      | 23/70 [20:38<18:04, 23.08s/it]



Loss: 1.524160417417685
Policy Loss 0.6586818918585777
Value Loss 0.8654785255591074
6 batches processed
Examples:
tensor([0.0308, 0.0353, 0.0351, 0.0341, 0.0304, 0.0340, 0.0345, 0.0338, 0.0335,
        0.0357, 0.0339, 0.0297, 0.0313, 0.0306, 0.0329, 0.0315, 0.0374, 0.0329,
        0.0305, 0.0365, 0.0390, 0.0321, 0.0294, 0.0341, 0.0364, 0.0296, 0.0346,
        0.0364, 0.0290, 0.0349, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000])
tensor([0.0000, 0.0040, 0.9820, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0040, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0060, 0.0000,
        0.0000, 0.0000, 0.0040, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0


  0%|          | 0/70 [00:00<?, ?it/s]
(Game length:0):   0%|          | 0/70 [00:00<?, ?it/s]
(Game length:1):   0%|          | 0/70 [03:16<?, ?it/s]
(Game length:2):   0%|          | 0/70 [06:20<?, ?it/s]
(Game length:3):   0%|          | 0/70 [09:10<?, ?it/s]
(Game length:4):   0%|          | 0/70 [11:50<?, ?it/s]
(Game length:4):   1%|▏         | 1/70 [14:16<16:24:52, 856.41s/it]
(Game length:5):   3%|▎         | 2/70 [14:16<16:10:36, 856.41s/it]
(Game length:5):   4%|▍         | 3/70 [16:23<5:00:21, 268.98s/it] 
(Game length:6):  10%|█         | 7/70 [16:23<4:42:25, 268.98s/it]
(Game length:6):  11%|█▏        | 8/70 [18:06<1:31:44, 88.78s/it] 
(Game length:7):  11%|█▏        | 8/70 [18:06<1:31:44, 88.78s/it]
(Game length:7):  13%|█▎        | 9/70 [19:42<1:31:31, 90.02s/it]
(Game length:8):  24%|██▍       | 17/70 [19:42<1:19:30, 90.02s/it]
(Game length:8):  26%|██▌       | 18/70 [21:05<29:50, 34.42s/it]  
(Game length:9):  36%|███▌      | 25/70 [21:05<25:49, 34.42s/it]
(Game lengt


Loss: 1.4943298920989037
Policy Loss 0.6687054261565208
Value Loss 0.8256244659423828
6 batches processed
Examples:
tensor([0.0401, 0.0321, 0.0541, 0.0401, 0.0319, 0.0537, 0.0453, 0.0448, 0.0478,
        0.0361, 0.0523, 0.0497, 0.0379, 0.0440, 0.0418, 0.0501, 0.0540, 0.0419,
        0.0532, 0.0541, 0.0412, 0.0537, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000])
tensor([0.0020, 0.0080, 0.1283, 0.0160, 0.0341, 0.0842, 0.0301, 0.0160, 0.0140,
        0.0160, 0.2525, 0.0200, 0.0040, 0.1323, 0.0180, 0.0721, 0.0140, 0.0080,
        0.0501, 0.0601, 0.0080, 0.0120, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
  


  0%|          | 0/70 [00:00<?, ?it/s]
(Game length:0):   0%|          | 0/70 [00:00<?, ?it/s]
(Game length:1):   0%|          | 0/70 [03:12<?, ?it/s]
(Game length:2):   0%|          | 0/70 [06:16<?, ?it/s]
(Game length:3):   0%|          | 0/70 [09:03<?, ?it/s]
(Game length:3):   1%|▏         | 1/70 [11:42<13:27:59, 702.60s/it]
(Game length:4):   3%|▎         | 2/70 [11:42<13:16:16, 702.60s/it]
(Game length:4):   4%|▍         | 3/70 [14:04<4:22:04, 234.70s/it] 
(Game length:5):   4%|▍         | 3/70 [14:04<4:22:04, 234.70s/it]
(Game length:5):   6%|▌         | 4/70 [16:08<3:36:09, 196.50s/it]
(Game length:6):  14%|█▍        | 10/70 [16:08<3:16:30, 196.50s/it]
(Game length:6):  16%|█▌        | 11/70 [17:47<54:05, 55.02s/it]   
(Game length:7):  23%|██▎       | 16/70 [17:47<49:30, 55.02s/it]
(Game length:7):  24%|██▍       | 17/70 [19:09<30:47, 34.86s/it]
(Game length:8):  34%|███▍      | 24/70 [19:09<26:43, 34.86s/it]
(Game length:8):  36%|███▌      | 25/70 [20:24<16:55, 22.57s/it]
(G


Loss: 1.5735285952687263
Policy Loss 0.6610938683152199
Value Loss 0.9124347269535065
6 batches processed
Examples:
tensor([0.0371, 0.0383, 0.0294, 0.0322, 0.0367, 0.0307, 0.0325, 0.0302, 0.0317,
        0.0355, 0.0372, 0.0329, 0.0377, 0.0293, 0.0316, 0.0364, 0.0306, 0.0325,
        0.0297, 0.0313, 0.0361, 0.0383, 0.0365, 0.0300, 0.0320, 0.0307, 0.0329,
        0.0362, 0.0310, 0.0330, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000])
tensor([0.0060, 0.0040, 0.1122, 0.0200, 0.1924, 0.0020, 0.0140, 0.0180, 0.0040,
        0.0281, 0.


  0%|          | 0/70 [00:00<?, ?it/s]
(Game length:0):   0%|          | 0/70 [00:00<?, ?it/s]
(Game length:1):   0%|          | 0/70 [03:12<?, ?it/s]
(Game length:2):   0%|          | 0/70 [06:11<?, ?it/s]
(Game length:2):   1%|▏         | 1/70 [08:59<10:19:55, 539.07s/it]
(Game length:3):   1%|▏         | 1/70 [08:59<10:19:55, 539.07s/it]
(Game length:3):   3%|▎         | 2/70 [11:33<5:54:14, 312.57s/it] 
(Game length:4):   3%|▎         | 2/70 [11:33<5:54:14, 312.57s/it]
(Game length:4):   4%|▍         | 3/70 [13:51<4:20:20, 233.15s/it]
(Game length:5):   4%|▍         | 3/70 [13:51<4:20:20, 233.15s/it]
(Game length:5):   6%|▌         | 4/70 [15:55<3:29:05, 190.09s/it]
(Game length:6):  11%|█▏        | 8/70 [15:55<3:16:25, 190.09s/it]
(Game length:6):  13%|█▎        | 9/70 [17:39<1:06:13, 65.15s/it] 
(Game length:7):  17%|█▋        | 12/70 [17:39<1:02:58, 65.15s/it]
(Game length:7):  19%|█▊        | 13/70 [19:17<44:09, 46.49s/it]  
(Game length:8):  33%|███▎      | 23/70 [19:17<36:24


Loss: 1.50985316435496
Policy Loss 0.5887322425842285
Value Loss 0.9211209217707316
6 batches processed
Examples:
tensor([0.3694, 0.0970, 0.3011, 0.2326, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000])
tensor([0.2866, 0.1924, 0.2445, 0.2766, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.00


  0%|          | 0/70 [00:00<?, ?it/s]
(Game length:0):   0%|          | 0/70 [00:00<?, ?it/s]
(Game length:1):   0%|          | 0/70 [03:16<?, ?it/s]
(Game length:2):   0%|          | 0/70 [06:20<?, ?it/s]
(Game length:3):   0%|          | 0/70 [09:12<?, ?it/s]
(Game length:3):   1%|▏         | 1/70 [11:51<13:38:46, 711.98s/it]
(Game length:4):   3%|▎         | 2/70 [11:51<13:26:54, 711.98s/it]
(Game length:4):   4%|▍         | 3/70 [14:14<4:24:54, 237.24s/it] 
(Game length:5):   7%|▋         | 5/70 [14:14<4:17:00, 237.24s/it]
(Game length:5):   9%|▊         | 6/70 [16:16<2:04:29, 116.71s/it]
(Game length:6):  19%|█▊        | 13/70 [16:16<1:50:52, 116.71s/it]
(Game length:6):  20%|██        | 14/70 [17:52<40:31, 43.42s/it]   
(Game length:7):  27%|██▋       | 19/70 [17:52<36:54, 43.42s/it]
(Game length:7):  29%|██▊       | 20/70 [19:16<25:37, 30.75s/it]
(Game length:8):  41%|████▏     | 29/70 [19:16<21:00, 30.75s/it]
(Game length:8):  43%|████▎     | 30/70 [20:21<12:20, 18.52s/it]
(G


Loss: 1.398202359676361
Policy Loss 0.5077466726303101
Value Loss 0.890455687046051
5 batches processed
Examples:
tensor([0.6857, 0.3143, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000])
tensor([0.5451, 0.4549, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.00


  0%|          | 0/70 [00:00<?, ?it/s]
(Game length:0):   0%|          | 0/70 [00:00<?, ?it/s]
(Game length:1):   0%|          | 0/70 [03:17<?, ?it/s]
(Game length:2):   0%|          | 0/70 [06:22<?, ?it/s]
(Game length:3):   0%|          | 0/70 [09:12<?, ?it/s]
(Game length:3):   1%|▏         | 1/70 [11:51<13:37:44, 711.08s/it]
(Game length:4):   1%|▏         | 1/70 [11:51<13:37:44, 711.08s/it]
(Game length:4):   3%|▎         | 2/70 [14:12<7:06:15, 376.11s/it] 
(Game length:5):   6%|▌         | 4/70 [14:12<6:53:43, 376.11s/it]
(Game length:5):   7%|▋         | 5/70 [16:15<2:27:33, 136.21s/it]
(Game length:6):   9%|▊         | 6/70 [16:15<2:25:17, 136.21s/it]
(Game length:6):  10%|█         | 7/70 [18:04<1:48:03, 102.91s/it]
(Game length:7):  16%|█▌        | 11/70 [18:04<1:41:11, 102.91s/it]
(Game length:7):  17%|█▋        | 12/70 [19:35<51:07, 52.88s/it]   
(Game length:8):  26%|██▌       | 18/70 [19:35<45:49, 52.88s/it]
(Game length:8):  27%|██▋       | 19/70 [21:03<26:22, 31.04s/it


Loss: 1.4749808882673583
Policy Loss 0.5726494863629341
Value Loss 0.9023314019044241
6 batches processed
Examples:
tensor([0.0284, 0.0231, 0.0267, 0.0287, 0.0230, 0.0264, 0.0268, 0.0205, 0.0249,
        0.0278, 0.0276, 0.0304, 0.0258, 0.0198, 0.0252, 0.0250, 0.0281, 0.0284,
        0.0236, 0.0276, 0.0201, 0.0217, 0.0298, 0.0271, 0.0224, 0.0283, 0.0257,
        0.0199, 0.0255, 0.0251, 0.0281, 0.0288, 0.0233, 0.0278, 0.0203, 0.0301,
        0.0271, 0.0229, 0.0279, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000])
tensor([0.0240, 0.0521, 0.0301, 0.0902, 0.0281, 0.0200, 0.0401, 0.0120, 0.0240,
        0.0621, 0.0140, 0.0220, 0.0120, 0.0080, 0.0461, 0.0040, 0.0441, 0.0220,
        0.0020, 0.0120, 0.0140, 0.0100, 0.0100, 0.0200, 0.0120, 0.0100, 0.0060,
        0.0040, 0.0100, 0.0561, 0.0220, 0.0361, 0.0020, 0.0641, 0.


  0%|          | 0/70 [00:00<?, ?it/s]
(Game length:0):   0%|          | 0/70 [00:00<?, ?it/s]
(Game length:1):   0%|          | 0/70 [03:11<?, ?it/s]
(Game length:2):   0%|          | 0/70 [06:12<?, ?it/s]
(Game length:2):   1%|▏         | 1/70 [09:01<10:22:27, 541.27s/it]
(Game length:3):   1%|▏         | 1/70 [09:01<10:22:27, 541.27s/it]
(Game length:3):   3%|▎         | 2/70 [11:35<5:55:29, 313.67s/it] 
(Game length:4):   7%|▋         | 5/70 [11:35<5:39:48, 313.67s/it]
(Game length:5):   7%|▋         | 5/70 [13:52<5:39:48, 313.67s/it]
(Game length:5):   9%|▊         | 6/70 [15:58<2:10:44, 122.58s/it]
(Game length:6):  19%|█▊        | 13/70 [15:58<1:56:26, 122.58s/it]
(Game length:6):  20%|██        | 14/70 [17:35<43:30, 46.61s/it]   
(Game length:7):  29%|██▊       | 20/70 [17:35<38:50, 46.61s/it]
(Game length:7):  30%|███       | 21/70 [18:49<24:28, 29.97s/it]
(Game length:8):  39%|███▊      | 27/70 [18:49<21:28, 29.97s/it]
(Game length:8):  40%|████      | 28/70 [20:00<15:27, 22


Loss: 1.5036077722907066
Policy Loss 0.5822706148028374
Value Loss 0.9213371574878693
6 batches processed
Examples:
tensor([0.0339, 0.0398, 0.0485, 0.0345, 0.0401, 0.0484, 0.0363, 0.0303, 0.0371,
        0.0381, 0.0379, 0.0359, 0.0293, 0.0358, 0.0325, 0.0383, 0.0293, 0.0384,
        0.0311, 0.0309, 0.0358, 0.0295, 0.0353, 0.0326, 0.0388, 0.0393, 0.0303,
        0.0317, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000])
tensor([0.0721, 0.0120, 0.2024, 0.0220, 0.0160, 0.1142, 0.0160, 0.0040, 0.0421,
        0.0120, 0.0160, 0.0120, 0.0160, 0.0220, 0.0100, 0.0060, 0.0120, 0.0140,
        0.0020, 0.0080, 0.0441, 0.


  0%|          | 0/70 [00:00<?, ?it/s]
(Game length:0):   0%|          | 0/70 [00:00<?, ?it/s]
(Game length:1):   0%|          | 0/70 [03:11<?, ?it/s]
(Game length:2):   0%|          | 0/70 [06:16<?, ?it/s]
(Game length:3):   0%|          | 0/70 [09:09<?, ?it/s]
(Game length:3):   1%|▏         | 1/70 [11:51<13:38:44, 711.94s/it]
(Game length:4):   1%|▏         | 1/70 [11:51<13:38:44, 711.94s/it]
(Game length:4):   3%|▎         | 2/70 [14:16<7:08:32, 378.13s/it] 
(Game length:5):   4%|▍         | 3/70 [14:16<7:02:14, 378.13s/it]
(Game length:5):   6%|▌         | 4/70 [16:23<3:19:05, 180.99s/it]
(Game length:6):  11%|█▏        | 8/70 [16:23<3:07:01, 180.99s/it]
(Game length:6):  13%|█▎        | 9/70 [18:06<1:11:18, 70.14s/it] 
(Game length:7):  16%|█▌        | 11/70 [18:06<1:08:58, 70.14s/it]
(Game length:7):  17%|█▋        | 12/70 [19:38<53:30, 55.36s/it]  
(Game length:8):  26%|██▌       | 18/70 [19:38<47:58, 55.36s/it]
(Game length:8):  27%|██▋       | 19/70 [21:02<26:39, 31.35s/it]



Loss: 1.4449424942334494
Policy Loss 0.5913538336753845
Value Loss 0.8535886605580648
6 batches processed
Examples:
tensor([0.0402, 0.0267, 0.0391, 0.0345, 0.0377, 0.0292, 0.0417, 0.0365, 0.0352,
        0.0363, 0.0294, 0.0384, 0.0350, 0.0323, 0.0416, 0.0405, 0.0336, 0.0372,
        0.0362, 0.0346, 0.0358, 0.0294, 0.0379, 0.0348, 0.0326, 0.0422, 0.0340,
        0.0373, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000])
tensor([0.0501, 0.0782, 0.0020, 0.0020, 0.0281, 0.0281, 0.0020, 0.0220, 0.1603,
        0.0100, 0.0782, 0.0361, 0.0301, 0.0160, 0.


  0%|          | 0/70 [00:00<?, ?it/s]
(Game length:0):   0%|          | 0/70 [00:00<?, ?it/s]
(Game length:1):   0%|          | 0/70 [03:11<?, ?it/s]
(Game length:2):   0%|          | 0/70 [06:12<?, ?it/s]
(Game length:2):   1%|▏         | 1/70 [09:01<10:22:13, 541.06s/it]
(Game length:3):   1%|▏         | 1/70 [09:01<10:22:13, 541.06s/it]
(Game length:3):   3%|▎         | 2/70 [11:35<5:55:21, 313.54s/it] 
(Game length:4):   3%|▎         | 2/70 [11:35<5:55:21, 313.54s/it]
(Game length:4):   4%|▍         | 3/70 [13:53<4:20:29, 233.28s/it]
(Game length:5):   7%|▋         | 5/70 [13:53<4:12:43, 233.28s/it]
(Game length:5):   9%|▊         | 6/70 [15:55<1:52:57, 105.90s/it]
(Game length:6):  21%|██▏       | 15/70 [15:55<1:37:04, 105.90s/it]
(Game length:6):  23%|██▎       | 16/70 [17:25<29:05, 32.33s/it]   
(Game length:7):  24%|██▍       | 17/70 [17:25<28:33, 32.33s/it]
(Game length:7):  26%|██▌       | 18/70 [18:49<29:32, 34.09s/it]
(Game length:8):  37%|███▋      | 26/70 [18:49<25:00, 


Loss: 1.5172307267785072
Policy Loss 0.6393632665276527
Value Loss 0.8778674602508545
6 batches processed
Examples:
tensor([0.0373, 0.0320, 0.0348, 0.0263, 0.0359, 0.0304, 0.0281, 0.0340, 0.0372,
        0.0315, 0.0348, 0.0243, 0.0264, 0.0361, 0.0305, 0.0276, 0.0346, 0.0341,
        0.0262, 0.0291, 0.0287, 0.0341, 0.0341, 0.0262, 0.0294, 0.0289, 0.0345,
        0.0339, 0.0265, 0.0291, 0.0288, 0.0346, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000])
tensor([0.0301, 0.0240, 0.0140, 0.0060, 0.0120, 0.0060, 0.0601, 0.0160, 0.0080,
        0.0020, 0.0140, 0.0100, 0.0741, 0.2365, 0.0301, 0.0020, 0.0020, 0.0220,
        0.0301, 0.0100, 0.0020, 0.0281, 0.0020, 0.0020, 0.1583, 0.


  0%|          | 0/70 [00:00<?, ?it/s]
(Game length:0):   0%|          | 0/70 [00:00<?, ?it/s]
(Game length:1):   0%|          | 0/70 [03:08<?, ?it/s]
(Game length:2):   0%|          | 0/70 [06:13<?, ?it/s]
(Game length:2):   1%|▏         | 1/70 [09:03<10:24:34, 543.11s/it]
(Game length:3):   1%|▏         | 1/70 [09:03<10:24:34, 543.11s/it]
(Game length:4):   1%|▏         | 1/70 [11:38<10:24:34, 543.11s/it]
(Game length:5):   1%|▏         | 1/70 [14:02<10:24:34, 543.11s/it]
(Game length:5):   3%|▎         | 2/70 [16:09<8:57:55, 474.64s/it] 
(Game length:6):  13%|█▎        | 9/70 [16:09<8:02:33, 474.64s/it]
(Game length:6):  14%|█▍        | 10/70 [17:47<1:12:03, 72.06s/it]
(Game length:7):  17%|█▋        | 12/70 [17:47<1:09:39, 72.06s/it]
(Game length:7):  19%|█▊        | 13/70 [19:18<55:52, 58.81s/it]  
(Game length:8):  33%|███▎      | 23/70 [19:18<46:03, 58.81s/it]
(Game length:8):  34%|███▍      | 24/70 [20:31<20:06, 26.23s/it]
(Game length:9):  39%|███▊      | 27/70 [20:31<18:47, 


Loss: 1.4562278936306634
Policy Loss 0.5370314568281174
Value Loss 0.9191964368025461
6 batches processed
Examples:
tensor([0.1162, 0.0674, 0.1314, 0.0651, 0.1046, 0.1054, 0.0401, 0.0508, 0.1084,
        0.1146, 0.0683, 0.0277, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000])
tensor([0.1443, 0.0180, 0.1222, 0.0020, 0.0060, 0.0982, 0.0100, 0.3046, 0.2144,
        0.0681, 0.0060, 0.0060, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.


  0%|          | 0/70 [00:00<?, ?it/s]
(Game length:0):   0%|          | 0/70 [00:00<?, ?it/s]
(Game length:1):   0%|          | 0/70 [03:13<?, ?it/s]
(Game length:2):   0%|          | 0/70 [06:20<?, ?it/s]
(Game length:3):   0%|          | 0/70 [09:10<?, ?it/s]
(Game length:3):   1%|▏         | 1/70 [11:49<13:35:29, 709.13s/it]
(Game length:4):   1%|▏         | 1/70 [11:49<13:35:29, 709.13s/it]
(Game length:4):   3%|▎         | 2/70 [14:08<7:03:59, 374.12s/it] 
(Game length:5):   3%|▎         | 2/70 [14:08<7:03:59, 374.12s/it]
(Game length:5):   4%|▍         | 3/70 [16:14<4:51:05, 260.67s/it]
(Game length:6):  10%|█         | 7/70 [16:14<4:33:42, 260.67s/it]
(Game length:6):  11%|█▏        | 8/70 [17:53<1:18:49, 76.28s/it] 
(Game length:7):  11%|█▏        | 8/70 [17:53<1:18:49, 76.28s/it]
(Game length:7):  13%|█▎        | 9/70 [19:25<1:20:26, 79.13s/it]
(Game length:8):  23%|██▎       | 16/70 [19:25<1:11:12, 79.13s/it]
(Game length:8):  24%|██▍       | 17/70 [20:51<29:19, 33.19s/it] 


Loss: 1.3627047315239906
Policy Loss 0.5168577507138252
Value Loss 0.8458469808101654
6 batches processed
Examples:
tensor([0.0903, 0.0962, 0.1013, 0.0893, 0.0851, 0.0767, 0.0759, 0.0776, 0.0719,
        0.0784, 0.0797, 0.0776, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000])
tensor([0.0381, 0.0220, 0.0381, 0.0301, 0.0321, 0.5611, 0.0220, 0.0160, 0.0301,
        0.0240, 0.0601, 0.1263, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.


  0%|          | 0/70 [00:00<?, ?it/s]
(Game length:0):   0%|          | 0/70 [00:00<?, ?it/s]
(Game length:1):   0%|          | 0/70 [03:09<?, ?it/s]
(Game length:2):   0%|          | 0/70 [06:10<?, ?it/s]
(Game length:3):   0%|          | 0/70 [09:00<?, ?it/s]
(Game length:3):   1%|▏         | 1/70 [11:40<13:26:07, 700.98s/it]
(Game length:4):   3%|▎         | 2/70 [11:40<13:14:26, 700.98s/it]
(Game length:4):   4%|▍         | 3/70 [14:01<4:21:06, 233.83s/it] 
(Game length:5):   4%|▍         | 3/70 [14:01<4:21:06, 233.83s/it]
(Game length:5):   6%|▌         | 4/70 [16:05<3:35:11, 195.63s/it]
(Game length:6):  11%|█▏        | 8/70 [16:05<3:22:08, 195.63s/it]
(Game length:6):  13%|█▎        | 9/70 [17:44<1:11:42, 70.54s/it] 
(Game length:7):  13%|█▎        | 9/70 [17:44<1:11:42, 70.54s/it]
(Game length:7):  14%|█▍        | 10/70 [19:16<1:14:10, 74.18s/it]
(Game length:8):  26%|██▌       | 18/70 [19:16<1:04:17, 74.18s/it]
(Game length:8):  27%|██▋       | 19/70 [20:39<25:14, 29.70s/it]


Loss: 1.5599966123700142
Policy Loss 0.5747036412358284
Value Loss 0.9852929711341858
6 batches processed
Examples:
tensor([0.0387, 0.0315, 0.0424, 0.0388, 0.0464, 0.0306, 0.0450, 0.0358, 0.0384,
        0.0331, 0.0435, 0.0370, 0.0312, 0.0442, 0.0409, 0.0414, 0.0380, 0.0362,
        0.0415, 0.0484, 0.0378, 0.0469, 0.0479, 0.0371, 0.0474, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000])
tensor([0.0000, 0.0000, 0.0020, 0.0040, 0.0020, 0.0000, 0.0060, 0.9419, 0.0020,
        0.0000, 0.0040, 0.0040, 0.0000, 0.0060, 0.0040, 0.0060, 0.0020, 0.0000,
        0.0040, 0.0020, 0.0020, 0.0020, 0.0020, 0.0020, 0.0020, 0.0000, 0.0000,
        0.


  0%|          | 0/70 [00:00<?, ?it/s]
(Game length:0):   0%|          | 0/70 [00:00<?, ?it/s]
(Game length:1):   0%|          | 0/70 [03:12<?, ?it/s]
(Game length:2):   0%|          | 0/70 [06:13<?, ?it/s]
(Game length:3):   0%|          | 0/70 [09:04<?, ?it/s]
(Game length:3):   1%|▏         | 1/70 [11:42<13:28:00, 702.61s/it]
(Game length:4):   1%|▏         | 1/70 [11:42<13:28:00, 702.61s/it]
(Game length:4):   3%|▎         | 2/70 [14:03<7:01:45, 372.13s/it] 
(Game length:5):   6%|▌         | 4/70 [14:03<6:49:20, 372.13s/it]
(Game length:5):   7%|▋         | 5/70 [16:09<2:27:02, 135.73s/it]
(Game length:6):  13%|█▎        | 9/70 [16:09<2:17:59, 135.73s/it]
(Game length:6):  14%|█▍        | 10/70 [17:51<1:03:04, 63.08s/it]
(Game length:7):  19%|█▊        | 13/70 [17:51<59:55, 63.08s/it]  
(Game length:7):  20%|██        | 14/70 [19:23<43:13, 46.31s/it]
(Game length:8):  36%|███▌      | 25/70 [19:23<34:43, 46.31s/it]
(Game length:8):  37%|███▋      | 26/70 [20:37<15:01, 20.49s/it]
(G


Loss: 1.397652480006218
Policy Loss 0.45680759847164154
Value Loss 0.9408448815345765
5 batches processed
Examples:
tensor([0.0399, 0.0506, 0.0692, 0.0407, 0.0508, 0.0687, 0.0412, 0.0405, 0.0522,
        0.0517, 0.0464, 0.0496, 0.0489, 0.0426, 0.0387, 0.0441, 0.0367, 0.0304,
        0.0446, 0.0374, 0.0302, 0.0450, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000])
tensor([0.1403, 0.0200, 0.0641, 0.0240, 0.1002, 0.0641, 0.0361, 0.0240, 0.0481,
        0.0100, 0.0421, 0.0561, 0.0541, 0.0341, 0.0521, 0.0341, 0.0341, 0.0281,
        0.0401, 0.0361, 0.0200, 0.0381, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000])
tensor([-0.0253], grad_fn=<SelectBackward0>)
tensor(1.)
187/500 with lr 0.0004637


  0%|          | 0/70 [00:00<?, ?it/s]
(Game length:0):   0%|          | 0/70 [00:00<?, ?it/s]
(Game length:1):   0%|          | 0/70 [03:15<?, ?it/s]
(Game length:2):   0%|          | 0/70 [06:21<?, ?it/s]
(Game length:2):   1%|▏         | 1/70 [09:14<10:37:30, 554.36s/it]
(Game length:3):   1%|▏         | 1/70 [09:14<10:37:30, 554.36s/it]
(Game length:4):   1%|▏         | 1/70 [11:54<10:37:30, 554.36s/it]
(Game length:5):   1%|▏         | 1/70 [14:16<10:37:30, 554.36s/it]
(Game length:5):   3%|▎         | 2/70 [16:29<9:09:02, 484.45s/it] 
(Game length:6):  13%|█▎        | 9/70 [16:29<8:12:31, 484.45s/it]
(Game length:6):  14%|█▍        | 10/70 [18:14<1:14:06, 74.10s/it]
(Game length:6):  21%|██▏       | 15/70 [18:14<38:14, 41.72s/it]  
(Game length:7):  21%|██▏       | 15/70 [18:14<38:14, 41.72s/it]
(Game length:7):  21%|██▏       | 15/70 [18:24<38:14, 41.72s/it]
(Game length:7):  23%|██▎       | 16/70 [19:35<41:28, 46.08s/it]
(Game length:8):  31%|███▏      | 22/70 [19:35<36:52, 46


Loss: 1.3070518349607787
Policy Loss 0.44457045942544937
Value Loss 0.8624813755353292
6 batches processed
Examples:
tensor([0.0664, 0.0914, 0.0708, 0.0883, 0.0965, 0.0634, 0.0902, 0.0705, 0.0887,
        0.0896, 0.0936, 0.0907, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000])
tensor([0.0762, 0.0782, 0.0882, 0.0922, 0.0982, 0.1022, 0.1002, 0.0802, 0.0461,
        0.0721, 0.0902, 0.0762, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0


  0%|          | 0/70 [00:00<?, ?it/s]
(Game length:0):   0%|          | 0/70 [00:00<?, ?it/s]
(Game length:1):   0%|          | 0/70 [03:09<?, ?it/s]
(Game length:2):   0%|          | 0/70 [06:11<?, ?it/s]
(Game length:3):   0%|          | 0/70 [09:03<?, ?it/s]
(Game length:3):   1%|▏         | 1/70 [11:41<13:26:20, 701.17s/it]
(Game length:4):   1%|▏         | 1/70 [11:41<13:26:20, 701.17s/it]
(Game length:4):   3%|▎         | 2/70 [14:03<7:02:19, 372.65s/it] 
(Game length:5):   9%|▊         | 6/70 [14:03<6:37:29, 372.65s/it]
(Game length:5):  10%|█         | 7/70 [16:03<1:35:26, 90.90s/it] 
(Game length:6):  19%|█▊        | 13/70 [16:03<1:26:21, 90.90s/it]
(Game length:6):  20%|██        | 14/70 [17:40<40:29, 43.38s/it]  
(Game length:7):  26%|██▌       | 18/70 [17:40<37:35, 43.38s/it]
(Game length:7):  27%|██▋       | 19/70 [19:06<28:14, 33.23s/it]
(Game length:8):  39%|███▊      | 27/70 [19:06<23:48, 33.23s/it]
(Game length:8):  40%|████      | 28/70 [20:15<14:19, 20.47s/it]
(Gam


Loss: 1.4606667583187423
Policy Loss 0.5146907493472099
Value Loss 0.9459760089715322
6 batches processed
Examples:
tensor([0.0703, 0.0719, 0.0679, 0.0564, 0.0822, 0.0612, 0.0806, 0.0708, 0.0681,
        0.0753, 0.0741, 0.0498, 0.0602, 0.1113, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000])
tensor([0.0401, 0.0100, 0.0200, 0.0080, 0.0200, 0.0100, 0.0160, 0.0140, 0.0180,
        0.0281, 0.0040, 0.6273, 0.0160, 0.1683, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.


  0%|          | 0/70 [00:00<?, ?it/s]
(Game length:0):   0%|          | 0/70 [00:00<?, ?it/s]
(Game length:1):   0%|          | 0/70 [03:11<?, ?it/s]
(Game length:2):   0%|          | 0/70 [06:15<?, ?it/s]
(Game length:3):   0%|          | 0/70 [09:07<?, ?it/s]
(Game length:4):   0%|          | 0/70 [11:46<?, ?it/s]
(Game length:4):   1%|▏         | 1/70 [14:08<16:15:16, 848.07s/it]
(Game length:5):   3%|▎         | 2/70 [14:08<16:01:08, 848.07s/it]
(Game length:5):   4%|▍         | 3/70 [16:15<4:58:06, 266.96s/it] 
(Game length:6):  10%|█         | 7/70 [16:15<4:40:18, 266.96s/it]
(Game length:6):  11%|█▏        | 8/70 [18:00<1:31:31, 88.58s/it] 
(Game length:7):  21%|██▏       | 15/70 [18:00<1:21:11, 88.58s/it]
(Game length:7):  23%|██▎       | 16/70 [19:25<35:56, 39.94s/it]  
(Game length:8):  31%|███▏      | 22/70 [19:25<31:57, 39.94s/it]
(Game length:8):  33%|███▎      | 23/70 [20:46<21:30, 27.47s/it]
(Game length:9):  41%|████▏     | 29/70 [20:46<18:46, 27.47s/it]
(Game length:


Loss: 1.3695955723524094
Policy Loss 0.5019865483045578
Value Loss 0.8676090240478516
6 batches processed
Examples:
tensor([0.0914, 0.0771, 0.1116, 0.1025, 0.0864, 0.0762, 0.0958, 0.1023, 0.0863,
        0.0745, 0.0959, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000])
tensor([0.1423, 0.1303, 0.1403, 0.1082, 0.1062, 0.0261, 0.0581, 0.0721, 0.0421,
        0.0962, 0.0782, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.


  0%|          | 0/70 [00:00<?, ?it/s]
(Game length:0):   0%|          | 0/70 [00:00<?, ?it/s]
(Game length:1):   0%|          | 0/70 [03:10<?, ?it/s]
(Game length:2):   0%|          | 0/70 [06:11<?, ?it/s]
(Game length:2):   1%|▏         | 1/70 [09:02<10:23:35, 542.25s/it]
(Game length:3):   1%|▏         | 1/70 [09:02<10:23:35, 542.25s/it]
(Game length:3):   3%|▎         | 2/70 [11:38<5:57:26, 315.39s/it] 
(Game length:4):   3%|▎         | 2/70 [11:38<5:57:26, 315.39s/it]
(Game length:4):   4%|▍         | 3/70 [13:59<4:22:52, 235.41s/it]
(Game length:5):   6%|▌         | 4/70 [13:59<4:18:56, 235.41s/it]
(Game length:5):   7%|▋         | 5/70 [15:59<2:27:32, 136.20s/it]
(Game length:6):  11%|█▏        | 8/70 [15:59<2:20:44, 136.20s/it]
(Game length:6):  13%|█▎        | 9/70 [17:42<1:09:07, 67.99s/it] 
(Game length:7):  24%|██▍       | 17/70 [17:42<1:00:03, 67.99s/it]
(Game length:7):  26%|██▌       | 18/70 [19:07<25:11, 29.06s/it]  
(Game length:8):  37%|███▋      | 26/70 [19:07<21:18


Loss: 1.4299931585788728
Policy Loss 0.5168351233005524
Value Loss 0.9131580352783203
5 batches processed
Examples:
tensor([0.0530, 0.0439, 0.0426, 0.0532, 0.0438, 0.0428, 0.0509, 0.0447, 0.0520,
        0.0317, 0.0550, 0.0429, 0.0354, 0.0430, 0.0430, 0.0451, 0.0488, 0.0463,
        0.0435, 0.0484, 0.0464, 0.0436, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000])
tensor([0.3026, 0.0261, 0.0301, 0.1022, 0.0040, 0.0421, 0.0401, 0.0140, 0.0020,
        0.0020, 0.0381, 0.0040, 0.0020, 0.0020, 0.0962, 0.0100, 0.0281, 0.0541,
        0.0120, 0.1263, 0.0421, 0.0200, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000])
tensor([-0.1181], grad_fn=<SelectBackward0>)
tensor(1.)
191/500 with lr 0.0004545781307912651



  0%|          | 0/70 [00:00<?, ?it/s]
(Game length:0):   0%|          | 0/70 [00:00<?, ?it/s]
(Game length:1):   0%|          | 0/70 [03:09<?, ?it/s]
(Game length:2):   0%|          | 0/70 [06:11<?, ?it/s]
(Game length:2):   1%|▏         | 1/70 [08:59<10:20:12, 539.31s/it]
(Game length:3):   1%|▏         | 1/70 [08:59<10:20:12, 539.31s/it]
(Game length:3):   3%|▎         | 2/70 [11:32<5:54:02, 312.39s/it] 
(Game length:4):   6%|▌         | 4/70 [11:32<5:43:37, 312.39s/it]
(Game length:5):   6%|▌         | 4/70 [13:43<5:43:37, 312.39s/it]
(Game length:5):   7%|▋         | 5/70 [15:41<2:40:20, 148.00s/it]
(Game length:6):  11%|█▏        | 8/70 [15:41<2:32:56, 148.00s/it]
(Game length:6):  13%|█▎        | 9/70 [17:22<1:18:29, 77.20s/it] 
(Game length:7):  16%|█▌        | 11/70 [17:22<1:15:54, 77.20s/it]
(Game length:7):  17%|█▋        | 12/70 [18:57<57:47, 59.79s/it]  
(Game length:8):  29%|██▊       | 20/70 [18:57<49:49, 59.79s/it]
(Game length:8):  30%|███       | 21/70 [20:17<23:01, 


Loss: 1.3334438080588975
Policy Loss 0.4990721419453621
Value Loss 0.8343716661135355
6 batches processed
Examples:
tensor([0.0605, 0.0629, 0.0654, 0.0499, 0.0586, 0.0579, 0.0579, 0.0653, 0.0621,
        0.0641, 0.0497, 0.0585, 0.0584, 0.0570, 0.0679, 0.0538, 0.0500, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000])
tensor([0.0140, 0.7054, 0.0120, 0.1242, 0.0060, 0.0100, 0.0100, 0.0080, 0.0080,
        0.0120, 0.0160, 0.0100, 0.0140, 0.0100, 0.0140, 0.0180, 0.0080, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.


  0%|          | 0/70 [00:00<?, ?it/s]
(Game length:0):   0%|          | 0/70 [00:00<?, ?it/s]
(Game length:1):   0%|          | 0/70 [03:10<?, ?it/s]
(Game length:2):   0%|          | 0/70 [06:10<?, ?it/s]
(Game length:3):   0%|          | 0/70 [09:02<?, ?it/s]
(Game length:3):   1%|▏         | 1/70 [11:42<13:27:47, 702.42s/it]
(Game length:4):   3%|▎         | 2/70 [11:42<13:16:04, 702.42s/it]
(Game length:4):   4%|▍         | 3/70 [14:00<4:20:22, 233.18s/it] 
(Game length:5):   6%|▌         | 4/70 [14:00<4:16:29, 233.18s/it]
(Game length:5):   7%|▋         | 5/70 [15:59<2:36:00, 144.00s/it]
(Game length:6):   9%|▊         | 6/70 [15:59<2:33:36, 144.00s/it]
(Game length:6):  10%|█         | 7/70 [17:44<1:50:25, 105.17s/it]
(Game length:7):  14%|█▍        | 10/70 [17:44<1:45:10, 105.17s/it]
(Game length:7):  16%|█▌        | 11/70 [19:18<59:26, 60.45s/it]   
(Game length:8):  27%|██▋       | 19/70 [19:18<51:23, 60.45s/it]
(Game length:8):  29%|██▊       | 20/70 [20:38<23:00, 27.62s/it


Loss: 1.444189546008905
Policy Loss 0.5501720383763313
Value Loss 0.8940175076325735
6 batches processed
Examples:
tensor([0.0269, 0.0268, 0.0290, 0.0264, 0.0245, 0.0334, 0.0274, 0.0303, 0.0314,
        0.0260, 0.0317, 0.0277, 0.0314, 0.0214, 0.0253, 0.0239, 0.0276, 0.0288,
        0.0271, 0.0269, 0.0291, 0.0265, 0.0245, 0.0334, 0.0272, 0.0305, 0.0313,
        0.0262, 0.0315, 0.0275, 0.0313, 0.0213, 0.0252, 0.0241, 0.0275, 0.0287,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000])
tensor([0.9198, 0.0020, 0.0020, 0.0000, 0.0000, 0.0020, 0.0000, 0.0040, 0.0080,
        0.0000, 0.0020, 0.0060, 0.0100, 0.0000, 0.0000, 0.0000, 0.0000, 0.0020,
        0.0000, 0.0040, 0.0020, 0.0000, 0.0000, 0.0080, 0.0000, 0.0140, 0.0020,
        0.0000, 0.0020, 0.0040, 0.0040, 0.0


  0%|          | 0/70 [00:00<?, ?it/s]
(Game length:0):   0%|          | 0/70 [00:00<?, ?it/s]
(Game length:1):   0%|          | 0/70 [03:11<?, ?it/s]
(Game length:2):   0%|          | 0/70 [06:13<?, ?it/s]
(Game length:3):   0%|          | 0/70 [09:04<?, ?it/s]
(Game length:3):   1%|▏         | 1/70 [11:45<13:31:51, 705.97s/it]
(Game length:4):   1%|▏         | 1/70 [11:45<13:31:51, 705.97s/it]
(Game length:5):   1%|▏         | 1/70 [14:12<13:31:51, 705.97s/it]
(Game length:5):   3%|▎         | 2/70 [16:22<8:33:56, 453.48s/it] 
(Game length:6):   7%|▋         | 5/70 [16:22<8:11:16, 453.48s/it]
(Game length:6):   9%|▊         | 6/70 [18:03<2:11:42, 123.48s/it]
(Game length:7):  11%|█▏        | 8/70 [18:03<2:07:35, 123.48s/it]
(Game length:7):  13%|█▎        | 9/70 [19:34<1:22:38, 81.29s/it] 
(Game length:8):  21%|██▏       | 15/70 [19:34<1:14:30, 81.29s/it]
(Game length:8):  23%|██▎       | 16/70 [20:57<35:36, 39.56s/it]  
(Game length:9):  31%|███▏      | 22/70 [20:57<31:39, 39.56s/i


Loss: 1.4315513471762338
Policy Loss 0.5194155275821686
Value Loss 0.9121358195940653
6 batches processed
Examples:
tensor([0.0714, 0.0840, 0.0815, 0.0595, 0.0921, 0.0790, 0.0836, 0.0571, 0.0911,
        0.0801, 0.0825, 0.0687, 0.0694, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000])
tensor([0.0140, 0.0381, 0.0381, 0.0060, 0.1162, 0.0100, 0.0180, 0.0020, 0.6112,
        0.0180, 0.0421, 0.0381, 0.0481, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.


  0%|          | 0/70 [00:00<?, ?it/s]
(Game length:0):   0%|          | 0/70 [00:00<?, ?it/s]
(Game length:1):   0%|          | 0/70 [03:09<?, ?it/s]
(Game length:2):   0%|          | 0/70 [06:12<?, ?it/s]
(Game length:3):   0%|          | 0/70 [09:02<?, ?it/s]
(Game length:3):   1%|▏         | 1/70 [11:43<13:28:58, 703.46s/it]
(Game length:4):   4%|▍         | 3/70 [11:43<13:05:32, 703.46s/it]
(Game length:4):   6%|▌         | 4/70 [14:03<3:07:58, 170.88s/it] 
(Game length:5):   7%|▋         | 5/70 [14:03<3:05:07, 170.88s/it]
(Game length:5):   9%|▊         | 6/70 [16:05<2:11:11, 122.99s/it]
(Game length:6):  11%|█▏        | 8/70 [16:05<2:07:05, 122.99s/it]
(Game length:6):  13%|█▎        | 9/70 [17:44<1:20:58, 79.65s/it] 
(Game length:7):  19%|█▊        | 13/70 [17:44<1:15:40, 79.65s/it]
(Game length:7):  20%|██        | 14/70 [19:09<43:03, 46.13s/it]  
(Game length:8):  26%|██▌       | 18/70 [19:09<39:58, 46.13s/it]
(Game length:8):  27%|██▋       | 19/70 [20:26<27:54, 32.83s/it]



Loss: 1.3911907076835632
Policy Loss 0.5276655852794647
Value Loss 0.8635251224040985
6 batches processed
Examples:
tensor([0.0169, 0.0223, 0.0165, 0.0148, 0.0188, 0.0195, 0.0197, 0.0137, 0.0159,
        0.0201, 0.0179, 0.0192, 0.0170, 0.0225, 0.0164, 0.0148, 0.0186, 0.0195,
        0.0197, 0.0138, 0.0159, 0.0202, 0.0178, 0.0192, 0.0171, 0.0231, 0.0164,
        0.0147, 0.0191, 0.0196, 0.0190, 0.0136, 0.0167, 0.0148, 0.0202, 0.0175,
        0.0183, 0.0185, 0.0174, 0.0237, 0.0169, 0.0147, 0.0188, 0.0192, 0.0194,
        0.0139, 0.0169, 0.0148, 0.0168, 0.0203, 0.0172, 0.0180, 0.0188, 0.0179,
        0.0185, 0.0174, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000])
tensor([0.0281, 0.0020, 0.0301, 0.0020, 0.0100, 0.0080, 0.0060, 0.0160, 0.0341,
        0.0100, 0.0020, 0.0140, 0.0120, 0.0160, 0.0020, 0.0721, 0.0060, 0.0080,
        0.0641, 0.0040, 0.0080, 0.0160, 0.0160, 0.0040, 0.0080, 0.0080, 0.0080,
        0.0060, 0.0060, 0.0140, 0.0200, 0.0080, 0.0040, 0.0140, 0.0621, 0.0040,
        0.


  0%|          | 0/70 [00:00<?, ?it/s]
(Game length:0):   0%|          | 0/70 [00:00<?, ?it/s]
(Game length:1):   0%|          | 0/70 [03:09<?, ?it/s]
(Game length:2):   0%|          | 0/70 [06:11<?, ?it/s]
(Game length:3):   0%|          | 0/70 [09:03<?, ?it/s]
(Game length:3):   1%|▏         | 1/70 [11:44<13:30:27, 704.75s/it]
(Game length:4):   1%|▏         | 1/70 [11:44<13:30:27, 704.75s/it]
(Game length:4):   3%|▎         | 2/70 [14:11<7:06:51, 376.65s/it] 
(Game length:5):   4%|▍         | 3/70 [14:11<7:00:35, 376.65s/it]
(Game length:5):   6%|▌         | 4/70 [16:17<3:18:00, 180.01s/it]
(Game length:6):  13%|█▎        | 9/70 [16:17<3:03:00, 180.01s/it]
(Game length:6):  14%|█▍        | 10/70 [18:00<1:01:14, 61.25s/it]
(Game length:7):  21%|██▏       | 15/70 [18:00<56:08, 61.25s/it]  
(Game length:7):  23%|██▎       | 16/70 [19:24<33:26, 37.15s/it]
(Game length:8):  29%|██▊       | 20/70 [19:24<30:57, 37.15s/it]
(Game length:8):  30%|███       | 21/70 [20:46<23:57, 29.33s/it]
(G


Loss: 1.4032005667686462
Policy Loss 0.5381133556365967
Value Loss 0.8650872111320496
5 batches processed
Examples:
tensor([0.0320, 0.0318, 0.0291, 0.0318, 0.0331, 0.0296, 0.0298, 0.0355, 0.0294,
        0.0309, 0.0342, 0.0336, 0.0319, 0.0336, 0.0311, 0.0305, 0.0311, 0.0315,
        0.0303, 0.0307, 0.0334, 0.0348, 0.0264, 0.0310, 0.0337, 0.0309, 0.0374,
        0.0331, 0.0313, 0.0374, 0.0391, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000])
tensor([0.8978, 0.0020, 0.0000, 0.0040, 0.0100, 0.0000, 0.0000, 0.0040, 0.0000,
        0.0120, 0.0160, 0.0040, 0.0020, 0.0040, 0.0040, 0.0020, 0.0000, 0.0020,
        0.0000, 0.0020, 0.0040, 0.0040, 0.0000, 0.0000, 0.0060, 0.0000, 0.0040,
        0.0040, 0.0040, 0.0060, 0.0020, 0.0000, 0.0000, 0.0000, 0.


  0%|          | 0/70 [00:00<?, ?it/s]
(Game length:0):   0%|          | 0/70 [00:00<?, ?it/s]
(Game length:1):   0%|          | 0/70 [03:09<?, ?it/s]
(Game length:2):   0%|          | 0/70 [06:09<?, ?it/s]
(Game length:2):   1%|▏         | 1/70 [08:54<10:14:43, 534.55s/it]
(Game length:3):   1%|▏         | 1/70 [08:54<10:14:43, 534.55s/it]
(Game length:3):   3%|▎         | 2/70 [11:26<5:50:36, 309.37s/it] 
(Game length:4):   4%|▍         | 3/70 [11:26<5:45:27, 309.37s/it]
(Game length:5):   4%|▍         | 3/70 [13:38<5:45:27, 309.37s/it]
(Game length:5):   6%|▌         | 4/70 [15:40<3:34:28, 194.98s/it]
(Game length:6):  13%|█▎        | 9/70 [15:40<3:18:13, 194.98s/it]
(Game length:6):  14%|█▍        | 10/70 [17:16<1:04:33, 64.56s/it]
(Game length:7):  17%|█▋        | 12/70 [17:16<1:02:24, 64.56s/it]
(Game length:7):  19%|█▊        | 13/70 [18:43<49:44, 52.36s/it]  
(Game length:8):  30%|███       | 21/70 [18:43<42:45, 52.36s/it]
(Game length:8):  31%|███▏      | 22/70 [19:59<21:01, 


Loss: 1.3801751792430879
Policy Loss 0.5039259016513824
Value Loss 0.8762492775917053
5 batches processed
Examples:
tensor([0.3022, 0.0504, 0.0658, 0.0501, 0.0519, 0.0302, 0.0266, 0.0337, 0.0244,
        0.0288, 0.0754, 0.0188, 0.0895, 0.0156, 0.0142, 0.1222, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000])
tensor([0.0401, 0.0521, 0.0020, 0.0982, 0.6072, 0.0361, 0.0160, 0.0020, 0.0060,
        0.0020, 0.0281, 0.0321, 0.0200, 0.0261, 0.0080, 0.0240, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.


  0%|          | 0/70 [00:00<?, ?it/s]
(Game length:0):   0%|          | 0/70 [00:00<?, ?it/s]
(Game length:1):   0%|          | 0/70 [03:07<?, ?it/s]
(Game length:2):   0%|          | 0/70 [06:08<?, ?it/s]
(Game length:3):   0%|          | 0/70 [08:57<?, ?it/s]
(Game length:3):   1%|▏         | 1/70 [11:34<13:18:39, 694.48s/it]
(Game length:4):   6%|▌         | 4/70 [11:34<12:43:55, 694.48s/it]
(Game length:4):   7%|▋         | 5/70 [13:46<2:22:34, 131.61s/it] 
(Game length:5):   9%|▊         | 6/70 [13:46<2:20:23, 131.61s/it]
(Game length:5):  10%|█         | 7/70 [15:45<1:49:23, 104.19s/it]
(Game length:6):  14%|█▍        | 10/70 [15:45<1:44:11, 104.19s/it]
(Game length:6):  16%|█▌        | 11/70 [17:23<1:01:49, 62.88s/it] 
(Game length:7):  24%|██▍       | 17/70 [17:23<55:32, 62.88s/it]  
(Game length:7):  26%|██▌       | 18/70 [18:43<29:19, 33.84s/it]
(Game length:8):  34%|███▍      | 24/70 [18:43<25:56, 33.84s/it]
(Game length:8):  36%|███▌      | 25/70 [20:00<17:43, 23.63s/it]



Loss: 1.4687478939692178
Policy Loss 0.5521951615810394
Value Loss 0.9165527323881785
6 batches processed
Examples:
tensor([0.0688, 0.0549, 0.0644, 0.0751, 0.0532, 0.0505, 0.0689, 0.0760, 0.0629,
        0.0733, 0.0657, 0.0772, 0.0640, 0.0725, 0.0725, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000])
tensor([0.0100, 0.0180, 0.0120, 0.0020, 0.0160, 0.0100, 0.0240, 0.7315, 0.0080,
        0.0020, 0.0160, 0.0361, 0.0020, 0.0441, 0.0681, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.


  0%|          | 0/70 [00:00<?, ?it/s]
(Game length:0):   0%|          | 0/70 [00:00<?, ?it/s]

KeyboardInterrupt: 