## Use bonus plates (length of 4)
## Important notes:
### - new color model: number of channels = 4, one channel for each color  
### - plate value encoded with a number: 0.1, 0.4, 1.0  
### - v3.2: all helpers moved to external module
### - v3.2: hint restored: s_before is now field after 'make_move'
### - v3.3: debugging CNN since learning doesn't work
### - v3.4: try bigger CNN 
### - v3.5: problem found. Large space of moves just doesn't work. Reduce to 1

In [10]:
# Needed for tests and a real game against the phone
#import pandas as pd
#import qgrid

import random as rd
import numpy as np
import tensorflow as tf

from collections import deque

import environment as ae

%pylab inline

Populating the interactive namespace from numpy and matplotlib


## Constants

In [11]:
# CNN
LEARNING_RATE = 0.001
UPDATE_TARGET_NET = 1000
MINIBATCH_SIZE = 128
GAMMA = 0.99

# Definitions
GAMES_TO_PLAY = 131072
REPLAY_MEMORY_SIZE = 8192
DYNAMIC_LEARNING_EPOCHS = 5
NUMBER_OF_MOVES_IN_GAME = 50

# Variables
MAXIMUM_SCORE = 0
CNN_MOVE_PROB = 0.1
CNN_MOVES_COUNT = 0
CNN_SUCCESSFUL_PREDICTION = 0
AGG_GAMES_NUMBER = 10
TOTAL_SCORE_AGG = 0.0
TOTAL_SUCCESSFUL_MOVES_AGG = 0.0

### Replay Memory Buffer

In [12]:
#
# Replay memory buffer
#
class ExperienceBuffer():
    '''
    Experience Replay Buffer
    Inspired by Andrea Lonza
    '''

    def __init__(self, buffer_size, gamma):
        # Constants
        self.gamma = gamma
        
        # Main Replay Memory buffer parts
        self.states_before = deque(maxlen=buffer_size)
        self.actions = deque(maxlen=buffer_size)
        self.total_rewards = deque(maxlen=buffer_size)
        self.states_after = deque(maxlen=buffer_size)
        self.last_moves = deque(maxlen=buffer_size)
   
    
    def add(self, state_before, action, reward, state_after, last_move):
        # Add certain items to corresponding buffers
        self.states_before.append(state_before)
        self.actions.append(action)
        self.total_rewards.append(reward)
        self.states_after.append(state_after)
        self.last_moves.append(last_move)
    
    
    def sample_minibatch(self, minibatch_size):
        '''
        Sample a minibatch of size batch_size
        Note1: always add the most recent completed move
        '''
        indices = rd.sample(range(len(self.states_before) - 1), minibatch_size - 1)
        # Add the most recent completed move index
        indices.append(len(self.states_before) - 1)
        
        minibatch_states_before = np.array([self.states_before[i] for i in indices]) 
        minibatch_actions = np.array([self.actions[i] for i in indices]) 
        minibatch_total_rewards = np.array([self.total_rewards[i] for i in indices]) 
        minibatch_states_after = np.array([self.states_after[i] for i in indices])  
        minibatch_last_moves = np.array([self.last_moves[i] for i in indices])   
        
        return minibatch_states_before, minibatch_actions, minibatch_total_rewards, minibatch_states_after, minibatch_last_moves
    
    
    def __len__(self):
        '''
        Return length of the current replay memory buffer
        Relevant for the first *minibatch_size* moves.
        '''
        return len(self.states_before)
    
    

## Deep Q-Nework

### Initialize Replay Buffer

In [13]:
replay_memory = ExperienceBuffer(REPLAY_MEMORY_SIZE, GAMMA)

### Initialize Online CNN and Target CNN

In [14]:
tf.random.set_seed(8)

# Initialize optimizer
online_cnn_optimizer = tf.keras.optimizers.Adam(LEARNING_RATE)


# Online CNN
Online_CNN = tf.keras.models.Sequential()
Online_CNN.add(tf.keras.layers.Conv2D(256, kernel_size=3, strides = (1, 1), padding='valid', activation=None, data_format='channels_last', kernel_initializer='GlorotUniform', input_shape=(7, 6, 4)))
Online_CNN.add(tf.keras.layers.Activation("relu"))
Online_CNN.add(tf.keras.layers.BatchNormalization())

Online_CNN.add(tf.keras.layers.Flatten())        

Online_CNN.add(tf.keras.layers.Dense(32, activation=None, kernel_initializer='GlorotUniform'))
Online_CNN.add(tf.keras.layers.Activation("relu"))
Online_CNN.add(tf.keras.layers.BatchNormalization())

Online_CNN.add(tf.keras.layers.Dense(1, activation=None, kernel_initializer='GlorotUniform'))
Online_CNN.add(tf.keras.layers.Activation("relu"))


Online_CNN.compile(optimizer=online_cnn_optimizer, loss="mean_squared_error")

# Target CNN
#Target_CNN = tf.keras.models.Sequential()
#Target_CNN.add(tf.keras.layers.Conv2D(128, kernel_size=3, strides = (1, 1), padding='same', activation=tf.keras.activations.tanh, data_format = 'channels_last', kernel_initializer='RandomNormal', input_shape=(7, 6, 4)))
#Target_CNN.add(tf.keras.layers.Conv2D(64, kernel_size=3, strides = (1, 1), padding='same', activation=tf.keras.activations.tanh, kernel_initializer='RandomNormal'))    
#Target_CNN.add(tf.keras.layers.Flatten())                      
#Target_CNN.add(tf.keras.layers.Dense(32, activation=tf.keras.activations.relu, kernel_initializer='RandomNormal'))
#Target_CNN.add(tf.keras.layers.Dense(1, activation=tf.keras.activations.relu, kernel_initializer='RandomNormal'))

#Target_CNN.compile(optimizer=online_cnn_optimizer, loss="mean_squared_error")


# Set weights equal
#Target_CNN.set_weights(Online_CNN.get_weights())

# Just to see how many trainable parameters
Online_CNN.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 5, 4, 256)         9472      
_________________________________________________________________
activation (Activation)      (None, 5, 4, 256)         0         
_________________________________________________________________
batch_normalization_4 (Batch (None, 5, 4, 256)         1024      
_________________________________________________________________
flatten_1 (Flatten)          (None, 5120)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 32)                163872    
_________________________________________________________________
activation_1 (Activation)    (None, 32)                0         
_________________________________________________________________
batch_normalization_5 (Batch (None, 32)               

In [8]:
#def loss(model, X, y_true, A):
#    prediction = model(X)
#    selected_action_values = tf.math.reduce_sum(prediction*A, axis=1)  
#    return tf.keras.losses.MSE(y_true, selected_action_values)


#def grad(model, inputs, targets, actions):
#    with tf.GradientTape() as tape:
#        loss_value = loss(model, inputs, targets, actions)
#    return loss_value, tape.gradient(loss_value, model.trainable_variables)

## Dynamic Learning

In [16]:
%%time

FOUND = False

# Number of moves made to follow the target CNN update strategy
total_moves = 1
                                                      
for game in range(GAMES_TO_PLAY):
    # Start one game
    game_score = 0
    successful_moves = 0

    # Initialize the game field
    field = np.zeros((7, 6, 4))
    field = ae.initialize_field_3D(field)

    for m in range(NUMBER_OF_MOVES_IN_GAME):
        # Total score of one move
        reward = 0

        # Whether CNN made the move
        cnn_made_move_flag = False
        
        # If replay_memory has less than 64 moves, then make a random move
        if ((len(replay_memory) < MINIBATCH_SIZE) or (rd.random() > CNN_MOVE_PROB)):
            move = rd.randint(1, ae.ACTIONS_DIMENSION)
        else:
            # CNN selects a move
            cnn_made_move_flag = True
            CNN_MOVES_COUNT = CNN_MOVES_COUNT + 1
            #_, move = ae.predict_max_score_3D(field.copy(), Target_CNN, ae.ACTIONS_DIMENSION, ae.MOVES)
            _, move = ae.predict_max_score_3D(field.copy(), Online_CNN, ae.ACTIONS_DIMENSION, ae.MOVES)

            
        # Make the move
        new_field, plate_a, plate_b = ae.make_move_v2_3D(field.copy(), move, ae.MOVES)
        field_after_move = new_field.copy()

        # Calculate the score and update the field
        score, new_field = ae.calculate_score_v2_3D(new_field, plate_a, plate_b)
        
        # If the move is successful, then update the field and check if we have new sets        
        successful_move_flag = False

        # While we have new sets (thus the score is greater than 0), process them, calculate score and move plates
        while (score > 0.):
            if (not successful_move_flag):
                successful_moves = successful_moves + 1
                successful_move_flag = True

            # Add new points to the total score of the move
            reward = reward + score

            # Move plates downward, fill the upper row so, that it doesn't have "easy" sets of three
            # Start from the left lower corner (in order to reuse color_fits())
            new_field = ae.fill_field_3D(new_field)

            # Calculate score and check whether we have new sets
            score, new_field = ae.calculate_score_v2_3D(new_field, (-1, -1), (-1, -1))

        # Increase the score of the whole game
        game_score = game_score + reward
        
        # Check whether it's the last move of the current game
        last_move = m == NUMBER_OF_MOVES_IN_GAME - 1         

        # Add new move to the replay memory
        if (successful_move_flag):
            replay_memory.add(field_after_move, move, reward, new_field, last_move)
            
            # Update CNN move statistics
            if (cnn_made_move_flag):    
                CNN_SUCCESSFUL_PREDICTION = CNN_SUCCESSFUL_PREDICTION + 1               
                
        else:
            replay_memory.add(field_after_move, move, 0, field, last_move)   
        
        #
        # Train CNN based on the score
        #
        if (len(replay_memory) >= MINIBATCH_SIZE):
            # Select random MINIBATCH_SIZE moves from replay memory buffer
            samples = replay_memory.sample_minibatch(MINIBATCH_SIZE)

            # Prepare some things for training
            s_before = samples[0]
            actions = samples[1]
            rewards = samples[2]
            s_after = samples[3] 
            dones = samples[4]
            
            # Carefully predict next rewards: we must use predict_max_score
            rewards_next = np.zeros((MINIBATCH_SIZE))
            
            for item in range(MINIBATCH_SIZE):
                # Calculate future rewards ONLY if current reward is NOT zero, otherwise no sense
                if (rewards[item] > 0):
                    #rewards_next[item], _ = ae.predict_max_score_3D(s_after[item], Target_CNN, ae.ACTIONS_DIMENSION, ae.MOVES)
                    rewards_next[item], _ = ae.predict_max_score_3D(s_after[item], Online_CNN, ae.ACTIONS_DIMENSION, ae.MOVES)


            #
            # Update online CNN weights: training step
            #
            actual_values = np.where(dones, rewards, rewards + GAMMA*rewards_next)
            
            #
            # This one doesn't work
            #
            
            #selected_actions = tf.one_hot(actions - 1, ae.ACTIONS_DIMENSION)
            
            #for _ in range(DYNAMIC_LEARNING_EPOCHS):             
            #    loss_value, grads = grad(Online_CNN, s_before, actual_values, selected_actions)
            #    optimizer.apply_gradients(zip(grads, Online_CNN.trainable_variables))
            
            history = Online_CNN.fit(x=s_before, y=actual_values, epochs=DYNAMIC_LEARNING_EPOCHS, verbose=0)
            
            # DEBUG: Trying to find a reason of NaN
            for layer in Online_CNN.layers:
                weights = layer.get_weights()
                for w in weights:
                    if (np.isnan(w).sum() > 0):
                        FOUND = True
                        break
                if (FOUND):
                    break
                    
        if (FOUND):
            print("NaN value found!")
            break

        # If move is successful, update the play field
        if (successful_move_flag):
            field = np.copy(new_field)
            
        # After each UPDATE_TARGET_NET moves update target CNN
        #if (total_moves % UPDATE_TARGET_NET == 0):
        #    Target_CNN.set_weights(Online_CNN.get_weights())
            
        total_moves = total_moves + 1

    if (FOUND):
        break
        
    #
    # Calculate and display overall stats
    #
    # Check whether we have new maximum score
    if (game_score > MAXIMUM_SCORE):
        print(f"New maximum: {game_score}, after {game} games.")
        MAXIMUM_SCORE = game_score
        
    # After each AGG_GAMES_NUMBER games output average game score, average number of successful moves per game
    TOTAL_SCORE_AGG = TOTAL_SCORE_AGG + game_score
    TOTAL_SUCCESSFUL_MOVES_AGG = TOTAL_SUCCESSFUL_MOVES_AGG + successful_moves
    
    if ((game % AGG_GAMES_NUMBER == 0) and (game > 0)):
        avg_score = TOTAL_SCORE_AGG / AGG_GAMES_NUMBER
        TOTAL_SCORE_AGG = 0.0
        
        avg_succ_moves = TOTAL_SUCCESSFUL_MOVES_AGG / AGG_GAMES_NUMBER
        TOTAL_SUCCESSFUL_MOVES_AGG = 0.0

        loss_value = history.history["loss"]
        print(f"Games: {game}, last {AGG_GAMES_NUMBER} games avg score: {avg_score}, avg of succ moves: {avg_succ_moves}, loss: {loss_value}")        
        print(f"CNN made {CNN_MOVES_COUNT} moves. Successful were {CNN_SUCCESSFUL_PREDICTION}")
        print(f"rewards_next is {rewards_next.sum()}")
        
        if (CNN_SUCCESSFUL_PREDICTION / CNN_MOVES_COUNT >= CNN_MOVE_PROB):
            CNN_MOVE_PROB = CNN_MOVE_PROB + 0.1
            
        CNN_MOVES_COUNT = 0
        CNN_SUCCESSFUL_PREDICTION = 0


New maximum: 41, after 0 games.
New maximum: 103, after 1 games.
New maximum: 118, after 10 games.
Games: 10, last 10 games avg score: 74.5, avg of succ moves: 10.4, loss: [889.613525390625, 595.6383666992188, 576.4573364257812, 391.62493896484375, 339.24249267578125]
CNN made 36 moves. Successful were 7
rewards_next is 3262.8039379119873
New maximum: 144, after 14 games.
Games: 20, last 10 games avg score: 78.8, avg of succ moves: 10.8, loss: [173789.4375, 194731.765625, 116686.578125, 169240.953125, 160481.921875]
CNN made 99 moves. Successful were 22
rewards_next is 64588.660217285156
Games: 30, last 10 games avg score: 60.0, avg of succ moves: 9.3, loss: [2103005.0, 1203282.0, 937201.125, 1016950.9375, 836138.125]
CNN made 151 moves. Successful were 30
rewards_next is 112376.56170463562
Games: 40, last 10 games avg score: 67.0, avg of succ moves: 9.3, loss: [23817372.0, 22005104.0, 21802560.0, 20358920.0, 19098000.0]
CNN made 155 moves. Successful were 36
rewards_next is 383792.986

Games: 380, last 10 games avg score: 150.2, avg of succ moves: 20.3, loss: [50229059584.0, 62882058240.0, 45813297152.0, 40326819840.0, 44943400960.0]
CNN made 303 moves. Successful were 166
rewards_next is 32036974.18359375
Games: 390, last 10 games avg score: 187.8, avg of succ moves: 20.3, loss: [150043049984.0, 150773235712.0, 153791528960.0, 144583000064.0, 139119165440.0]
CNN made 295 moves. Successful were 161
rewards_next is 50975666.5
Games: 400, last 10 games avg score: 149.2, avg of succ moves: 18.5, loss: [43524747264.0, 44033540096.0, 40061788160.0, 41150210048.0, 35682344960.0]
CNN made 297 moves. Successful were 149
rewards_next is 29055060.1875
Games: 410, last 10 games avg score: 174.8, avg of succ moves: 20.4, loss: [1021516709888.0, 1020054863872.0, 1025514078208.0, 1000073527296.0, 977609752576.0]
CNN made 301 moves. Successful were 153
rewards_next is 91169810.3125
Games: 420, last 10 games avg score: 173.0, avg of succ moves: 22.3, loss: [35219931136.0, 3212294963

Games: 740, last 10 games avg score: 225.9, avg of succ moves: 29.3, loss: [33158332416000.0, 33282301362176.0, 33216429817856.0, 33030169165824.0, 33081121570816.0]
CNN made 413 moves. Successful were 276
rewards_next is 245307342.375
Games: 750, last 10 games avg score: 204.4, avg of succ moves: 25.1, loss: [336610787328.0, 403249102848.0, 385563131904.0, 483512680448.0, 351459672064.0]
CNN made 405 moves. Successful were 238
rewards_next is 127305910.125
Games: 760, last 10 games avg score: 199.6, avg of succ moves: 23.4, loss: [280298586112.0, 285184262144.0, 225349550080.0, 209562697728.0, 169863266304.0]
CNN made 395 moves. Successful were 213
rewards_next is 132037296.3125
Games: 770, last 10 games avg score: 214.7, avg of succ moves: 26.0, loss: [1348318396416.0, 1307305967616.0, 1263270625280.0, 1334830170112.0, 1228000198656.0]
CNN made 397 moves. Successful were 241
rewards_next is 178390328.28125
Games: 780, last 10 games avg score: 249.4, avg of succ moves: 28.5, loss: [55

Games: 1100, last 10 games avg score: 235.8, avg of succ moves: 29.1, loss: [1005760937984.0, 1027616735232.0, 797872816128.0, 793986465792.0, 664507973632.0]
CNN made 465 moves. Successful were 283
rewards_next is 223756697.33203125
Games: 1110, last 10 games avg score: 252.6, avg of succ moves: 26.9, loss: [1453672890368.0, 1316032217088.0, 1272395071488.0, 1393248436224.0, 1261547552768.0]
CNN made 446 moves. Successful were 256
rewards_next is 254873683.875
Games: 1120, last 10 games avg score: 172.9, avg of succ moves: 22.9, loss: [7976459436032.0, 7890409619456.0, 7736987746304.0, 7776592461824.0, 7532383830016.0]
CNN made 440 moves. Successful were 216
rewards_next is 445067213.5
Games: 1130, last 10 games avg score: 221.7, avg of succ moves: 24.8, loss: [996972691456.0, 961858240512.0, 932073111552.0, 895910608896.0, 743454277632.0]
CNN made 461 moves. Successful were 242
rewards_next is 254251445.125
Games: 1140, last 10 games avg score: 222.5, avg of succ moves: 26.2, loss: [

Games: 1450, last 10 games avg score: 203.5, avg of succ moves: 24.8, loss: [41478411780096.0, 42129069965312.0, 41961536880640.0, 42033288839168.0, 41961306193920.0]
CNN made 453 moves. Successful were 244
rewards_next is 625138704.75
Games: 1460, last 10 games avg score: 167.6, avg of succ moves: 21.7, loss: [12741723029504.0, 9257957195776.0, 13485368934400.0, 8196048551936.0, 8302761082880.0]
CNN made 455 moves. Successful were 208
rewards_next is 326075633.125
Games: 1470, last 10 games avg score: 243.6, avg of succ moves: 27.7, loss: [99699553992704.0, 99456921894912.0, 98808868372480.0, 97366505947136.0, 97555367067648.0]
CNN made 442 moves. Successful were 265
rewards_next is 232762024.625
Games: 1480, last 10 games avg score: 219.1, avg of succ moves: 26.4, loss: [37479985971200.0, 36403937280000.0, 36599656087552.0, 35782316261376.0, 36868540334080.0]
CNN made 447 moves. Successful were 254
rewards_next is 143910769.5
Games: 1490, last 10 games avg score: 192.3, avg of succ m

Games: 1800, last 10 games avg score: 199.5, avg of succ moves: 23.0, loss: [11760635478016.0, 12250238681088.0, 11519005818880.0, 11797072445440.0, 11382453960704.0]
CNN made 500 moves. Successful were 230
rewards_next is 740760256.25
Games: 1810, last 10 games avg score: 216.3, avg of succ moves: 29.3, loss: [12892843802624.0, 14619952283648.0, 14125020217344.0, 12006186811392.0, 12476721659904.0]
CNN made 500 moves. Successful were 293
rewards_next is 851078923.0
Games: 1820, last 10 games avg score: 286.9, avg of succ moves: 31.8, loss: [26861264437248.0, 22490847903744.0, 23123367821312.0, 26797926252544.0, 27568499589120.0]
CNN made 500 moves. Successful were 318
rewards_next is 804597773.0
Games: 1830, last 10 games avg score: 243.9, avg of succ moves: 28.2, loss: [14680888180736.0, 14293334491136.0, 15873209270272.0, 17271137239040.0, 13550945828864.0]
CNN made 500 moves. Successful were 282
rewards_next is 881900790.25
Games: 1840, last 10 games avg score: 231.0, avg of succ m

Games: 2150, last 10 games avg score: 269.7, avg of succ moves: 30.7, loss: [21977825804288.0, 19413241692160.0, 19082340466688.0, 21288991064064.0, 17171094700032.0]
CNN made 500 moves. Successful were 307
rewards_next is 1136802138.0
Games: 2160, last 10 games avg score: 256.7, avg of succ moves: 30.7, loss: [123446268788736.0, 127603562250240.0, 129720217763840.0, 127622201737216.0, 127926741762048.0]
CNN made 500 moves. Successful were 307
rewards_next is 139247712.0
Games: 2170, last 10 games avg score: 218.5, avg of succ moves: 25.7, loss: [49547959074816.0, 48242431623168.0, 51103206998016.0, 48642664693760.0, 47267000090624.0]
CNN made 500 moves. Successful were 257
rewards_next is 1351488565.0
Games: 2180, last 10 games avg score: 222.7, avg of succ moves: 27.1, loss: [23337417834496.0, 25587408175104.0, 22247133675520.0, 24995696738304.0, 26148503289856.0]
CNN made 500 moves. Successful were 271
rewards_next is 1139191019.0
Games: 2190, last 10 games avg score: 256.1, avg of 

Games: 2500, last 10 games avg score: 176.9, avg of succ moves: 21.0, loss: [14414694580224.0, 14920123940864.0, 14158802190336.0, 14095044575232.0, 15765698772992.0]
CNN made 500 moves. Successful were 210
rewards_next is 592538111.0
Games: 2510, last 10 games avg score: 297.4, avg of succ moves: 35.7, loss: [25486537261056.0, 24866715598848.0, 25313740324864.0, 25242187595776.0, 25542348767232.0]
CNN made 500 moves. Successful were 357
rewards_next is 355688265.0
Games: 2520, last 10 games avg score: 189.6, avg of succ moves: 24.6, loss: [8448366027472896.0, 8448527088746496.0, 8450693899747328.0, 8447167731597312.0, 8448003102736384.0]
CNN made 500 moves. Successful were 246
rewards_next is 1757901437.5
Games: 2530, last 10 games avg score: 185.7, avg of succ moves: 22.1, loss: [136088991563776.0, 134785158610944.0, 139673418596352.0, 135255272980480.0, 139769703038976.0]
CNN made 500 moves. Successful were 221
rewards_next is 1388484598.0
Games: 2540, last 10 games avg score: 230.5

Games: 2850, last 10 games avg score: 297.5, avg of succ moves: 35.2, loss: [62177876639744.0, 66057515565056.0, 65772281921536.0, 61898846371840.0, 66477935820800.0]
CNN made 500 moves. Successful were 352
rewards_next is 1579881502.0
Games: 2860, last 10 games avg score: 265.9, avg of succ moves: 31.9, loss: [28654438449152.0, 27618736865280.0, 28130538422272.0, 27070864293888.0, 27540292894720.0]
CNN made 500 moves. Successful were 319
rewards_next is 685723953.5
Games: 2870, last 10 games avg score: 265.6, avg of succ moves: 29.8, loss: [30131647152128.0, 35919323201536.0, 33390187249664.0, 34990813347840.0, 29764687495168.0]
CNN made 500 moves. Successful were 298
rewards_next is 1182952760.0
Games: 2880, last 10 games avg score: 287.6, avg of succ moves: 32.5, loss: [52312735219712.0, 55018845634560.0, 49880605130752.0, 51501863010304.0, 61635993534464.0]
CNN made 500 moves. Successful were 325
rewards_next is 1303782234.5
Games: 2890, last 10 games avg score: 319.4, avg of succ 

Games: 3200, last 10 games avg score: 284.8, avg of succ moves: 33.4, loss: [675038332190720.0, 668472199610368.0, 676454127894528.0, 677039988277248.0, 666035208323072.0]
CNN made 500 moves. Successful were 334
rewards_next is 4004335428.0
Games: 3210, last 10 games avg score: 339.2, avg of succ moves: 39.9, loss: [129478558744576.0, 130966118989824.0, 130096706879488.0, 135813895553024.0, 131192200364032.0]
CNN made 500 moves. Successful were 399
rewards_next is 2420641000.0
Games: 3220, last 10 games avg score: 293.3, avg of succ moves: 35.8, loss: [64918137077760.0, 51547819999232.0, 55409775738880.0, 50305026752512.0, 51731153027072.0]
CNN made 500 moves. Successful were 358
rewards_next is 1663609683.0
Games: 3230, last 10 games avg score: 327.9, avg of succ moves: 35.7, loss: [22596449992704.0, 22961706762240.0, 21310109384704.0, 21100733923328.0, 22930805227520.0]
CNN made 500 moves. Successful were 357
rewards_next is 1009887071.75
Games: 3240, last 10 games avg score: 292.9, 

Games: 3550, last 10 games avg score: 410.6, avg of succ moves: 48.2, loss: [12774010781696.0, 12814447017984.0, 12373061533696.0, 12218722680832.0, 12022984998912.0]
CNN made 500 moves. Successful were 482
rewards_next is 927020314.0
Games: 3560, last 10 games avg score: 369.7, avg of succ moves: 43.7, loss: [7124581089280.0, 6705802903552.0, 6939190755328.0, 6746452000768.0, 7127883055104.0]
CNN made 500 moves. Successful were 437
rewards_next is 1117833484.0
Games: 3570, last 10 games avg score: 345.5, avg of succ moves: 43.4, loss: [23226354761728.0, 23291244838912.0, 23984395517952.0, 23855181594624.0, 24868319920128.0]
CNN made 500 moves. Successful were 434
rewards_next is 1116497947.0
Games: 3580, last 10 games avg score: 348.9, avg of succ moves: 40.0, loss: [6403955621888.0, 6777029525504.0, 6294670409728.0, 6766996750336.0, 5617257283584.0]
CNN made 500 moves. Successful were 400
rewards_next is 1112998242.0
Games: 3590, last 10 games avg score: 326.7, avg of succ moves: 41.

Games: 3900, last 10 games avg score: 354.7, avg of succ moves: 39.6, loss: [25045871099904.0, 25828142350336.0, 24980335099904.0, 25136879108096.0, 23287757275136.0]
CNN made 500 moves. Successful were 396
rewards_next is 1648392417.0
Games: 3910, last 10 games avg score: 348.9, avg of succ moves: 41.9, loss: [11271934050304.0, 10833252843520.0, 9877890007040.0, 10052482105344.0, 9655187144704.0]
CNN made 500 moves. Successful were 419
rewards_next is 1440741115.0
Games: 3920, last 10 games avg score: 402.1, avg of succ moves: 48.6, loss: [9681042931712.0, 9059826663424.0, 8780157288448.0, 8634330251264.0, 8599734583296.0]
CNN made 500 moves. Successful were 486
rewards_next is 1186996620.0
Games: 3930, last 10 games avg score: 349.1, avg of succ moves: 44.6, loss: [9588536508416.0, 9110818914304.0, 9267307347968.0, 9172006469632.0, 8740294098944.0]
CNN made 500 moves. Successful were 446
rewards_next is 1144950912.0
Games: 3940, last 10 games avg score: 370.8, avg of succ moves: 44.2

Games: 4250, last 10 games avg score: 365.3, avg of succ moves: 42.5, loss: [17110501687296.0, 17816373690368.0, 17481194274816.0, 17245675716608.0, 18943771148288.0]
CNN made 500 moves. Successful were 425
rewards_next is 1447074767.5
Games: 4260, last 10 games avg score: 363.5, avg of succ moves: 43.1, loss: [70391284367360.0, 64254732402688.0, 65917555834880.0, 62585546211328.0, 59893486714880.0]
CNN made 500 moves. Successful were 431
rewards_next is 2061530572.0
Games: 4270, last 10 games avg score: 424.5, avg of succ moves: 47.5, loss: [20319196676096.0, 21270078947328.0, 23012952768512.0, 19931873673216.0, 18411845320704.0]
CNN made 500 moves. Successful were 475
rewards_next is 1620779162.0
Games: 4280, last 10 games avg score: 288.5, avg of succ moves: 34.6, loss: [741433224986624.0, 727025220321280.0, 719090603786240.0, 721206210723840.0, 714958778138624.0]
CNN made 500 moves. Successful were 346
rewards_next is 4127873056.0
Games: 4290, last 10 games avg score: 292.5, avg of

Games: 4600, last 10 games avg score: 380.3, avg of succ moves: 46.4, loss: [37225773400064.0, 36898047262720.0, 36740370792448.0, 36472782585856.0, 36133341757440.0]
CNN made 500 moves. Successful were 464
rewards_next is 830020196.0
Games: 4610, last 10 games avg score: 360.6, avg of succ moves: 44.0, loss: [73086728667136.0, 72863432310784.0, 71990111109120.0, 72258177466368.0, 72315496824832.0]
CNN made 500 moves. Successful were 440
rewards_next is 458523586.9375
Games: 4620, last 10 games avg score: 286.7, avg of succ moves: 33.0, loss: [1.1092745179365376e+17, 1.1092629215248384e+17, 1.109283795065897e+17, 1.109321247180718e+17, 1.1092572521680077e+17]
CNN made 500 moves. Successful were 330
rewards_next is 5315371105.0
Games: 4630, last 10 games avg score: 29.1, avg of succ moves: 4.5, loss: [84738211577856.0, 101711284797440.0, 101103966355456.0, 82214423363584.0, 75672139595776.0]
CNN made 500 moves. Successful were 45
rewards_next is 1365779826.0
Games: 4640, last 10 games a

Games: 4950, last 10 games avg score: 151.8, avg of succ moves: 18.7, loss: [65678799273984.0, 65309268508672.0, 65281267335168.0, 65422778957824.0, 66821495455744.0]
CNN made 500 moves. Successful were 187
rewards_next is 488518307.0
Games: 4960, last 10 games avg score: 150.8, avg of succ moves: 18.2, loss: [63251823984640.0, 63376243818496.0, 64367794061312.0, 62717331243008.0, 62373788385280.0]
CNN made 500 moves. Successful were 182
rewards_next is 531560993.0
Games: 4970, last 10 games avg score: 93.0, avg of succ moves: 12.5, loss: [227065257263104.0, 236331632427008.0, 225284188012544.0, 211118714781696.0, 222806260318208.0]
CNN made 500 moves. Successful were 125
rewards_next is 2044877627.75
Games: 4980, last 10 games avg score: 167.2, avg of succ moves: 20.9, loss: [88829797072896.0, 89799159447552.0, 92635112931328.0, 99024111665152.0, 94306543075328.0]
CNN made 500 moves. Successful were 209
rewards_next is 1407743746.0
Games: 4990, last 10 games avg score: 167.9, avg of s

Games: 5300, last 10 games avg score: 334.7, avg of succ moves: 39.3, loss: [51914326671360.0, 52571565719552.0, 51397215125504.0, 50019759554560.0, 50371913318400.0]
CNN made 500 moves. Successful were 393
rewards_next is 810644704.0
Games: 5310, last 10 games avg score: 266.0, avg of succ moves: 33.6, loss: [77481126133760.0, 68573984718848.0, 72432987668480.0, 70242495627264.0, 70591277170688.0]
CNN made 500 moves. Successful were 336
rewards_next is 1943184494.0
Games: 5320, last 10 games avg score: 292.3, avg of succ moves: 33.0, loss: [102004315652096.0, 103367363788800.0, 99562240868352.0, 102994490163200.0, 103977727295488.0]
CNN made 500 moves. Successful were 330
rewards_next is 483089347.3671875
Games: 5330, last 10 games avg score: 187.1, avg of succ moves: 22.0, loss: [171191927046144.0, 168149513142272.0, 165802481287168.0, 170927803334656.0, 163739152154624.0]
CNN made 500 moves. Successful were 220
rewards_next is 116523516.0
Games: 5340, last 10 games avg score: 214.9,

Games: 5640, last 10 games avg score: 342.2, avg of succ moves: 40.5, loss: [43468143460352.0, 40190487822336.0, 38675761070080.0, 43028274216960.0, 39889852694528.0]
CNN made 500 moves. Successful were 405
rewards_next is 1521734022.0
Games: 5650, last 10 games avg score: 332.9, avg of succ moves: 38.6, loss: [91731156533248.0, 91262770216960.0, 89486985789440.0, 98128451600384.0, 86149913640960.0]
CNN made 500 moves. Successful were 386
rewards_next is 2274583008.0
Games: 5660, last 10 games avg score: 357.9, avg of succ moves: 44.1, loss: [75210573217792.0, 73680642113536.0, 73629555490816.0, 74141654843392.0, 71643217002496.0]
CNN made 500 moves. Successful were 441
rewards_next is 1549670994.0
Games: 5670, last 10 games avg score: 371.5, avg of succ moves: 42.6, loss: [82313098559488.0, 77275160641536.0, 77247100747776.0, 74837464711168.0, 74675078037504.0]
CNN made 500 moves. Successful were 426
rewards_next is 2224013198.0
Games: 5680, last 10 games avg score: 274.2, avg of succ

Games: 5990, last 10 games avg score: 365.9, avg of succ moves: 44.5, loss: [22164197605376.0, 22334935138304.0, 22244430446592.0, 21450985570304.0, 20986143440896.0]
CNN made 500 moves. Successful were 445
rewards_next is 1078287736.75
Games: 6000, last 10 games avg score: 344.2, avg of succ moves: 40.5, loss: [18204564914176.0, 17392611622912.0, 17287780237312.0, 17029207687168.0, 16632897339392.0]
CNN made 500 moves. Successful were 405
rewards_next is 1305686271.0
Games: 6010, last 10 games avg score: 294.5, avg of succ moves: 35.7, loss: [16868116004864.0, 17833740206080.0, 16903383810048.0, 17324530728960.0, 17904483434496.0]
CNN made 500 moves. Successful were 357
rewards_next is 1570698438.5
Games: 6020, last 10 games avg score: 381.4, avg of succ moves: 45.5, loss: [16553124823040.0, 15875239313408.0, 16555021697024.0, 17275216199680.0, 16766361141248.0]
CNN made 500 moves. Successful were 455
rewards_next is 1188449066.0
Games: 6030, last 10 games avg score: 335.1, avg of suc

Games: 6340, last 10 games avg score: 349.3, avg of succ moves: 40.1, loss: [16207770025984.0, 15857115725824.0, 16366222442496.0, 16258125791232.0, 15337233842176.0]
CNN made 500 moves. Successful were 401
rewards_next is 1210066256.5
Games: 6350, last 10 games avg score: 345.1, avg of succ moves: 39.5, loss: [6576582688768.0, 7575612424192.0, 6999307714560.0, 6751607324672.0, 6748877881344.0]
CNN made 500 moves. Successful were 395
rewards_next is 1053869948.0
Games: 6360, last 10 games avg score: 356.8, avg of succ moves: 42.2, loss: [206568465367040.0, 201630762926080.0, 202375285440512.0, 199769733464064.0, 200554940727296.0]
CNN made 500 moves. Successful were 422
rewards_next is 2663712092.0
Games: 6370, last 10 games avg score: 252.8, avg of succ moves: 31.5, loss: [10600390328320.0, 9707708219392.0, 10273525071872.0, 10499986030592.0, 9171710771200.0]
CNN made 500 moves. Successful were 315
rewards_next is 1152349277.5
Games: 6380, last 10 games avg score: 333.7, avg of succ m

Games: 6690, last 10 games avg score: 21.0, avg of succ moves: 3.0, loss: [43858243092480.0, 43883350196224.0, 44040754036736.0, 43957882978304.0, 43876538646528.0]
CNN made 500 moves. Successful were 30
rewards_next is 0.0
Games: 6700, last 10 games avg score: 7.5, avg of succ moves: 1.2, loss: [51080171880448.0, 51942109741056.0, 51275844550656.0, 51099335655424.0, 51246467645440.0]
CNN made 500 moves. Successful were 12
rewards_next is 89153989.0
Games: 6710, last 10 games avg score: 23.6, avg of succ moves: 2.5, loss: [37483941199872.0, 37522956615680.0, 37305238683648.0, 37288587296768.0, 37261055885312.0]
CNN made 500 moves. Successful were 25
rewards_next is 19247050.0
Games: 6720, last 10 games avg score: 13.5, avg of succ moves: 2.1, loss: [66918484541440.0, 66855964246016.0, 67178468474880.0, 66957839695872.0, 66579970654208.0]
CNN made 500 moves. Successful were 21
rewards_next is 135126232.0
Games: 6730, last 10 games avg score: 24.3, avg of succ moves: 2.9, loss: [30260854

Games: 7050, last 10 games avg score: 15.4, avg of succ moves: 3.5, loss: [977037950976.0, 975404531712.0, 971364761600.0, 975856992256.0, 975802597376.0]
CNN made 500 moves. Successful were 35
rewards_next is 8618205.6875
Games: 7060, last 10 games avg score: 29.7, avg of succ moves: 4.7, loss: [725719777280.0, 728112955392.0, 732487090176.0, 728605130752.0, 724784119808.0]
CNN made 500 moves. Successful were 47
rewards_next is 0.0
Games: 7070, last 10 games avg score: 27.0, avg of succ moves: 3.8, loss: [545899675648.0, 572269461504.0, 544337297408.0, 527330377728.0, 518056443904.0]
CNN made 500 moves. Successful were 38
rewards_next is 10530572.0
Games: 7080, last 10 games avg score: 20.4, avg of succ moves: 3.3, loss: [3691852070912.0, 2457455820800.0, 2659881320448.0, 2331294564352.0, 2012435316736.0]
CNN made 500 moves. Successful were 33
rewards_next is 0.0
Games: 7090, last 10 games avg score: 23.7, avg of succ moves: 2.9, loss: [6638338048000.0, 5650349293568.0, 6401738932224.

Games: 7420, last 10 games avg score: 14.4, avg of succ moves: 1.9, loss: [1217879986929664.0, 1219480533336064.0, 1235541831974912.0, 1240152210931712.0, 1235215146024960.0]
CNN made 500 moves. Successful were 19
rewards_next is 939278544.0
Games: 7430, last 10 games avg score: 9.8, avg of succ moves: 1.6, loss: [94760635203584.0, 91065302384640.0, 93821522149376.0, 104397551960064.0, 103938980315136.0]
CNN made 500 moves. Successful were 16
rewards_next is 332531396.0
Games: 7440, last 10 games avg score: 13.5, avg of succ moves: 2.4, loss: [2987815862272.0, 3151349940224.0, 3030993338368.0, 3196904275968.0, 3103104958464.0]
CNN made 500 moves. Successful were 24
rewards_next is 17113024.0
Games: 7450, last 10 games avg score: 12.7, avg of succ moves: 2.1, loss: [14602626662400.0, 13384085929984.0, 12974868660224.0, 13269051899904.0, 13757891739648.0]
CNN made 500 moves. Successful were 21
rewards_next is 2454053.75
Games: 7460, last 10 games avg score: 9.0, avg of succ moves: 1.4, l

Games: 7780, last 10 games avg score: 20.2, avg of succ moves: 2.9, loss: [2169820938240.0, 2289464508416.0, 2102834757632.0, 1860976115712.0, 2194303352832.0]
CNN made 500 moves. Successful were 29
rewards_next is 31272048.25
Games: 7790, last 10 games avg score: 27.5, avg of succ moves: 3.4, loss: [229398167552.0, 213315436544.0, 187037401088.0, 190323163136.0, 215529193472.0]
CNN made 500 moves. Successful were 34
rewards_next is 2257083.875
Games: 7800, last 10 games avg score: 27.3, avg of succ moves: 3.7, loss: [209457020928.0, 198254723072.0, 210156453888.0, 220347039744.0, 228895899648.0]
CNN made 500 moves. Successful were 37
rewards_next is 11902835.625
Games: 7810, last 10 games avg score: 40.5, avg of succ moves: 5.3, loss: [7779488104448.0, 7779303555072.0, 7748367417344.0, 7675656536064.0, 7829715943424.0]
CNN made 500 moves. Successful were 53
rewards_next is 45030010.0
Games: 7820, last 10 games avg score: 29.0, avg of succ moves: 3.4, loss: [110005436416.0, 11234626764

Games: 8160, last 10 games avg score: 4.7, avg of succ moves: 0.7, loss: [20783738880.0, 21127303168.0, 20132216832.0, 16010600448.0, 19021072384.0]
CNN made 500 moves. Successful were 7
rewards_next is 0.0
Games: 8170, last 10 games avg score: 3.1, avg of succ moves: 0.8, loss: [85221679104.0, 76242493440.0, 81976057856.0, 83673620480.0, 81357651968.0]
CNN made 500 moves. Successful were 8
rewards_next is 1563051.59375
Games: 8180, last 10 games avg score: 10.0, avg of succ moves: 1.6, loss: [3060678400.0, 5397740032.0, 4668290048.0, 5061514240.0, 3982626816.0]
CNN made 500 moves. Successful were 16
rewards_next is 0.0
Games: 8190, last 10 games avg score: 5.5, avg of succ moves: 1.0, loss: [6562696192.0, 3197518592.0, 2516455424.0, 3325285376.0, 3651434240.0]
CNN made 500 moves. Successful were 10
rewards_next is 0.0
Games: 8200, last 10 games avg score: 7.0, avg of succ moves: 0.8, loss: [2463302418432.0, 2456693768192.0, 2461313531904.0, 2454409707520.0, 2460757786624.0]
CNN made 5

Games: 8630, last 10 games avg score: 0.3, avg of succ moves: 0.1, loss: [0.0, 0.0, 0.0, 0.0, 0.0]
CNN made 500 moves. Successful were 1
rewards_next is 0.0
Games: 8640, last 10 games avg score: 0.3, avg of succ moves: 0.1, loss: [0.0, 0.0, 0.0, 0.0, 0.0]
CNN made 500 moves. Successful were 1
rewards_next is 0.0
Games: 8650, last 10 games avg score: 0.0, avg of succ moves: 0.0, loss: [0.0, 0.0, 0.0, 0.0, 0.0]
CNN made 500 moves. Successful were 0
rewards_next is 0.0
Games: 8660, last 10 games avg score: 0.3, avg of succ moves: 0.1, loss: [0.0703125, 0.0703125, 0.0703125, 0.0703125, 0.0703125]
CNN made 500 moves. Successful were 1
rewards_next is 0.0
Games: 8670, last 10 games avg score: 0.3, avg of succ moves: 0.1, loss: [0.6328125, 0.6328125, 0.6328125, 0.6328125, 0.6328125]
CNN made 500 moves. Successful were 1
rewards_next is 0.0
Games: 8680, last 10 games avg score: 1.2, avg of succ moves: 0.4, loss: [0.0703125, 0.0703125, 0.0703125, 0.0703125, 0.0703125]
CNN made 500 moves. Succes

Games: 9140, last 10 games avg score: 1.2, avg of succ moves: 0.4, loss: [0.0, 0.0, 0.0, 0.0, 0.0]
CNN made 500 moves. Successful were 4
rewards_next is 0.0
Games: 9150, last 10 games avg score: 0.9, avg of succ moves: 0.1, loss: [0.0, 0.0, 0.0, 0.0, 0.0]
CNN made 500 moves. Successful were 1
rewards_next is 0.0
Games: 9160, last 10 games avg score: 0.0, avg of succ moves: 0.0, loss: [0.0, 0.0, 0.0, 0.0, 0.0]
CNN made 500 moves. Successful were 0
rewards_next is 0.0
Games: 9170, last 10 games avg score: 2.1, avg of succ moves: 0.3, loss: [0.0, 0.0, 0.0, 0.0, 0.0]
CNN made 500 moves. Successful were 3
rewards_next is 0.0
Games: 9180, last 10 games avg score: 0.6, avg of succ moves: 0.2, loss: [0.0, 0.0, 0.0, 0.0, 0.0]
CNN made 500 moves. Successful were 2
rewards_next is 0.0
Games: 9190, last 10 games avg score: 0.0, avg of succ moves: 0.0, loss: [0.0703125, 0.0703125, 0.0703125, 0.0703125, 0.0703125]
CNN made 500 moves. Successful were 0
rewards_next is 0.0
Games: 9200, last 10 games a

Games: 9630, last 10 games avg score: 1.2, avg of succ moves: 0.4, loss: [0.0, 0.0, 0.0, 0.0, 0.0]
CNN made 500 moves. Successful were 4
rewards_next is 0.0
Games: 9640, last 10 games avg score: 2.8, avg of succ moves: 0.2, loss: [0.0, 0.0, 0.0, 0.0, 0.0]
CNN made 500 moves. Successful were 2
rewards_next is 0.0
Games: 9650, last 10 games avg score: 0.0, avg of succ moves: 0.0, loss: [0.0, 0.0, 0.0, 0.0, 0.0]
CNN made 500 moves. Successful were 0
rewards_next is 0.0
Games: 9660, last 10 games avg score: 0.6, avg of succ moves: 0.2, loss: [0.0703125, 0.0703125, 0.0703125, 0.0703125, 0.0703125]
CNN made 500 moves. Successful were 2
rewards_next is 0.0
Games: 9670, last 10 games avg score: 0.3, avg of succ moves: 0.1, loss: [0.0, 0.0, 0.0, 0.0, 0.0]
CNN made 500 moves. Successful were 1
rewards_next is 0.0
Games: 9680, last 10 games avg score: 1.0, avg of succ moves: 0.1, loss: [0.0, 0.0, 0.0, 0.0, 0.0]
CNN made 500 moves. Successful were 1
rewards_next is 0.0
Games: 9690, last 10 games a

Games: 10130, last 10 games avg score: 1.2, avg of succ moves: 0.3, loss: [0.0, 0.0, 0.0, 0.0, 0.0]
CNN made 500 moves. Successful were 3
rewards_next is 0.0
Games: 10140, last 10 games avg score: 0.0, avg of succ moves: 0.0, loss: [0.0, 0.0, 0.0, 0.0, 0.0]
CNN made 500 moves. Successful were 0
rewards_next is 0.0
Games: 10150, last 10 games avg score: 0.0, avg of succ moves: 0.0, loss: [0.0, 0.0, 0.0, 0.0, 0.0]
CNN made 500 moves. Successful were 0
rewards_next is 0.0
Games: 10160, last 10 games avg score: 0.3, avg of succ moves: 0.1, loss: [0.140625, 0.140625, 0.140625, 0.140625, 0.140625]
CNN made 500 moves. Successful were 1
rewards_next is 0.0
Games: 10170, last 10 games avg score: 0.9, avg of succ moves: 0.3, loss: [0.0, 0.0, 0.0, 0.0, 0.0]
CNN made 500 moves. Successful were 3
rewards_next is 0.0
Games: 10180, last 10 games avg score: 0.0, avg of succ moves: 0.0, loss: [0.0, 0.0, 0.0, 0.0, 0.0]
CNN made 500 moves. Successful were 0
rewards_next is 0.0
Games: 10190, last 10 games

Games: 10640, last 10 games avg score: 0.6, avg of succ moves: 0.2, loss: [0.0, 0.0, 0.0, 0.0, 0.0]
CNN made 500 moves. Successful were 2
rewards_next is 0.0
Games: 10650, last 10 games avg score: 0.6, avg of succ moves: 0.2, loss: [0.0703125, 0.0703125, 0.0703125, 0.0703125, 0.0703125]
CNN made 500 moves. Successful were 2
rewards_next is 0.0
Games: 10660, last 10 games avg score: 0.0, avg of succ moves: 0.0, loss: [0.0, 0.0, 0.0, 0.0, 0.0]
CNN made 500 moves. Successful were 0
rewards_next is 0.0
Games: 10670, last 10 games avg score: 0.3, avg of succ moves: 0.1, loss: [0.0, 0.0, 0.0, 0.0, 0.0]
CNN made 500 moves. Successful were 1
rewards_next is 0.0
Games: 10680, last 10 games avg score: 0.3, avg of succ moves: 0.1, loss: [0.28125, 0.28125, 0.28125, 0.28125, 0.28125]
CNN made 500 moves. Successful were 1
rewards_next is 0.0
Games: 10690, last 10 games avg score: 0.3, avg of succ moves: 0.1, loss: [0.0, 0.0, 0.0, 0.0, 0.0]
CNN made 500 moves. Successful were 1
rewards_next is 0.0
Ga

Games: 11130, last 10 games avg score: 0.3, avg of succ moves: 0.1, loss: [0.0, 0.0, 0.0, 0.0, 0.0]
CNN made 500 moves. Successful were 1
rewards_next is 0.0
Games: 11140, last 10 games avg score: 0.3, avg of succ moves: 0.1, loss: [0.0, 0.0, 0.0, 0.0, 0.0]
CNN made 500 moves. Successful were 1
rewards_next is 0.0
Games: 11150, last 10 games avg score: 1.8, avg of succ moves: 0.2, loss: [0.421875, 0.421875, 0.421875, 0.421875, 0.421875]
CNN made 500 moves. Successful were 2
rewards_next is 0.0
Games: 11160, last 10 games avg score: 0.6, avg of succ moves: 0.1, loss: [0.0703125, 0.0703125, 0.0703125, 0.0703125, 0.0703125]
CNN made 500 moves. Successful were 1
rewards_next is 0.0
Games: 11170, last 10 games avg score: 0.0, avg of succ moves: 0.0, loss: [0.3515625, 0.3515625, 0.3515625, 0.3515625, 0.3515625]
CNN made 500 moves. Successful were 0
rewards_next is 0.0
Games: 11180, last 10 games avg score: 0.6, avg of succ moves: 0.2, loss: [0.0, 0.0, 0.0, 0.0, 0.0]
CNN made 500 moves. Succe

KeyboardInterrupt: 

In [17]:
print(MAXIMUM_SCORE)

651


## Debuggin the Hint

In [None]:
# Total score of one move
reward = 0

# Whether CNN made the move
cnn_made_move_flag = False

# If replay_memory has less than 64 moves, then make a random move
if ((len(replay_memory) < MINIBATCH_SIZE) or (rd.random() > CNN_MOVE_PROB)):
    move = rd.randint(1, ae.ACTIONS_DIMENSION)
else:
    # CNN selects a move
    cnn_made_move_flag = True
    CNN_MOVES_COUNT = CNN_MOVES_COUNT + 1
    scr, move = ae.predict_max_score_3D(field.copy(), Target_CNN, ae.ACTIONS_DIMENSION, ae.MOVES)
    print(f"Predicted move {move} with score of {scr}")

In [None]:
# Make the move
new_field, plate_a, plate_b = ae.make_move_v2_3D(field.copy(), move, ae.MOVES)
field_after_move = new_field.copy()

# Calculate the score and update the field
score, new_field = ae.calculate_score_v2_3D(new_field, plate_a, plate_b)

# If the move is successful, then update the field and check if we have new sets        
successful_move_flag = False

# While we have new sets (thus the score is greater than 0), process them, calculate score and move plates
while (score > 0.):
    if (not successful_move_flag):
        successful_moves = successful_moves + 1
        successful_move_flag = True

    # Add new points to the total score of the move
    reward = reward + score

    # Move plates downward, fill the upper row so, that it doesn't have "easy" sets of three
    # Start from the left lower corner (in order to reuse color_fits())
    new_field = ae.fill_field_3D(new_field)

    # Calculate score and check whether we have new sets
    score, new_field = ae.calculate_score_v2_3D(new_field, (-1, -1), (-1, -1))

print(reward)


In [None]:

#
# Train CNN based on the score
#

samples = replay_memory.sample_minibatch(MINIBATCH_SIZE)

# Prepare some things for training
s_before = samples[0]
actions = samples[1]
rewards = samples[2]
s_after = samples[3] 
dones = samples[4]


In [None]:
def loss(model, X, y_true, A):
    prediction = model(X)
    selected_action_values = tf.math.reduce_sum(prediction*A, axis=1)  
    return tf.keras.losses.MSE(y_true, selected_action_values)


def grad(model, inputs, targets, actions):
    with tf.GradientTape() as tape:
        loss_value = loss(model, inputs, targets, actions)
    return loss_value, tape.gradient(loss_value, model.trainable_variables)

In [None]:
rewards_next = np.max(Target_CNN(s_after), axis=1)
actual_values = np.where(dones, rewards, rewards + GAMMA*rewards_next)
selected_actions = tf.one_hot(actions, ae.ACTIONS_DIMENSION)

In [None]:
loss_value, grads = grad(Online_CNN, s_before, actual_values, selected_actions)

print("Step: {}, Initial Loss: {}".format(optimizer.iterations.numpy(), loss_value.numpy()))

optimizer.apply_gradients(zip(grads, Online_CNN.trainable_variables))

print("Step: {}, Loss: {}".format(optimizer.iterations.numpy(), loss(Online_CNN, s_before, actual_values, selected_actions).numpy()))

### Now the CNN has been trained.
### Start the long reinforcement-learning cycle

In [None]:
successful_moves

In [None]:
new_field = make_move(field, move)
print(new_field)

In [None]:
temp_field = calculate_score(new_field)

In [None]:
new_field = np.multiply(new_field, 1.0 - temp_field)

In [None]:
fill_field(new_field, colors)

In [None]:
#
# Save model
#
# v1: 20190329, trained on len(replay_memory) = 294912
#aero_cnn.save("Aero_CNN_v1")

## Backup

In [None]:
#
# Create the moves dictionary
#
moves = {}

for i in range(1, 143):
    old_row, old_column, old_direction = process_move_142(i)
    
    start_row = old_row - 1
    start_col = old_column - 1
    
    if (old_direction == "down"):
        end_row = start_row + 1
        end_col = start_col
    elif (old_direction == "up"):
        end_row = start_row - 1
        end_col = start_col
    elif (old_direction == "right"):
        end_row = start_row
        end_col = start_col + 1
    else:
        end_row = start_row
        end_col = start_col - 1
        
    moves[i] = ((start_row, start_col), (end_row, end_col))
    
print(moves)