### Use bonus plates (length of 4)

In [82]:
import pandas as pd
import random as rd
import numpy as np
#import qgrid

import tensorflow as tf

from keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D
from keras.models import Model, load_model, Sequential
from keras.optimizers import Adam
#from keras.callbacks import TensorBoard

from collections import deque

#import matplotlib.pyplot as plt
#import matplotlib.patches as mpatches
#%pylab inline

## Constants

In [83]:
# CNN
LEARNING_RATE = 0.01
UPDATE_TARGET_NET = 1000

# Definitions
GAMES_TO_PLAY = 120001
REPLAY_MEMORY_SIZE = 131072
DYNAMIC_LEARNING_EPOCHS = 2
MINIBATCH_SIZE = 64
NUMBER_OF_MOVES_IN_GAME = 50
GAMMA = 0.99

# Variables
MAXIMUM_SCORE = 0
TOTAL_SCORE_100 = 0.0
AVG_SCORE_HIST = []
TOTAL_SUCCESSFUL_MOVES_100 = 0.0
AVG_SUCC_MOVES_HIST = []
CNN_MOVE_PROB = 0.1
CNN_MOVES_COUNT = 0
CNN_SUCCESSFUL_PREDICTION = 0

# Initialize replay_memory
replay_memory = []

In [84]:
#
# Definitions
#
# RED = 0.2
# GREEN = 0.4
# BLUE = 0.6
# PURPLE = 0.8
#
colors = [0.2, 0.4, 0.6, 0.8]

moves = {1: ((0, 0), (1, 0)), 2: ((0, 1), (1, 1)), 3: ((0, 2), (1, 2)), 4: ((0, 3), (1, 3)), 5: ((0, 4), (1, 4)), 
         6: ((0, 5), (1, 5)), 7: ((1, 0), (2, 0)), 8: ((1, 0), (0, 0)), 9: ((1, 1), (2, 1)), 10: ((1, 1), (0, 1)), 
         11: ((1, 2), (2, 2)), 12: ((1, 2), (0, 2)), 13: ((1, 3), (2, 3)), 14: ((1, 3), (0, 3)), 15: ((1, 4), (2, 4)), 
         16: ((1, 4), (0, 4)), 17: ((1, 5), (2, 5)), 18: ((1, 5), (0, 5)), 19: ((2, 0), (3, 0)), 20: ((2, 0), (1, 0)), 
         21: ((2, 1), (3, 1)), 22: ((2, 1), (1, 1)), 23: ((2, 2), (3, 2)), 24: ((2, 2), (1, 2)), 25: ((2, 3), (3, 3)), 
         26: ((2, 3), (1, 3)), 27: ((2, 4), (3, 4)), 28: ((2, 4), (1, 4)), 29: ((2, 5), (3, 5)), 30: ((2, 5), (1, 5)), 
         31: ((3, 0), (4, 0)), 32: ((3, 0), (2, 0)), 33: ((3, 1), (4, 1)), 34: ((3, 1), (2, 1)), 35: ((3, 2), (4, 2)), 
         36: ((3, 2), (2, 2)), 37: ((3, 3), (4, 3)), 38: ((3, 3), (2, 3)), 39: ((3, 4), (4, 4)), 40: ((3, 4), (2, 4)), 
         41: ((3, 5), (4, 5)), 42: ((3, 5), (2, 5)), 43: ((4, 0), (5, 0)), 44: ((4, 0), (3, 0)), 45: ((4, 1), (5, 1)), 
         46: ((4, 1), (3, 1)), 47: ((4, 2), (5, 2)), 48: ((4, 2), (3, 2)), 49: ((4, 3), (5, 3)), 50: ((4, 3), (3, 3)), 
         51: ((4, 4), (5, 4)), 52: ((4, 4), (3, 4)), 53: ((4, 5), (5, 5)), 54: ((4, 5), (3, 5)), 55: ((5, 0), (6, 0)), 
         56: ((5, 0), (4, 0)), 57: ((5, 1), (6, 1)), 58: ((5, 1), (4, 1)), 59: ((5, 2), (6, 2)), 60: ((5, 2), (4, 2)), 
         61: ((5, 3), (6, 3)), 62: ((5, 3), (4, 3)), 63: ((5, 4), (6, 4)), 64: ((5, 4), (4, 4)), 65: ((5, 5), (6, 5)), 
         66: ((5, 5), (4, 5)), 67: ((6, 0), (5, 0)), 68: ((6, 1), (5, 1)), 69: ((6, 2), (5, 2)), 70: ((6, 3), (5, 3)), 
         71: ((6, 4), (5, 4)), 72: ((6, 5), (5, 5)), 73: ((0, 0), (0, 1)), 74: ((1, 0), (1, 1)), 75: ((2, 0), (2, 1)), 
         76: ((3, 0), (3, 1)), 77: ((4, 0), (4, 1)), 78: ((5, 0), (5, 1)), 79: ((6, 0), (6, 1)), 80: ((0, 1), (0, 0)), 
         81: ((0, 1), (0, 2)), 82: ((1, 1), (1, 0)), 83: ((1, 1), (1, 2)), 84: ((2, 1), (2, 0)), 85: ((2, 1), (2, 2)), 
         86: ((3, 1), (3, 0)), 87: ((3, 1), (3, 2)), 88: ((4, 1), (4, 0)), 89: ((4, 1), (4, 2)), 90: ((5, 1), (5, 0)), 
         91: ((5, 1), (5, 2)), 92: ((6, 1), (6, 0)), 93: ((6, 1), (6, 2)), 94: ((0, 2), (0, 1)), 95: ((0, 2), (0, 3)), 
         96: ((1, 2), (1, 1)), 97: ((1, 2), (1, 3)), 98: ((2, 2), (2, 1)), 99: ((2, 2), (2, 3)), 100: ((3, 2), (3, 1)), 
         101: ((3, 2), (3, 3)), 102: ((4, 2), (4, 1)), 103: ((4, 2), (4, 3)), 104: ((5, 2), (5, 1)), 105: ((5, 2), (5, 3)), 
         106: ((6, 2), (6, 1)), 107: ((6, 2), (6, 3)), 108: ((0, 3), (0, 2)), 109: ((0, 3), (0, 4)), 110: ((1, 3), (1, 2)), 
         111: ((1, 3), (1, 4)), 112: ((2, 3), (2, 2)), 113: ((2, 3), (2, 4)), 114: ((3, 3), (3, 2)), 115: ((3, 3), (3, 4)), 
         116: ((4, 3), (4, 2)), 117: ((4, 3), (4, 4)), 118: ((5, 3), (5, 2)), 119: ((5, 3), (5, 4)), 120: ((6, 3), (6, 2)), 
         121: ((6, 3), (6, 4)), 122: ((0, 4), (0, 3)), 123: ((0, 4), (0, 5)), 124: ((1, 4), (1, 3)), 125: ((1, 4), (1, 5)), 
         126: ((2, 4), (2, 3)), 127: ((2, 4), (2, 5)), 128: ((3, 4), (3, 3)), 129: ((3, 4), (3, 5)), 130: ((4, 4), (4, 3)), 
         131: ((4, 4), (4, 5)), 132: ((5, 4), (5, 3)), 133: ((5, 4), (5, 5)), 134: ((6, 4), (6, 3)), 135: ((6, 4), (6, 5)), 
         136: ((0, 5), (0, 4)), 137: ((1, 5), (1, 4)), 138: ((2, 5), (2, 4)), 139: ((3, 5), (3, 4)), 140: ((4, 5), (4, 4)), 
         141: ((5, 5), (5, 4)), 142: ((6, 5), (6, 4))}

In [85]:
def color_fits(field, i, j, new_color):
    """
    Checks if two items to the left or two colors to the top are NOT of the same color as the new item.
    Input:
    - field: battfield, numpy array
    - i, j: position on the new item, int, within field.shape
    - new_color: color of the new item, float
    Output:
    - boolean: True, if the new item is ok
    """
    # Check two colors to the left
    if (j > 1):
        if (round(field[i, j - 2] % 1.0, 1) == new_color) and (round(field[i, j - 1] % 1.0, 1) == new_color):
            return False
        
    # Check two colors to the right
    if (j < 4):
        if (round(field[i, j + 2] % 1.0, 1) == new_color) and (round(field[i, j + 1] % 1.0, 1) == new_color):
            return False
    
    # Check two color to the top
    if (i < 5):
        if (round(field[i + 2, j] % 1.0, 1) == new_color) and (round(field[i + 1, j] % 1.0, 1) == new_color):
            return False
    
    return True

In [86]:
def initialize_field(field):
    """
    Initialization of the battle field.
    Move from bottom left corner and add new elements.
    Input: 
    - field: numpy array of zeros, 7x6
    Output:
    - field: numpy array of floats, 7x6
    """
    colors = [0.2, 0.4, 0.6, 0.8]
    
    for i in list(range(field.shape[0]))[::-1]:
        for j in range(field.shape[1]):
            rd.seed()
            new_color = rd.choice(colors)
            
            while not color_fits(field, i, j, new_color):
                rd.seed()
                new_color = rd.choice(colors)
                
            field[i, j] = new_color
    
    return field

In [87]:
def visualize_field(field):
    """
    Visualizes the battle field in colored circles
    Handles bonus plates
    Input:
    - field: numpy array of floats, 7x6
    Output:
    - None
    """
    fig, ax = plt.subplots(figsize=(5, 7))

    ax.set_xlim((0, 10))
    ax.set_ylim((0, 13))

    circles = []

    for ii in range(7):
        for jj in range(6):
            if (round(field[ii, jj] % 1.0, 1) == 0.2):
                clr = "red"
            elif (round(field[ii, jj] % 1.0, 1) == 0.4):
                clr = "lightgreen"
            elif (round(field[ii, jj] % 1.0, 1) == 0.6):
                clr = "cyan"
            else:
                clr = "purple"

            #if (field[ii, jj] // 1 == 1.0):
            #    circles.append( mpatches.RegularPolygon((jj + 1, 7 - ii), numVertices=4, radius=0.4, color=clr) )
            #else:
            #    circles.append( mpatches.Circle((jj + 1, 7 - ii), radius=0.4, color=clr) )
             
            #
            # DEBUG
            #
            if (field[ii, jj] == 0.0):
                circles.append( mpatches.RegularPolygon((jj + 1, 7 - ii), numVertices=3, radius=0.2, color="black") )
            elif (field[ii, jj] // 1 == 1.0):
                circles.append( mpatches.RegularPolygon((jj + 1, 7 - ii), numVertices=4, radius=0.4, color=clr) )
            else:
                circles.append( mpatches.Circle((jj + 1, 7 - ii), radius=0.4, color=clr) )


    for circ in circles:
        ax.add_artist(circ)
        
    return

In [88]:
def plate_in_set(plate, row, col, length, direction):
    """
    Checks whether plate is with the set given by row, col, length, direction
    Input:
    - plate: plate location, tuple (row, column)
    - row: row where the set starts
    - col: column where the set starts
    - length: the set's length
    - direction: the set's direction
    Output:
    - True if the plate is within the set, False otherwise
    """
    if (direction == 0):
        # Horizontal set
        if ((plate[0] != row) or (plate[1] < col) or (plate[1] > (col + length - 1))):
            return False
    else:
        # Vertucal set
        if ((plate[1] != col) or (plate[0] < row) or (plate[0] > (row + length - 1))):
            return False
        
    return True

In [89]:
def fill_field(field, colors):
    """
    Наполняет поле после сжигания рядов.
    Сдвигает фишки вниз, заполняя верхний ряд каждый раз так, чтобы верхний ряд не создавал халявной тройки
    Начинает с левого нижнего угла, чтобы переиспользовать color_fits()
    Input:
    - field: numpy array of floats, 7x6
    - colors: list of 4 floats - color values, see the definition above
    Output:
    - numpy array of floats, 7x6 - updated field
    """
    for ii in list(range(7))[::-1]:
        for jj in range(6):
            while (field[ii, jj] == 0.):
                # Опускаем на один вниз
                # Если мы в самом верхнем ряду, то опускать не нужно
                if (ii != 0):
                    for iii in list(range(1, ii + 1))[::-1]:
                        field[iii, jj] = field[iii - 1, jj]

                # Заполняем верх
                new_color = rd.choice(colors)

                while not color_fits(field, 0, jj, new_color):
                    new_color = rd.choice(colors)

                field[0, jj] = new_color  
                
    return field

In [90]:
def make_move_v2(field, move, moves):
    """
    Physically moves plates according to the move
    Input:
    - field: numpy array of floats, 7x6
    - move: particular move to make, 1<=move<=142
    - moves: dictionary of all possible moves defined above
    Output:
    - new_field: updated field with two swapped plates
    - plate_start: coordinates of the plate that started the move, tuple (row, column)
    - plate_end: cooredinates of the plate that ended the move, tuple (row, column)
    """
    (start_row, start_col), (end_row, end_col) = moves[move]

    # Swap two plates and create new (modified) field
    new_field = np.array(field)
    temp_color = field[end_row, end_col]
    new_field[end_row, end_col] = field[start_row, start_col]
    new_field[start_row, start_col] = temp_color
        
    return new_field, (start_row, start_col), (end_row, end_col)

In [91]:
def get_sets(field):
    """
    Finds all sets and all bonus plates included into those sets
    Input:
    - field: numpy array of floats, 7x6
    Output:
    - list of sets coordinates: list of tuples (row_start, column_start, set_length, direction, color), counting from top left corner
      Direction is either 0 (horizontal) or 1 (vertical)
    - list of bonus plates included into sets: list of tuples (row, column, type). 
      Type is either 4 or 5 (reserved for future)
    """
    perm_bonus_plates = []
    perm_sets = []

    # Find all 3+ sets in horizontal row
    for ii in range(field.shape[0]):
        temp_bonus_plates = []
        jj = 0
        len = 1
        while (jj < field.shape[1]):
            if (jj > 0):
                if (round(field[ii, jj] % 1.0, 1) == round(field[ii, jj - 1] % 1.0, 1)):
                    len = len + 1
                else:
                    if (len >= 3):
                        # Add temp list of bonus plates to the permanent list of bonus plates
                        perm_bonus_plates = perm_bonus_plates + temp_bonus_plates
                        
                        # Add to permanent list of sets
                        perm_sets.append((ii, jj - len, len, 0, round(field[ii, jj - 1] % 1, 1)))
                        
                    temp_bonus_plates = []
                    len = 1
            
            if (field[ii, jj] > 1.):
                # Add to temp list of bonus plates
                temp_bonus_plates.append((ii, jj, 4))
            
            jj = jj + 1
            
        if (len >= 3):
            # Add temp list of bonus plates to the permanent list of bonus plates
            perm_bonus_plates = perm_bonus_plates + temp_bonus_plates

            # Add to permanent list of sets
            perm_sets.append((ii, jj - len, len, 0, round(field[ii, jj - 1] % 1, 1)))

    # Find all 3+ sets in vertical columns
    for jj in range(field.shape[1]):
        temp_bonus_plates = []
        ii = 0
        len = 1
        while (ii < field.shape[0]):
            if (ii > 0):
                if (round(field[ii, jj] % 1.0, 1) == round(field[ii - 1, jj] % 1, 1)):
                    len = len + 1
                else:
                    if (len >= 3):
                        # Add temp list of bonus plates to the permanent list of bonus plates
                        perm_bonus_plates = perm_bonus_plates + temp_bonus_plates
                        
                        # Add to permanent list of sets
                        perm_sets.append((ii - len, jj, len, 1, round(field[ii - 1, jj] % 1, 1)))
                        
                    temp_bonus_plates = []
                    len = 1
            
            if (field[ii, jj] > 1.):
                # Add to temp list of bonus plates
                temp_bonus_plates.append((ii, jj, 4))
            
            ii = ii + 1
            
        if (len >= 3):
            # Add temp list of bonus plates to the permanent list of bonus plates
            perm_bonus_plates = perm_bonus_plates + temp_bonus_plates

            # Add to permanent list of sets
            perm_sets.append((ii - len, jj, len, 1, round(field[ii - 1, jj] % 1, 1)))
            
    return perm_sets, perm_bonus_plates

In [92]:
def calculate_score_v2(field, plate_from, plate_to):
    """
    Calculates the score in the field. 
    Replaces all sets with zeros.
    Handles bonus plates: replaces required rows with zeros (Type 4)
    Puts bonus plates, should any set be of the length of 4
    Input:
    - field: numpy array of floats, 7x6
    - plate_from: coordinates of the plate where the move starts, tuple (row, column)
    - plate_to: coordinates of the plate where the move ends, tuple (row, column)
    Output:
    - score: int, 0+
    - field: modified field
    """
    # Get all sets with possible bonus plates
    sets, bonus_plates = get_sets(field)

    # Set all requires plates to zero
    #
    # First handle sets
    for st in sets:
        row = st[0]
        col = st[1]
        lng = st[2]
        drc = st[3]

        if (drc == 0):
            field[row, col:(col + lng)] = 0
        else:
            field[row:(row + lng), col] = 0
    #       
    # Then handle bonus plates/rows
    for pl in bonus_plates:
        row = pl[0]
        col = pl[1]
        typ = pl[2]

        if (typ == 4):
            field[row, :] = 0

    # Calculate score
    score = (field == 0.).sum()

    # Put new bonus plates. Specially care for the move coordinates
    for st in sets:
        row = st[0]
        col = st[1]
        lng = st[2]
        drc = st[3]
        clr = st[4]

        if (lng >= 4):
            if (plate_in_set(plate_from, row, col, lng, drc)):
                # Move start plate in set. Put new bonus plate according to the move coordinates
                field[plate_from[0], plate_from[1]] = clr + 1.0
                
                #
                # DEBUG
                #
                #print("DEBUG: set of 4+ was made!")
                
            elif (plate_in_set(plate_to, row, col, lng, drc)):
                # Move end plate in set. Put new bonus plate according to the move coordinates
                field[plate_to[0], plate_to[1]] = clr + 1.0
                
                #
                # DEBUG
                #
                #print("DEBUG: set of 4+ was made!")
                
            else:
                # Just put the new bonus plate at the very right/bottom of the set
                # This CANNOT happen during the manual move!
                # It CAN ONLY HAPPEN when the field is randomly filled with new plates
                if (drc == 0):
                    field[row, col + lng - 1] = clr + 1.0
                else:
                    field[row + lng - 1, col] = clr + 1.0

    return score, field

### Replay Memory Buffer

In [93]:
#
# Replay memory buffer
#
class ExperienceBuffer():
    '''
    Experience Replay Buffer
    Inspired by Andrea Lonza
    '''

    def __init__(self, buffer_size, gamma):
        # Constants
        self.gamma = gamma
        
        # Main Replay Memory buffer parts
        self.states_before = deque(maxlen=buffer_size)
        self.actions = deque(maxlen=buffer_size)
        self.total_rewards = deque(maxlen=buffer_size)
        self.states_after = deque(maxlen=buffer_size)
        self.last_moves = deque(maxlen=buffer_size)
   
    
    def add(self, state_before, action, reward, state_after, last_move):
        # Add certain items to corresponding buffers
        self.states_before.append(state_before)
        self.actions.append(action)

        self.total_rewards.append(reward)
        self.states_after.append(state_after)
        self.last_moves.append(last_move)
    
    
    def sample_minibatch(self, minibatch_size):
        '''
        Sample a minibatch of size batch_size
        Note1: always add the most recent completed move
        '''
        indices = rd.sample(range(len(self.states_before) - 1), minibatch_size - 1)
        # Add the most recent completed move index
        indices.append(len(self.states_before) - 1)
        
        minibatch_states_before = np.array([self.states_before[i] for i in indices]) 
        minibatch_actions = np.array([self.actions[i] for i in indices]) 
        minibatch_total_rewards = np.array([self.total_rewards[i] for i in indices]) 
        minibatch_states_after = np.array([self.states_after[i] for i in indices])  
        minibatch_last_moves = np.array([self.last_moves[i] for i in indices])   
        
        return minibatch_states_before, minibatch_actions, minibatch_total_rewards, minibatch_states_after, minibatch_last_moves
    
    
    def __len__(self):
        '''
        Return length of the current replay memory buffer
        Relevant for the first *minibatch_size* moves.
        '''
        return len(self.states_before)
    
    

### Deep Q-Nework

In [94]:
class AeroCNN(tf.keras.Model):

    def __init__(self):
        super(AeroCNN, self).__init__()
        
        self.gamma = GAMMA
        self.learning_rate = LEARNING_RATE
        
        self.optimizer = Adam(learning_rate=self.learning_rate)      
        
        self.input_layer = tf.keras.layers.InputLayer(input_shape=(7, 6, 1))
        
        self.hidden_layers = [] 
        self.hidden_layers.append(Conv2D(192, kernel_size=3, strides = (1, 1), padding='same', activation='relu', data_format = 'channels_last'))
        self.hidden_layers.append(Conv2D(128, kernel_size=3, strides = (1, 1), padding='same', activation='relu'))    
        self.hidden_layers.append(Flatten())                      
                                  
        self.output_layer = tf.keras.layers.Dense(142, activation='relu', kernel_initializer='RandomNormal')

                                  
    # Define model forward pass
    def call(self, inputs):
        
        z = self.input_layer(inputs)
                                  
        for layer in self.hidden_layers:
            z = layer(z)
                                  
        output = self.output_layer(z)
                                  
        return output
                   
        
    def train(self, s_before, actions, rewards, s_after, dones, TargetNet):
        
        rewards_next = np.max(TargetNet.predict(s_after), axis=1)
        actual_values = np.where(dones, rewards, rewards + self.gamma*rewards_next)
        
        with tf.GradientTape() as tape:
            prediction = self.call(s_before)
            
            selected_action_values = tf.math.reduce_sum(prediction*tf.one_hot(actions, 142), axis=1)  
            
            loss = tf.math.reduce_mean(tf.square(actual_values - selected_action_values))
            
        gradients = tape.gradient(loss, self.trainable_weights)
        
        self.optimizer.apply_gradients(zip(gradients, self.trainable_weights))
        
        return loss



### Initialize Replay Buffer

In [95]:
replay_memory = ExperienceBuffer(REPLAY_MEMORY_SIZE, GAMMA)

### Initialize Online CNN and Target CNN

In [96]:
cnn_online = AeroCNN()  
cnn_target = AeroCNN()

cnn_target.set_weights(cnn_online.get_weights())

## Dynamic Learning

In [None]:
%%time

# Number of moves made to follow the target CNN update strategy
total_moves = 1

# Initialize TensorBoard
#tensorboard_callback = TensorBoard(log_dir='./logs', profile_batch=5, histogram_freq=0)
                                                      
for game in range(GAMES_TO_PLAY):
    # Start one game
    game_score = 0
    successful_moves = 0

    # Initialize the game field
    field = np.zeros((7, 6))
    field = initialize_field(field)

    for m in range(NUMBER_OF_MOVES_IN_GAME):
        # Total score of one move
        reward = 0

        # Whether CNN made the move
        cnn_made_move_flag = False
        
        # If replay_memory has less than 64 moves, then make a random move
        if ((len(replay_memory) < MINIBATCH_SIZE) or (rd.random() > CNN_MOVE_PROB)):
            move = rd.randint(1, 142)
        else:
            # CNN selects a move
            cnn_made_move_flag = True
            X_data = np.expand_dims(np.expand_dims(field, axis=0), axis=3)
            move = cnn_target.predict(X_data).argmax() + 1

        # Make the move
        s_before = field
        new_field, plate_a, plate_b = make_move_v2(field, move, moves)

        # Calculate the score and update the field
        score, new_field = calculate_score_v2(new_field, plate_a, plate_b)
        
        # Если ход результативный, то обновляем поле и проверяем, получились ли новые цветовые ряды
        # Пока есть новые цветные ряды, обрабатываем их, обсчитываем и сдвигаем блюда
        successful_move_flag = False

        while (score > 0.):
            if (not successful_move_flag):
                successful_moves = successful_moves + 1
                successful_move_flag = True

            # Суммируем набранные очки
            reward = reward + score

            # Сдвигаем фишки вниз, заполняя верхний ряд каждый раз так, чтобы верхний ряд не создавал халявной тройки
            # Начинаем с левого нижнего угла (чтобы переиспользовать color_fits())
            new_field = fill_field(new_field, colors)

            # Считаем очки и обрабатываем новые возможные цветные ряды
            score, new_field = calculate_score_v2(new_field, (-1, -1), (-1, -1))

        # Увеличиваем счет игры
        game_score = game_score + reward
        
        # Update CNN move statistics
        if (cnn_made_move_flag):
            if (successful_move_flag):
                CNN_SUCCESSFUL_PREDICTION = CNN_SUCCESSFUL_PREDICTION + 1
                
            CNN_MOVES_COUNT = CNN_MOVES_COUNT + 1
            
        # Check whether it's the last move of the current game
        last_move = m == NUMBER_OF_MOVES_IN_GAME - 1
        
        #
        # Train CNN based on the score
        #
        if (len(replay_memory) >= MINIBATCH_SIZE):
            # Select random MINIBATCH_SIZE moves from replay memory buffer
            samples = replay_memory.sample_minibatch(MINIBATCH_SIZE)

            # Prepare some things for training
            S_before = np.expand_dims(samples[0], axis=3)
            S_after = np.expand_dims(samples[3], axis=3)            

            # Update online CNN weights: training step
            loss = cnn_online.train(S_before, samples[1], samples[2], S_after, samples[4], cnn_target)
        
        # Add new move to the replay memory
        replay_memory.add(field, move, reward, new_field, last_move)

        # If move is successful, update the play field
        if (successful_move_flag):
            field = new_field
            
        # After each 1000 moves update target CNN
        if (total_moves % UPDATE_TARGET_NET == 0):
            cnn_target.set_weights(cnn_online.get_weights())
            
        total_moves = total_moves + 1

    #
    # Calculate and display overall stats
    #
    # Проверяем, не обновили ли максимум
    if (game_score > MAXIMUM_SCORE):
        print(f"New maximum: {game_score}, after {game} games.")
        MAXIMUM_SCORE = game_score
        
    # After each 500 games output average game score, average number of successful moves per game
    TOTAL_SCORE_100 = TOTAL_SCORE_100 + game_score
    TOTAL_SUCCESSFUL_MOVES_100 = TOTAL_SUCCESSFUL_MOVES_100 + successful_moves
    
    if ((game % 500 == 0) and (game > 0)):
        avg_score = TOTAL_SCORE_100 / 500
        #AVG_SCORE_HIST.append(avg_score)
        TOTAL_SCORE_100 = 0.0
        
        avg_succ_moves = TOTAL_SUCCESSFUL_MOVES_100 / 500
        AVG_SUCC_MOVES_HIST.append(avg_succ_moves)
        TOTAL_SUCCESSFUL_MOVES_100 = 0.0

        print(f"Games: {game}, last 100 games avg score: {avg_score}, avg of successful moves: {avg_succ_moves}, loss {loss}")        
        print(f"CNN made {CNN_MOVES_COUNT} moves. Successful were {CNN_SUCCESSFUL_PREDICTION}")
        
        if (CNN_SUCCESSFUL_PREDICTION / CNN_MOVES_COUNT >= CNN_MOVE_PROB):
            CNN_MOVE_PROB = CNN_MOVE_PROB + 0.1
            
        CNN_MOVES_COUNT = 0
        CNN_SUCCESSFUL_PREDICTION = 0


Games: 500, last 100 games avg score: 58.226, avg of successful moves: 8.866, loss 6396805105319936.0
CNN made 4950 moves. Successful were 485
Games: 1000, last 100 games avg score: 58.464, avg of successful moves: 9.056, loss 5832553239937024.0
CNN made 5061 moves. Successful were 538
Games: 1500, last 100 games avg score: 57.228, avg of successful moves: 8.672, loss 7873952806338560.0
CNN made 4967 moves. Successful were 451
Games: 2000, last 100 games avg score: 57.57, avg of successful moves: 8.888, loss 8459367586201600.0
CNN made 5039 moves. Successful were 520
Games: 2500, last 100 games avg score: 58.254, avg of successful moves: 8.712, loss 9528447223201792.0
CNN made 4949 moves. Successful were 469
Games: 3000, last 100 games avg score: 57.296, avg of successful moves: 8.628, loss 8885053606068224.0
CNN made 4991 moves. Successful were 445
Games: 3500, last 100 games avg score: 56.34, avg of successful moves: 8.484, loss 1.0495594401366016e+16
CNN made 5009 moves. Successful 

Games: 28500, last 100 games avg score: 59.056, avg of successful moves: 8.902, loss 5.093043945550643e+16
CNN made 4960 moves. Successful were 498
Games: 29000, last 100 games avg score: 59.868, avg of successful moves: 8.948, loss 4.427193018168115e+16
CNN made 4861 moves. Successful were 479
Games: 29500, last 100 games avg score: 60.0, avg of successful moves: 8.92, loss 3.67458074492928e+16
CNN made 4921 moves. Successful were 519
Games: 30000, last 100 games avg score: 58.418, avg of successful moves: 8.748, loss 2.511508755133235e+16
CNN made 5066 moves. Successful were 451
Games: 30500, last 100 games avg score: 59.116, avg of successful moves: 8.92, loss 2.7822321402118144e+16
CNN made 4980 moves. Successful were 477
Games: 31000, last 100 games avg score: 56.452, avg of successful moves: 8.702, loss 2.367649899295539e+16
CNN made 5077 moves. Successful were 442
Games: 31500, last 100 games avg score: 56.688, avg of successful moves: 8.626, loss 2.061875071365939e+16
CNN made 

In [78]:
# tensorboard --logdir=./logs --bind_all &

print(MAXIMUM_SCORE)

230


## Test part

In [None]:
#field = np.zeros((7, 6))

df = pd.DataFrame(updated_field)
qgrid_widget = qgrid.show_grid(df, show_toolbar=True)
qgrid_widget

In [None]:
updated_df = qgrid_widget.get_changed_df()
updated_field = updated_df.values
visualize_field(updated_field)

In [None]:
get_sets(updated_field)

In [None]:
score, new_field = calculate_score_v2(updated_field, (6, 3), (5, 3))
print(score)
visualize_field(updated_field)

In [None]:
#updated_field_2 = fill_field(updated_field, colors)
#visualize_field(updated_field_2)

### Now the CNN has been trained.
### Start the long reinforcement-learning cycle

In [None]:
successful_moves

In [None]:
new_field = make_move(field, move)
print(new_field)

In [None]:
temp_field = calculate_score(new_field)

In [None]:
new_field = np.multiply(new_field, 1.0 - temp_field)

In [None]:
fill_field(new_field, colors)

In [None]:
#
# Save model
#
# v1: 20190329, trained on len(replay_memory) = 294912
#aero_cnn.save("Aero_CNN_v1")

## Backup

In [None]:
#
# Create the moves dictionary
#
moves = {}

for i in range(1, 143):
    old_row, old_column, old_direction = process_move_142(i)
    
    start_row = old_row - 1
    start_col = old_column - 1
    
    if (old_direction == "down"):
        end_row = start_row + 1
        end_col = start_col
    elif (old_direction == "up"):
        end_row = start_row - 1
        end_col = start_col
    elif (old_direction == "right"):
        end_row = start_row
        end_col = start_col + 1
    else:
        end_row = start_row
        end_col = start_col - 1
        
    moves[i] = ((start_row, start_col), (end_row, end_col))
    
print(moves)