This code allows the neuroevolution agent's behavior to develop in a more "emergent" way, giving the neural network direct control over the button presses in the game environment (with a little help to get out of infinite loops).  

This approach is clearly inferior to the 'decisive' model in terms of the time needed to reach a given score, but on rare occasions it can lead to semi-sensible play.

In [None]:
## %matplotlib auto
# import relevant libraries
import retro, numpy as np, cv2, neat, pickle, time, random, warnings
from scipy.ndimage.measurements import label as sp_label

warnings.filterwarnings('ignore')

random.seed(12345)

######## Begin eval_genomes function ########
def eval_genomes(genomes, config, shrink=8, render=False):
    """Play one game per genome and store the result in ``genome.fitness``.

    Every frame the emulator observation is converted to grayscale, cropped,
    down-scaled and colour-remapped; the flattened image is fed to a recurrent
    NEAT network whose first five outputs are thresholded into button presses.
    Fitness combines the (scaled) emulator reward, a penalty for growing
    column heights and a bonus for changes in same-coloured islands.

    Relies on the module-level ``env`` (the retro environment) and opens an
    OpenCV window showing the image the network receives.

    Args:
        genomes: iterable of ``(genome_id, genome)`` pairs.
        config: NEAT configuration handed to ``RecurrentNetwork.create``; its
            ``pop_size`` (100 if absent) is used for the progress printout.
        shrink: divisor applied to the cropped play-field dimensions before
            the image is resized for the network.
        render: when truthy, ``env.render()`` is called every frame.

    Returns:
        None. Each genome's ``fitness`` attribute is set when its game ends.
    """
    # generation size, used only for the progress printout
    popul_size = getattr(config, 'pop_size', 100)
    # reward-drought length (in frames) at which a run is aborted
    # NOTE(review): `counter` is reset every time it reaches `stuck_limit`,
    # so this limit is never exceeded in practice -- confirm intended design.
    counter_limit = 5000
    # reward-drought length (in frames) at which a down press is forced
    stuck_limit = 100
    # network outputs above this level count as a button press
    output_threshold = 0.5

    # reset the environment to initialize gym retro (and to learn the
    # observation dtype for the colour lookup table)
    ob = env.reset()
    # color mapping to improve contrast and homogenize colors
    colors = np.array([192, 156, 138, 62, 166, 90, 92, 46, 128, 121, 69, 59])
    new_colors = np.array([202, 202, 172, 172, 142, 142, 112, 112, 82, 82, 52, 52])
    # build the 256-entry lookup table; adding the table rolled by one also
    # maps each gray level directly above a listed colour
    reshade_map = np.zeros(256, dtype=ob.dtype)
    reshade_map[colors] = new_colors
    reshade_map = reshade_map + np.roll(reshade_map, 1)

    # valid connections for column labeling: vertical neighbours only
    # (builtin int: the np.int alias was removed from NumPy)
    c_valid_conns = np.array((0, 1, 0, 0, 1, 0, 0, 1, 0), dtype=int).reshape((3, 3))
    # valid connections for island labeling: full 3x3 neighbourhood inside a
    # colour layer, no connection between layers
    i_valid_conns = np.zeros((3, 3, 3))
    i_valid_conns[1] = 1

    # cropped region of the screen; legend: y0, h, x0, w
    dims = [8, 208, 16, 96]
    y0, height, x0, width = dims
    # pre-drop scaled dims
    sc_dims = [int(i / shrink) for i in dims]
    # (width, height) handed to cv2.resize; every second row is dropped later
    net_size = (int(sc_dims[3] / 2), int(sc_dims[1]))

    # make an OpenCV window (once; it is reused for every genome)
    cv2.namedWindow('What the neural net sees', cv2.WINDOW_NORMAL)

    # for each member of the population
    for genome_counter, (genome_id, genome) in enumerate(genomes, start=1):
        # sequence id of this genome in the current generation
        genome_in_gen = genome_counter % popul_size
        # create the neural net
        net = neat.nn.recurrent.RecurrentNetwork.create(genome, config)
        # fitness, frames since the last reward, frames played
        fitness, counter, frame = 0, 0, 0
        # previous-frame totals used to detect changes on the board
        prev_height_total, prev_isle_total = 0, 0
        done = False
        # reset the environment
        ob = env.reset()
        # begin tracking of cycle time
        start_time = time.time()

        while not done:
            # open a render window to follow the action if requested
            if render:
                env.render()
            frame += 1

            # grayscale, crop to the play field, shrink and recolour
            input_img = cv2.cvtColor(ob, cv2.COLOR_BGR2GRAY)
            input_img = input_img[y0:y0 + height, x0:x0 + width]
            input_img = cv2.resize(input_img, net_size)
            resh_img = reshade_map[input_img]
            # throw out the now-unnecessary y-resolution
            resc_img = resh_img[::2]
            # display the (rastered) observation space as NEAT model sees it
            cv2.imshow('What the neural net sees', resc_img)
            cv2.waitKey(1)
            # build the input to the neural net
            imgarray = np.ravel(resc_img)
            # match range of colors to range of label outputs (52..202 -> 1..6)
            unit_img = np.where(resc_img == 0, 0, (resc_img - 22) / 30)

            # column height penalty (also yields the grounded-cell mask)
            grounded, height_total, col_pen = _column_penalty(
                unit_img, prev_height_total, c_valid_conns)
            prev_height_total = height_total

            # color island bonus, with the falling block masked out
            isle_total, isle_bonus = _island_bonus(
                grounded * unit_img, prev_isle_total, i_valid_conns)
            prev_isle_total = isle_total

            # the net is activated on every frame (it is recurrent, so this
            # also advances its internal state)
            nnOutput = net.activate(imgarray)
            step_threshed = [1 if x > output_threshold else 0 for x in nnOutput]

            # 12-entry button vector; presumably the Genesis layout
            # B, A, MODE, START, UP, DOWN, LEFT, RIGHT, C, Y, X, Z -- TODO confirm
            forced_down = counter == stuck_limit
            if forced_down:
                # get out of the infinite loop by pressing down once
                stepInput = [0, 0, 0, 0,
                             0, 1, 0, 0,
                             0, 0, 0, 0]
                # and then reset the counter
                counter = 0
            else:
                stepInput = [step_threshed[0], 0, 0, 0,
                             0,                step_threshed[1],
                             step_threshed[2], step_threshed[3],
                             0, step_threshed[4], 0, 0]

            # feed the neural net output to the emulator
            ob, rew, done, info = env.step(stepInput)

            if rew > 1 and rew != 10000:
                # when removing gems: apply scaled reward to fitness
                fitness += rew * 500
            elif rew == 10000:
                # when wildcard/magic block hits bottom: apply scaled reward
                fitness += rew * 5
            elif not forced_down:
                # raw reward counts only when the net acted on its own; the
                # forced down press must not be credited
                fitness += rew
            # apply column penalty to fitness
            fitness -= float(col_pen) * 0.2
            # apply scaled isle creation bonus to fitness
            fitness += float(isle_bonus) * 30

            # any reward ends the drought
            if rew >= 1:
                counter = 0
            if rew >= 30:
                # give status update on a substantial reward
                print(f"Frame {frame}: Earned {rew} reward. Fitness: {fitness:0.3f}")
            else:
                counter += 1

            # if the reward drought is too severe
            if counter > counter_limit:
                # indicate such
                print(f'*** Counter limit reached at frame {frame} ***' )
                # and abort the run
                done = True

        # at the end of the game print out the high-level results...
        print(f"ID:{genome_id} ({genome_in_gen}/{popul_size}) Final frame: {frame} Fitness: {fitness:0.3f} Score: {info['score']} ")
        # ... and the game over indicator
        if counter <= counter_limit:
            print(f'                                        ****** }}}}>>>>>ID {genome_id} GAME OVER after {(time.time() - start_time):.3f} s <<<<<{{{{ ******')
        # record final fitness
        genome.fitness = fitness


def _column_penalty(unit_img, prev_height_total, valid_conns):
    """Return ``(grounded_mask, height_total, penalty)`` for one frame.

    Cells are labelled with ``valid_conns``; a cell is "grounded" when its
    label also occurs in the bottom row, which drops the falling block. The
    penalty ``sum(col_hts**4 - 3**4)`` applies only when the summed grounded
    column heights grew since the previous frame, otherwise it is 0.
    """
    columns, _ = sp_label(unit_img, valid_conns)
    bottom_row = columns[-1]
    grounded = np.isin(columns, bottom_row[np.nonzero(bottom_row)])
    col_hts = grounded.sum(axis=0)
    height_total = col_hts.sum()
    if height_total > prev_height_total:
        penalty = (col_hts**4 - 3**4).sum()
    else:
        penalty = 0
    return grounded, height_total, penalty


def _island_bonus(isle_img, prev_isle_total, valid_conns, n_layers=7):
    """Return ``(isle_total, bonus)`` for the grounded board ``isle_img``.

    The board is split into one layer per colour value ``0..n_layers-1`` and
    connected islands are labelled inside each layer. The bonus is the sum of
    cubed island sizes, granted only when the total island area differs from
    the previous frame's total, otherwise it is 0.
    """
    # one layer per colour: layer k keeps only the cells whose value is k
    color_ids = np.arange(n_layers).reshape(n_layers, 1, 1)
    layered = isle_img * (isle_img == color_ids)
    isle_labels, _ = sp_label(layered, valid_conns)
    # island sizes (via return_counts) for all the unique labels
    _, isle_sizes = np.unique(isle_labels, return_counts=True)
    # zero out the background count, which would otherwise dominate
    isle_sizes[0] = 0
    isle_total = isle_sizes.sum()
    bonus = (isle_sizes**3).sum() if isle_total != prev_isle_total else 0
    return isle_total, bonus
# close any lingering environment if necessary (e.g. when this code is re-run
# in the same session); on a fresh run `env` does not exist yet, so the
# resulting NameError is deliberately ignored along with any close() failure
try:
    env.close()
except Exception:
    pass
######## END eval_genomes function ########

######## Major parameter specifications ########

# specify game to load with its config file (comment/uncomment as appropriate)
game, state, config_file = 'Columns-Genesis', 'Arcade.Easy.Level0', 'config-Columns-emergent'

# specify checkpoint file if necessary as a string ('0' if not to be used)
# chkpt = '0'
# specify checkpoint index manually
chkpt_index = 12
chkpt = game + '-neat-chkpt-cp-'+ str(chkpt_index)

# specify type of retro environment observation space via index of below list
input_type_index = 0
# specify observation type
input_types = [retro.Observations.IMAGE, retro.Observations.RAM]

######## END Major parameter specifications ########



# create the retro environment for the chosen game, including starting state
env = retro.make(game, state,
                 obs_type=input_types[input_type_index],
                 use_restricted_actions=retro.Actions.ALL)

# restore the checkpoint if specified
if chkpt != '0':
    print(f"Opening checkpoint file {chkpt}...")
    p = neat.Checkpointer.restore_checkpoint(chkpt)
else:
    # otherwise, load in the configuration for the NEAT algorithm
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_file)

    # and create the NEAT population with the above specified configuration
    p = neat.Population(config)

# prepare statistics reporting
p.add_reporter(neat.StdOutReporter(True))

stats = neat.StatisticsReporter()

p.add_reporter(stats)

# and create checkpoint file as things progress (every generation, and at
# least every 300 seconds)
p.add_reporter(neat.Checkpointer(generation_interval = 1,
                                 time_interval_seconds = 300,
                                 filename_prefix = game + '-neat-chkpt-cp-'))


# set about the evolution process
winner = p.run(eval_genomes)

######## ParallelEvaluator not passing attributes correctly
# or run in parallel by creating a parallel evaluator instance and using its .evaluate()
# p_e = neat.ParallelEvaluator(4, eval_genomes)

# winner = p.run(p_e.evaluate)


# close the render window when finished; this is best-effort, but only
# ordinary errors are ignored so that Ctrl-C / SystemExit still propagate
try:
    env.render(close=True)
except Exception:
    pass

# pickle the final result (pickle protocol 1)
with open(game+'-winner.pkl', 'wb') as output:
    pickle.dump(winner, output, 1)