In [1]:
# import libraries
import random
import numpy as np
import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
from statistics import median, mean
from collections import Counter
from IPython.display import clear_output # only for jupyter notebook

Instructions for updating:
Colocations handled automatically by placer.


In [2]:
# import game env
# NOTE(review): `puzzle` is presumably a project-local module that must sit next to
# this notebook -- confirm; it is imported here rather than in the import cell above.
from puzzle import GameGrid
# One shared environment instance: the cells below all read and mutate this global.
env = GameGrid()
# Put the board into its starting position before anything is displayed or played.
env.reset()

In [3]:
# display current state
# The recorded output shows a 4x4 grid of tile values (0 presumably marks an empty cell).
env.display_state()


[0, 0, 0, 0]
[2, 0, 0, 0]
[0, 0, 0, 0]
[0, 2, 0, 0]


# Generate Random Data
Play a few games with random moves to check that the environment is working properly.

In [4]:
def some_random_games_first(episodes=5, max_steps=200):
    """Play a few games with sampled moves, drawing the board before every move.

    This is a smoke test for the global ``env``: it checks that reset, action
    sampling, stepping and rendering all work together. Nothing is recorded.

    Args:
        episodes: Number of independent games to play (default 5).
        max_steps: Upper bound on the number of moves per game (default 200);
            a game stops earlier as soon as the environment reports ``done``.

    Returns:
        None. The only effects are the rendered output and the mutated ``env``.
    """
    # Each pass of the outer loop is its own game.
    for _ in range(episodes):
        env.reset()
        for _ in range(max_steps):
            # Redraw the board in place of the previous frame.
            # Rendering is slow; drop these two lines if you do not need to watch.
            clear_output()
            env.display_state()

            # Ask the environment for a move (presumably a random one -- confirm
            # in puzzle.GameGrid). The one-hot encoding used later in this
            # notebook lists the moves as 'w', 's', 'd', 'a' = up, down, right, left.
            action = env.action_space()

            # step() hands back (observation, reward, done, info); only the
            # game-over flag matters for this smoke test.
            _, _, done, _ = env.step(action)
            if done:
                break

In [5]:
# Run the smoke test. Each frame clears the previous output, so only the last
# rendered board remains visible once the cell finishes.
some_random_games_first()


[4, 8, 16, 2]
[32, 128, 8, 8]
[4, 8, 64, 4]
[2, 16, 4, 2]


# Generate and Save Training Data

In [27]:
# Keep a game's (state, action) pairs only if its total score is at least this value.
# NOTE(review): the recorded run of initial_population reports accepted scores in the
# hundreds and thousands, so a threshold of 16 probably filters nothing -- confirm and
# consider raising it.
score_requirement = 16
# Number of random games played to build the training set.
initial_games = 100
# Maximum number of moves per game; a game ends earlier when the env reports done.
goal_steps = 1000

In [29]:
def initial_population():
    """Play ``initial_games`` games with sampled moves and save the good ones.

    Every game runs for at most ``goal_steps`` moves on the global ``env``.
    Games whose total reward is at least ``score_requirement`` contribute all
    of their recorded ``[observation, one_hot_action]`` pairs to the training
    set, which is written to ``data/saved.npy``. Summary statistics of the
    accepted scores are printed.

    Returns:
        None.

    Raises:
        ValueError: If the environment produces an action that has no one-hot
            encoding (previously this silently reused the preceding label).
    """
    import os  # local import: only needed to make sure the output folder exists

    save_path = 'data/saved.npy'

    # Action key -> one-hot label, in the order [UP, DOWN, RIGHT, LEFT].
    one_hot_by_action = {
        "'w'": [1, 0, 0, 0],
        "'s'": [0, 1, 0, 0],
        "'d'": [0, 0, 1, 0],
        "'a'": [0, 0, 0, 1],
    }

    # [observation, one-hot move] pairs from every accepted game.
    training_data = []
    # Total scores of the games that met the threshold.
    accepted_scores = []

    for _ in range(initial_games):
        # Start every game from a fresh board. Resetting only *after* a game
        # let the first game inherit whatever state an earlier cell left
        # behind (e.g. a finished board).
        env.reset()
        score = 0
        # (previous observation, action) pairs of this game only.
        game_memory = []
        prev_observation = []

        for _ in range(goal_steps):
            action = env.action_space()
            observation, reward, done, _ = env.step(action)

            # The observation is the result OF the action, so the action is
            # paired with the observation seen before it.
            # NOTE(review): the board before the very first move is never
            # observed here, so each game's first move is not recorded.
            if len(prev_observation) > 0:
                game_memory.append([prev_observation, action])
            prev_observation = observation
            score += reward
            if done:
                break

        # Keep every move of a game that scored well enough. Only the final
        # score is used as the selection signal, not how it was reached.
        if score >= score_requirement:
            accepted_scores.append(score)
            for state, move in game_memory:
                if move not in one_hot_by_action:
                    raise ValueError('Unknown action %r from env.action_space()' % (move,))
                # Copy so that samples never share one label list.
                training_data.append([state, list(one_hot_by_action[move])])

    # Leave the environment reset for the next cell, as before.
    env.reset()

    # Persist the data for later cells. dtype=object keeps this working when
    # observation and label have different shapes: newer NumPy raises on such
    # ragged input instead of silently building an object array.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    training_data_save = np.array(training_data, dtype=object)
    np.save(save_path, training_data_save)

    # mean()/median() raise on empty data, so guard the no-accepted-games case.
    if accepted_scores:
        print('Average accepted score:',mean(accepted_scores))
        print('Median score for accepted scores:',median(accepted_scores))
        print(Counter(accepted_scores))
    else:
        print('No game reached the score requirement of', score_requirement)

# Generate the training set: plays the games, writes data/saved.npy and prints
# statistics of the accepted scores.
initial_population()

Average accepted score: 11470.04
Median score for accepted scores: 7624.0
Counter({4972: 1, 9614: 1, 34830: 1, 7538: 1, 13420: 1, 4952: 1, 23636: 1, 7040: 1, 6834: 1, 13160: 1, 3968: 1, 9988: 1, 6276: 1, 3746: 1, 3046: 1, 3654: 1, 54452: 1, 14382: 1, 24646: 1, 4726: 1, 5294: 1, 17950: 1, 15938: 1, 10004: 1, 24598: 1, 14882: 1, 9374: 1, 4434: 1, 23388: 1, 520: 1, 9968: 1, 15558: 1, 11544: 1, 18732: 1, 11076: 1, 3758: 1, 4088: 1, 10070: 1, 5102: 1, 2436: 1, 12262: 1, 7004: 1, 5778: 1, 5862: 1, 18810: 1, 35746: 1, 9818: 1, 5022: 1, 3880: 1, 17042: 1, 6018: 1, 4742: 1, 5042: 1, 5130: 1, 4334: 1, 1674: 1, 13206: 1, 3270: 1, 15860: 1, 5342: 1, 10090: 1, 15858: 1, 4958: 1, 11758: 1, 44238: 1, 7492: 1, 14486: 1, 5044: 1, 4982: 1, 45816: 1, 26186: 1, 17006: 1, 3748: 1, 5792: 1, 5940: 1, 21482: 1, 9894: 1, 29470: 1, 6812: 1, 11036: 1, 4860: 1, 3886: 1, 2794: 1, 7038: 1, 15166: 1, 34906: 1, 7710: 1, 4548: 1, 9994: 1, 3556: 1, 6062: 1, 5806: 1, 9626: 1, 5386: 1, 10234: 1, 5288: 1, 9150: 1, 4904: 1