In [23]:
import gym
import random
import numpy as np
import tflearn
from tflearn.layers.core import fully_connected, input_data, dropout
from tflearn.layers.estimator import regression
from statistics import mean, median
from collections import Counter

# Learning rate handed to the 'adam' regression layer in neural_network_model().
LR = 1e-3
# Single CartPole environment shared by every function and loop in this cell.
env = gym.make('CartPole-v0')
# Put the environment into a starting state before anything calls env.step().
env.reset()
# Upper bound on the number of steps played in one game.
goal_steps = 200 
# Minimum total reward a random game needs for its moves to be kept as training data.
score_requirement = 50
# Number of random games played by initial_population().
initial_games = 10000

def some_random_games_first():
    """Render five games in which every action is sampled from the action space.

    Each game starts with a reset, runs for at most ``goal_steps`` frames and
    stops early as soon as the environment reports that the game is done.
    Nothing is recorded or returned; this is purely a visual demo.
    """
    for _episode in range(5):
        env.reset()
        for _frame in range(goal_steps):
            env.render()
            sampled_action = env.action_space.sample()
            # Only the `done` flag of the step result is needed here.
            _obs, _reward, finished, _info = env.step(sampled_action)
            if finished:
                break
    
#some_random_games_first()

def initial_population():
    """Play random games and keep the moves of the games that scored well.

    Plays ``initial_games`` games of at most ``goal_steps`` random moves
    (0 or 1). For every game whose total reward is at least
    ``score_requirement``, each (observation, action) pair of that game is
    stored, with the action one-hot encoded: ``[1,0]`` for action 0 and
    ``[0,1]`` for action 1.

    Side effects:
        * writes the collected samples to 'saved.npy';
        * prints summary statistics of the accepted scores.

    Returns:
        list: ``[observation, one_hot_action]`` pairs. Empty when no game
        reached ``score_requirement``.
    """
    # [OBS, MOVES]
    training_data = []
    # all scores:
    scores = []
    # just the scores that met our threshold:
    accepted_scores = []

    for _ in range(initial_games):
        score = 0
        # moves specifically from this game:
        game_memory = []
        # Start each game from a fresh state. reset() returns the starting
        # observation (classic gym API, the same one whose 4-value step()
        # result is unpacked below), so the very first move can be paired
        # with the observation it was taken from instead of being dropped.
        prev_observation = env.reset()

        for _ in range(goal_steps):
            # choose random action (0 or 1)
            action = random.randrange(0,2)
            observation, reward, done, info = env.step(action)

            # The observation returned by step() is the state AFTER the
            # action, so the action is paired with the previous observation.
            game_memory.append([prev_observation, action])
            prev_observation = observation
            score+=reward
            if done: break

        # Only the total score decides whether a game is kept; nothing here
        # judges HOW that score was reached.
        if score >= score_requirement:
            accepted_scores.append(score)
            for seen, move in game_memory:
                # one-hot label (output layer of the network); a fresh list
                # per sample so labels never alias each other
                output = [0,1] if move == 1 else [1,0]
                training_data.append([seen, output])

        # save overall scores
        scores.append(score)

    # Saved for later reference. Each sample mixes an observation array with a
    # 2-element label, so the array is ragged and must be built with
    # dtype=object (NumPy >= 1.24 raises ValueError otherwise).
    training_data_save = np.array(training_data, dtype=object)
    np.save('saved.npy',training_data_save)

    # mean()/median() raise StatisticsError on empty data, so guard the case
    # where no game reached the threshold.
    if accepted_scores:
        print('Average accepted score:',mean(accepted_scores))
        print('Median score for accepted scores:',median(accepted_scores))
    else:
        print('No game reached the score requirement of', score_requirement)
    print(Counter(accepted_scores))

    return training_data

def neural_network_model(input_size):
    """Assemble the tflearn network and wrap it in a ``tflearn.DNN``.

    Layout, in order:
      * input layer named 'input' with shape ``[None, input_size, 1]``;
      * five relu fully connected layers of 128, 256, 512, 256 and 128 units,
        each followed by ``dropout(network, 0.8)``;
      * a 2-unit softmax output layer;
      * a regression layer named 'targets' using the 'adam' optimizer,
        learning rate ``LR`` and categorical cross-entropy loss.

    Args:
        input_size: number of values in one observation.

    Returns:
        tflearn.DNN: the model, logging to the 'log' tensorboard directory.

    NOTE(review): the traceback recorded under this cell complains about a
    placeholder called 'input_1/X', which suggests the graph already held an
    earlier 'input' layer when this ran again in the same kernel. Presumably
    the default graph needs resetting before rebuilding - confirm.
    """
    hidden_widths = (128, 256, 512, 256, 128)

    net = input_data(shape=[None, input_size, 1], name='input')

    # Same dense + dropout pair repeated for every hidden width.
    for width in hidden_widths:
        net = fully_connected(net, width, activation='relu')
        net = dropout(net, 0.8)

    net = fully_connected(net, 2, activation='softmax')
    net = regression(
        net,
        optimizer='adam',
        learning_rate=LR,
        loss='categorical_crossentropy',
        name='targets',
    )

    return tflearn.DNN(net, tensorboard_dir='log')

def train_model(training_data, model = False):
    """Fit a model on ``[observation, label]`` samples and return it.

    Args:
        training_data: sequence of ``[observation, label]`` pairs; only
            index 0 and index 1 of every pair are read.
        model: an existing model to keep training. When falsy, a new one is
            built with ``neural_network_model``.

    Returns:
        The model after ``fit`` has been called on it (3 epochs, run id
        'openaiex').
    """
    # Width of one observation, taken from the first sample.
    observations = np.array([sample[0] for sample in training_data])
    feature_count = len(training_data[0][0])
    X = observations.reshape(-1, feature_count, 1)
    y = [sample[1] for sample in training_data]

    if not model:
        model = neural_network_model(input_size=len(X[0]))

    fit_options = {
        'n_epoch': 3,
        'show_metric': True,
        'snapshot_step': 500,
        'run_id': 'openaiex',
    }
    model.fit({'input': X}, {'targets' : y}, **fit_options)

    return model

# Collect random-play samples, then fit the network on the accepted games.
training_data = initial_population()
model = train_model(training_data)

# Total reward of each evaluated game.
scores = []
# Every action the model picked, across all evaluated games.
choices = []

# Play 100 games with the trained model choosing the moves.
for each_game in range(100):
    score = 0
    game_memory = []
    # reset() returns the starting observation (classic gym API, the same one
    # whose 4-value step() result is unpacked below). Keeping it lets the model
    # choose the very first move too, instead of that move being random.
    prev_obs = env.reset()
    for _ in range(goal_steps):
        #env.render()

        # The network expects a batch shaped (-1, features, 1); take the
        # index of the larger of the two output values as the action.
        action = np.argmax(model.predict(prev_obs.reshape(-1,len(prev_obs),1))[0])

        choices.append(action)

        new_observation, reward, done, info = env.step(action)
        prev_obs = new_observation
        game_memory.append([new_observation, action])
        score+=reward
        if done: break

    scores.append(score)

print('Average Score:',sum(scores)/len(scores))
print('choice 1:{}  choice 0:{}'.format(choices.count(1)/len(choices),choices.count(0)/len(choices)))
print(score_requirement)

Average accepted score: 60.842233009708735
Median score for accepted scores: 57.0
Counter({50.0: 43, 51.0: 33, 54.0: 28, 57.0: 27, 53.0: 25, 55.0: 24, 58.0: 19, 52.0: 18, 60.0: 18, 56.0: 18, 63.0: 16, 62.0: 12, 68.0: 9, 67.0: 8, 73.0: 8, 65.0: 8, 71.0: 8, 72.0: 7, 66.0: 7, 59.0: 7, 75.0: 6, 84.0: 6, 61.0: 6, 70.0: 5, 74.0: 5, 64.0: 5, 76.0: 4, 79.0: 4, 69.0: 3, 87.0: 3, 78.0: 2, 91.0: 2, 81.0: 2, 77.0: 2, 82.0: 2, 89.0: 2, 85.0: 1, 90.0: 1, 80.0: 1, 105.0: 1, 98.0: 1, 122.0: 1, 139.0: 1, 111.0: 1, 88.0: 1, 99.0: 1})
---------------------------------
Run id: openaiex
Log directory: log/
INFO:tensorflow:Summary name Accuracy/Adam_0 (raw) is illegal; using Accuracy/Adam_0__raw_ instead.
INFO:tensorflow:Summary name Accuracy_1/Adam_1 (raw) is illegal; using Accuracy_1/Adam_1__raw_ instead.
---------------------------------
Training samples: 49310
Validation samples: 0
--


InvalidArgumentError: You must feed a value for placeholder tensor 'input_1/X' with dtype float and shape [?,4,1]
	 [[node input_1/X (defined at /home/anant/.local/lib/python3.6/site-packages/tflearn/layers/core.py:81)  = Placeholder[dtype=DT_FLOAT, shape=[?,4,1], _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

Caused by op 'input_1/X', defined at:
  File "/usr/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.6/dist-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelapp.py", line 497, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.6/dist-packages/tornado/platform/asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "/usr/lib/python3.6/asyncio/base_events.py", line 427, in run_forever
    self._run_once()
  File "/usr/lib/python3.6/asyncio/base_events.py", line 1440, in _run_once
    handle._run()
  File "/usr/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/usr/local/lib/python3.6/dist-packages/tornado/platform/asyncio.py", line 122, in _handle_events
    handler_func(fileobj, events)
  File "/usr/local/lib/python3.6/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/usr/local/lib/python3.6/dist-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/usr/local/lib/python3.6/dist-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/local/lib/python3.6/dist-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py", line 2901, in run_ast_nodes
    if self.run_code(code, result):
  File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py", line 2961, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-23-3f5c95b4fde2>", line 133, in <module>
    model = train_model(training_data)
  File "<ipython-input-23-3f5c95b4fde2>", line 125, in train_model
    model = neural_network_model(input_size=len(X[0]))
  File "<ipython-input-23-3f5c95b4fde2>", line 94, in neural_network_model
    network = input_data(shape=[None, input_size, 1], name='input')
  File "/home/anant/.local/lib/python3.6/site-packages/tflearn/layers/core.py", line 81, in input_data
    placeholder = tf.placeholder(shape=shape, dtype=dtype, name="X")
  File "/home/anant/.local/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py", line 1747, in placeholder
    return gen_array_ops.placeholder(dtype=dtype, shape=shape, name=name)
  File "/home/anant/.local/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 5206, in placeholder
    "Placeholder", dtype=dtype, shape=shape, name=name)
  File "/home/anant/.local/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/anant/.local/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "/home/anant/.local/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3274, in create_op
    op_def=op_def)
  File "/home/anant/.local/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1770, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'input_1/X' with dtype float and shape [?,4,1]
	 [[node input_1/X (defined at /home/anant/.local/lib/python3.6/site-packages/tflearn/layers/core.py:81)  = Placeholder[dtype=DT_FLOAT, shape=[?,4,1], _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]


In [22]:
# Shape of the whole sample collection as an array.
np_train = np.array(training_data)
print(np_train.shape)

# Stack the observations (index 0 of every sample), then reshape to
# (-1, features, 1) using the length of the first observation.
X = np.array([sample[0] for sample in training_data])
X = X.reshape(-1, len(training_data[0][0]), 1)
print("X : ", X[0])

(29700, 2)
X :  [[100]
 [ 31]
 [100]
 [ 31]
 [100]
 [ 31]
 [100]
 [ 31]
 [100]
 [ 31]
 [100]
 [ 31]
 [255]
 [  0]
 [  1]
 [  0]
 [255]
 [255]
 [ 29]
 [  0]
 [ 29]
 [128]
 [120]
 [104]
 [120]
 [ 16]
 [ 24]
 [  0]
 [  8]
 [  0]
 [152]
 [ 29]
 [162]
 [  0]
 [  0]
 [  0]
 [  0]
 [  0]
 [  0]
 [150]
 [  0]
 [255]
 [  0]
 [ 96]
 [  0]
 [ 96]
 [  0]
 [ 96]
 [  9]
 [224]
 [  0]
 [ 96]
 [  0]
 [ 96]
 [  0]
 [ 96]
 [ 10]
 [ 80]
 [  0]
 [  0]
 [  0]
 [  0]
 [  0]
 [  0]
 [  0]
 [  0]
 [  0]
 [ 96]
 [  0]
 [ 75]
 [ 51]
 [  7]
 [  7]
 [  7]
 [  3]
 [  0]
 [  0]
 [  0]
 [255]
 [  0]
 [  0]
 [  0]
 [ 48]
 [  1]
 [  0]
 [  0]
 [  0]
 [  0]
 [  0]
 [  0]
 [  0]
 [  0]
 [  0]
 [  0]
 [  0]
 [  0]
 [  0]
 [  0]
 [  0]
 [  7]
 [186]
 [198]
 [192]
 [ 48]
 [224]
 [  0]
 [  0]
 [  0]
 [  0]
 [  0]
 [  0]
 [  0]
 [  9]
 [  6]
 [  0]
 [  0]
 [ 10]
 [  0]
 [  0]
 [  1]
 [  0]
 [  1]
 [ 66]
 [  0]
 [255]
 [  2]
 [115]
 [ 21]]


In [25]:
import numpy as np
# Load previously saved samples; each entry is indexed below as
# [observation, label].
# NOTE(review): if this file stores an object array, newer NumPy versions
# presumably need allow_pickle=True here - confirm.
training_data = np.load('boxing_save.npy')

# Observations reshaped to (-1, features, 1); labels kept as a plain list.
X = np.array([sample[0] for sample in training_data])
X = X.reshape(-1, len(training_data[0][0]), 1)
y = [sample[1] for sample in training_data]

# Spot-check: length of one observation and the first label.
print("X: ", len(X[13444]))
print("y[0]: ", y[0])

X:  128
y[0]:  [0, 1, 0, 0]
