In [218]:
import gym
import random
import numpy as np
import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
from statistics import median, mean
from collections import Counter
import torch
import torch.nn as nn
import torch.nn.functional as F

In [219]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
#device = 'cpu'
#model.to(device)
device

'cuda'

In [250]:
LR = 0.0001
env = gym.make("CartPole-v0")
env.reset()
num_steps = 500
score_requirement = 60
num_games = 15000

In [239]:
def random_moves():
    # Each of these is its own game. Moves are randomly smapled
    for episode in range(5):
        env.reset()
        for t in range(num_steps):
            env.render()

            action = env.action_space.sample()
            
            # steps with the random action
            observation, reward, done, info = env.step(action)
            if done:
                break
                
#random_moves()

In [276]:
def make_training_data():
    training_data = []
    scores = []
    good_scores = []
    for _ in range(num_games):
        # initialize parameters
        score = 0
        memory = []
        prev_obs = []
        for _ in range(num_steps):
            # take a random step
            action = env.action_space.sample()
            observation, reward, done, _ = env.step(action)
            
            # add previous observation (ie game data) to memory
            if len(prev_obs) > 0:
                memory.append([observation, action])
            # TODO: add some sort of reinforcement
            prev_obs = observation
            score += reward
            if done:
                break
        if score >= score_requirement:
            good_scores.append(score)
            for entry in memory:
                if entry[1] == 0:
                    output = [1,0]
                else:
                    output = [0,1]
                training_data.append([entry[0],output])
        env.reset()
        scores.append(score)
    print('average:', mean(good_scores))
    print(Counter(good_scores))
        
    return training_data
                

In [277]:
training_data = make_training_data()
training_data[1]

average: 72.82040816326531
Counter({61.0: 19, 66.0: 17, 63.0: 16, 67.0: 15, 60.0: 15, 73.0: 13, 64.0: 12, 62.0: 11, 68.0: 10, 65.0: 10, 70.0: 8, 69.0: 8, 71.0: 7, 76.0: 7, 80.0: 6, 86.0: 6, 74.0: 5, 82.0: 5, 72.0: 5, 84.0: 4, 88.0: 4, 83.0: 4, 87.0: 4, 77.0: 4, 91.0: 3, 85.0: 3, 92.0: 3, 125.0: 2, 75.0: 2, 78.0: 2, 107.0: 1, 96.0: 1, 182.0: 1, 100.0: 1, 106.0: 1, 79.0: 1, 141.0: 1, 113.0: 1, 94.0: 1, 93.0: 1, 89.0: 1, 95.0: 1, 90.0: 1, 104.0: 1, 116.0: 1})


[array([ 0.04360298, -0.19616465, -0.00608461,  0.26641586], dtype=float32),
 [1, 0]]

In [242]:
X = torch.tensor(np.array([i[0] for i in training_data])).to(device)
y = torch.tensor(np.array([i[1] for i in training_data])).to(device)

In [243]:
X[0]

tensor([ 0.0357, -0.0494, -0.0079,  0.0130], device='cuda:0')

In [266]:
class NeuralNetwork(nn.Module):
    def __init__(self, input_size):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_size, 128),
            nn.ReLU(),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 1),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = torch.sigmoid(self.linear_relu_stack(x))
        return logits

In [267]:
model = NeuralNetwork(len(training_data[0][0]))
model.to(device)
criterion = torch.nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=LR)
epochs = 5

In [268]:
def train(model, x, y, optimizer, criterion):
    model.train()
    model.zero_grad()
    output = model(x).float()
    loss = criterion(torch.squeeze(output), y)
    loss.backward()
    optimizer.step()

    return loss, output

def predict(model, obs):
    temp = obs.unsqueeze(0)
    output = torch.squeeze(torch.sigmoid(model(temp)))
    return int(output.round().cpu().detach())

In [269]:
for _ in range(epochs):
    train(model, X, y.float(), optimizer, criterion)

In [270]:
with torch.no_grad():
    output = torch.squeeze(torch.sigmoid(model(X))).round().cpu().detach()

# accuracy of current model
np.sum(output.numpy() != y.cpu().numpy())/len(y.cpu().numpy())

0.5021642588116252

In [249]:
predict(model,X[8])

1

In [329]:
def neural_network_model(input_size):

    network = input_data(shape=[None, input_size, 1], name='input')

    network = fully_connected(network, 128, activation='relu')
    #network = dropout(network, rate = 0.1)

    network = fully_connected(network, 256, activation='relu')
    #network = dropout(network, rate = 0.1)

    network = fully_connected(network, 128, activation='relu')
    network = dropout(network, 0.9)

    network = fully_connected(network, 2, activation='softmax')
    network = regression(network, optimizer='adam', learning_rate=LR, loss='categorical_crossentropy', name='targets')
    model = tflearn.DNN(network)

    return model

def train_model(training_data, model=False):

    X_train = np.array([i[0] for i in training_data], dtype=np.float32).reshape(-1,len(training_data[0][0]),1)
    y_train = np.array([i[1] for i in training_data], dtype=np.float32)

    if not model:
        model = neural_network_model(input_size = len(X_train[0]))
    print(X_train.shape)
    model.fit({'input': X_train}, {'targets': y_train}, n_epoch=5, snapshot_step=500, show_metric=True)
    return model

In [330]:
X_train = np.array([i[0] for i in training_data], dtype = 'float').reshape(-1,len(training_data[0][0]),1)
type(X_train[0])

numpy.ndarray

In [331]:
y_train = np.array([i[1] for i in training_data])
y_train.shape

(17596, 2)

In [332]:
model = train_model(training_data)

(17596, 4, 1)
---------------------------------
Run id: 1WCUTS
Log directory: /tmp/tflearn_logs/
INFO:tensorflow:Summary name Accuracy_12/Adam_0 (raw) is illegal; using Accuracy_12/Adam_0__raw_ instead.
INFO:tensorflow:Summary name Accuracy_13/Adam_1 (raw) is illegal; using Accuracy_13/Adam_1__raw_ instead.
---------------------------------
Training samples: 246344
Validation samples: 0
--


InvalidArgumentError: 2 root error(s) found.
  (0) INVALID_ARGUMENT: You must feed a value for placeholder tensor 'input_2/X' with dtype float and shape [?,4,1]
	 [[node input_2/X
 (defined at c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\tflearn\layers\core.py:81)
]]
	 [[Crossentropy_1/Mean/_33]]
  (1) INVALID_ARGUMENT: You must feed a value for placeholder tensor 'input_2/X' with dtype float and shape [?,4,1]
	 [[node input_2/X
 (defined at c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\tflearn\layers\core.py:81)
]]
0 successful operations.
0 derived errors ignored.

Errors may have originated from an input operation.

Operation defined at: (most recent call last)
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\runpy.py", line 197, in _run_module_as_main
>>>     return _run_code(code, main_globals, None,
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\runpy.py", line 87, in _run_code
>>>     exec(code, run_globals)
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
>>>     app.launch_new_instance()
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\traitlets\config\application.py", line 846, in launch_instance
>>>     app.start()
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\ipykernel\kernelapp.py", line 677, in start
>>>     self.io_loop.start()
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
>>>     self.asyncio_loop.run_forever()
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\asyncio\base_events.py", line 596, in run_forever
>>>     self._run_once()
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\asyncio\base_events.py", line 1890, in _run_once
>>>     handle._run()
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\asyncio\events.py", line 80, in _run
>>>     self._context.run(self._callback, *self._args)
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\ipykernel\kernelbase.py", line 457, in dispatch_queue
>>>     await self.process_one()
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\ipykernel\kernelbase.py", line 446, in process_one
>>>     await dispatch(*args)
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\ipykernel\kernelbase.py", line 353, in dispatch_shell
>>>     await result
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\ipykernel\kernelbase.py", line 648, in execute_request
>>>     reply_content = await reply_content
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\ipykernel\ipkernel.py", line 353, in do_execute
>>>     res = shell.run_cell(code, store_history=store_history, silent=silent)
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
>>>     return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\IPython\core\interactiveshell.py", line 2914, in run_cell
>>>     result = self._run_cell(
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\IPython\core\interactiveshell.py", line 2960, in _run_cell
>>>     return runner(coro)
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\IPython\core\async_helpers.py", line 78, in _pseudo_sync_runner
>>>     coro.send(None)
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\IPython\core\interactiveshell.py", line 3185, in run_cell_async
>>>     has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
>>>     if (await self.run_code(code, result,  async_=asy)):
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
>>>     exec(code_obj, self.user_global_ns, self.user_ns)
>>> 
>>>   File "C:\Users\Alex\AppData\Local\Temp/ipykernel_2124/1928123591.py", line 1, in <module>
>>>     model = train_model(training_data)
>>> 
>>>   File "C:\Users\Alex\AppData\Local\Temp/ipykernel_2124/3777733726.py", line 26, in train_model
>>>     model = neural_network_model(input_size = len(X_train[0]))
>>> 
>>>   File "C:\Users\Alex\AppData\Local\Temp/ipykernel_2124/3777733726.py", line 3, in neural_network_model
>>>     network = input_data(shape=[None, input_size, 1], name='input')
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\tflearn\layers\core.py", line 81, in input_data
>>>     placeholder = tf.placeholder(shape=shape, dtype=dtype, name="X")
>>> 


Operation defined at: (most recent call last)
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\runpy.py", line 197, in _run_module_as_main
>>>     return _run_code(code, main_globals, None,
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\runpy.py", line 87, in _run_code
>>>     exec(code, run_globals)
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
>>>     app.launch_new_instance()
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\traitlets\config\application.py", line 846, in launch_instance
>>>     app.start()
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\ipykernel\kernelapp.py", line 677, in start
>>>     self.io_loop.start()
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
>>>     self.asyncio_loop.run_forever()
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\asyncio\base_events.py", line 596, in run_forever
>>>     self._run_once()
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\asyncio\base_events.py", line 1890, in _run_once
>>>     handle._run()
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\asyncio\events.py", line 80, in _run
>>>     self._context.run(self._callback, *self._args)
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\ipykernel\kernelbase.py", line 457, in dispatch_queue
>>>     await self.process_one()
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\ipykernel\kernelbase.py", line 446, in process_one
>>>     await dispatch(*args)
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\ipykernel\kernelbase.py", line 353, in dispatch_shell
>>>     await result
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\ipykernel\kernelbase.py", line 648, in execute_request
>>>     reply_content = await reply_content
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\ipykernel\ipkernel.py", line 353, in do_execute
>>>     res = shell.run_cell(code, store_history=store_history, silent=silent)
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
>>>     return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\IPython\core\interactiveshell.py", line 2914, in run_cell
>>>     result = self._run_cell(
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\IPython\core\interactiveshell.py", line 2960, in _run_cell
>>>     return runner(coro)
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\IPython\core\async_helpers.py", line 78, in _pseudo_sync_runner
>>>     coro.send(None)
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\IPython\core\interactiveshell.py", line 3185, in run_cell_async
>>>     has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
>>>     if (await self.run_code(code, result,  async_=asy)):
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
>>>     exec(code_obj, self.user_global_ns, self.user_ns)
>>> 
>>>   File "C:\Users\Alex\AppData\Local\Temp/ipykernel_2124/1928123591.py", line 1, in <module>
>>>     model = train_model(training_data)
>>> 
>>>   File "C:\Users\Alex\AppData\Local\Temp/ipykernel_2124/3777733726.py", line 26, in train_model
>>>     model = neural_network_model(input_size = len(X_train[0]))
>>> 
>>>   File "C:\Users\Alex\AppData\Local\Temp/ipykernel_2124/3777733726.py", line 3, in neural_network_model
>>>     network = input_data(shape=[None, input_size, 1], name='input')
>>> 
>>>   File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\tflearn\layers\core.py", line 81, in input_data
>>>     placeholder = tf.placeholder(shape=shape, dtype=dtype, name="X")
>>> 

Original stack trace for 'input_2/X':
  File "c:\users\alex\appdata\local\programs\python\python39\lib\runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "c:\users\alex\appdata\local\programs\python\python39\lib\runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\traitlets\config\application.py", line 846, in launch_instance
    app.start()
  File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\ipykernel\kernelapp.py", line 677, in start
    self.io_loop.start()
  File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
    self.asyncio_loop.run_forever()
  File "c:\users\alex\appdata\local\programs\python\python39\lib\asyncio\base_events.py", line 596, in run_forever
    self._run_once()
  File "c:\users\alex\appdata\local\programs\python\python39\lib\asyncio\base_events.py", line 1890, in _run_once
    handle._run()
  File "c:\users\alex\appdata\local\programs\python\python39\lib\asyncio\events.py", line 80, in _run
    self._context.run(self._callback, *self._args)
  File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\ipykernel\kernelbase.py", line 457, in dispatch_queue
    await self.process_one()
  File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\ipykernel\kernelbase.py", line 446, in process_one
    await dispatch(*args)
  File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\ipykernel\kernelbase.py", line 353, in dispatch_shell
    await result
  File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\ipykernel\kernelbase.py", line 648, in execute_request
    reply_content = await reply_content
  File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\ipykernel\ipkernel.py", line 353, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\IPython\core\interactiveshell.py", line 2914, in run_cell
    result = self._run_cell(
  File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\IPython\core\interactiveshell.py", line 2960, in _run_cell
    return runner(coro)
  File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\IPython\core\async_helpers.py", line 78, in _pseudo_sync_runner
    coro.send(None)
  File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\IPython\core\interactiveshell.py", line 3185, in run_cell_async
    has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
  File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\IPython\core\interactiveshell.py", line 3377, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\IPython\core\interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\Alex\AppData\Local\Temp/ipykernel_2124/1928123591.py", line 1, in <module>
    model = train_model(training_data)
  File "C:\Users\Alex\AppData\Local\Temp/ipykernel_2124/3777733726.py", line 26, in train_model
    model = neural_network_model(input_size = len(X_train[0]))
  File "C:\Users\Alex\AppData\Local\Temp/ipykernel_2124/3777733726.py", line 3, in neural_network_model
    network = input_data(shape=[None, input_size, 1], name='input')
  File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\tflearn\layers\core.py", line 81, in input_data
    placeholder = tf.placeholder(shape=shape, dtype=dtype, name="X")
  File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\tensorflow\python\ops\array_ops.py", line 3289, in placeholder
    return gen_array_ops.placeholder(dtype=dtype, shape=shape, name=name)
  File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 6892, in placeholder
    _, _, _op, _outputs = _op_def_library._apply_op_helper(
  File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 744, in _apply_op_helper
    op = g._create_op_internal(op_type_name, inputs, dtypes=None,
  File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\tensorflow\python\framework\ops.py", line 3697, in _create_op_internal
    ret = Operation(
  File "c:\users\alex\appdata\local\programs\python\python39\lib\site-packages\tensorflow\python\framework\ops.py", line 2101, in __init__
    self._traceback = tf_stack.extract_stack_for_node(self._c_op)


In [205]:
scores = []
choices = []
for _ in range(10):
    env.reset()
    score = 0
    prev_obs = []
    for _ in range(num_steps):
        env.render()

        if len(prev_obs) == 0:
            action = env.action_space.sample()
        else:
            action = predict(model, torch.tensor(prev_obs).to(device))
            #print(action)
        observation, reward, done, _ = env.step(action)
        prev_obs = observation
        score += reward
        if done:
            break
        
    scores.append(score)
print(mean(scores))

10.2
