In [15]:
import gym
import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from keras.optimizers import Adam
from keras.callbacks import TensorBoard
import numpy as np
import itertools

env = gym.make('FrozenLake-v0')
print(env.observation_space)
print(env.action_space)
env.reset()  # this is what will reset the environment to the starting state––always use this first.

x = 0
y = 0

winpos = []
winmoves = []

while x < 10:
    env.reset()

    moves = []
    pos = []
    prevobs = 0
    while True:

        y = env.action_space.sample()
        obs, rew, done, info = env.step(y)
        moves.append(y)
        pos.append([prevobs])
        prevobs = obs
        # obs, rew, done, info = env.step(env.action_space.sample()) # take an action
        env.render()

        if rew > 0:
            winpos.extend(pos)
            winmoves.extend(moves)

        print(rew)
        if done:
            x = (rew + x)
            print(x)
            print(y)
            # print(moves)
            # print(pos)
            print("Winning moves: ", winmoves)
            print("Winning positions: ", winpos)
            print("Previous Observation: ", prevobs)
            break
LR = 0.001


# now that we've created data, let's create our neural net
# this will take an parameter of input_size, to make it useful for other environments
# but this will end up being 4 (our observations)
def neural_net_model(input_size):
    # to avoid awkward imports, we're going to just use the tf prefix for all
    # there's a little bit more going on here than in our titanic example, so let's take it slow

    # the first layer is our input layer
    # we have to tell it the shape of our data,
    # that's always [None, the number of inputs, other input layers, more layer, etc]
    # here we have a shape of 4 input (the observation) by 1 (one observation each time)
    # in this case, input_size, will be 4, but by using a variable, we could do other shapes
    #network = tf.input_data(shape=[None, input_size], name='input')
    network = Sequential()
    network.add(Dense(output_dim=input_size, activation='relu'))

    # a hidden layer with 128 nodes, using regular linear regression (this is default)
    #network = tf.fully_connected(network, 128, activation='relu')
    network.add(Dense(output_dim=128, activation='relu'))
    # so dropout is a bit weird. basically, to prevent overfitting (over analyzing the problem),
    # we'll randomly drop part of the network. We do this to attempt to get a more organic,
    # less strict network that won't create rules that are too harsh.
    # more info: https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf
    # we could experiment with this, I think
    #network = tf.dropout(network, 0.8)
    network.add(Dropout(0.2))

    # alright lets make a bunch more layers, keeping it symmetrical
    #network = tf.fully_connected(network, 256, activation='relu', name="hlayer1")
    #network = tf.dropout(network, 0.8)
    network.add(Dense(output_dim=256, activation='relu'))
    network.add(Dropout(0.2))

    #network = tf.fully_connected(network, 512, activation='relu', name="hlayer2")
    #network = tf.dropout(network, 0.8)
    network.add(Dense(output_dim=512, activation='relu'))
    network.add(Dropout(0.2))

    #network = tf.fully_connected(network, 256, activation='relu', name="hlayer3")
    #network = tf.dropout(network, 0.8)
    network.add(Dense(output_dim=256, activation='relu'))
    network.add(Dropout(0.2))

    #network = tf.fully_connected(network, 128, activation='relu', name="hlayer4")
    #network = tf.dropout(network, 0.8)
    network.add(Dense(output_dim=128, activation='relu'))
    network.add(Dropout(0.2))

    # this is our output layer.
    # it contains an array like [l, r], probabilities for each lef or right
    # if you're curious about activations:
    # https://github.com/Kulbear/deep-learning-nano-foundation/wiki/ReLU-and-Softmax-Activation-Functions
    # essentially the softmax activation will squash our outputs into a probability distrubution
    #network = tf.fully_connected(network, 4, activation='softmax', name="out")
    #network = tf.regression(network, learning_rate=LR)
    network.add(Dense(output_dim=4, activation='softmax'))
    network.compile(loss ='mean_squared_error', optimizer=Adam(lr=0.001), metrics =['accuracy'])

    # make a Deep Neaural Net wtih with network
    #model = tf.DNN(network, tensorboard_dir='log')

    return network


def train_model(moves, pos):
    # this is the awkard part, we need to organize that data a bit better,
    # in order to actually feed it to the net

    #pos = list(itertools.chain.from_iterable(pos))
    pos = map(np.array, pos)
    
    model = neural_net_model(input_size=1)
    #model.fit(pos, moves, n_epoch=5, show_metric=True, run_id='openai_learning')
    model.fit(pos, moves, batch_size = 512, nb_epoch = 5, verbose = 1)
    return model


def play_with_model(model):
    scores = 0
    choices = []
    print("Playing wtih Trained Model.....")
    for each_game in range(10):
        score = 0
        game_memory = []
        prev_obs = 0
        env.reset()
        for _ in range(100):
            #env.render()

            if prev_obs == 0:
                action = env.action_space.sample()
            else:
                # this clever call finds the index of the max argument
                # since predict will return something like [][0.23.., 0.76..]]
                # it will return 0 if the first is bigger, 1 if the second
                # which is the same as 'left' or 'right' in the action space
                action = np.argmax(model.predict([[prev_obs]])[0])

            choices.append(action)

            new_observation, reward, done, info = env.step(action)
            prev_obs = new_observation
            game_memory.append([new_observation, action])
            score += reward
            if done: break

        scores += score

    print(scores)


onehot = []
for m in winmoves:
    onehot.append([0, 0, 0, 0])
    onehot[len(onehot) - 1][m] = 1
model = train_model(onehot, winpos)
play_with_model(model)


Discrete(16)
Discrete(4)
  (Down)
SFFF
[41mF[0mHFH
FFFH
HFFG
0.0
  (Right)
SFFF
FHFH
[41mF[0mFFH
HFFG
0.0
  (Right)
SFFF
[41mF[0mHFH
FFFH
HFFG
0.0
  (Left)
SFFF
[41mF[0mHFH
FFFH
HFFG
0.0
  (Left)
SFFF
FHFH
[41mF[0mFFH
HFFG
0.0
  (Left)
SFFF
FHFH
FFFH
[41mH[0mFFG
0.0
0.0
0
Winning moves:  []
Winning positions:  []
Previous Observation:  12
  (Right)
[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Left)
[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Right)
[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Left)
[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Right)
S[41mF[0mFF
FHFH
FFFH
HFFG
0.0
  (Right)
S[41mF[0mFF
FHFH
FFFH
HFFG
0.0
  (Right)
S[41mF[0mFF
FHFH
FFFH
HFFG
0.0
  (Up)
SF[41mF[0mF
FHFH
FFFH
HFFG
0.0
  (Up)
SF[41mF[0mF
FHFH
FFFH
HFFG
0.0
  (Up)
S[41mF[0mFF
FHFH
FFFH
HFFG
0.0
  (Up)
S[41mF[0mFF
FHFH
FFFH
HFFG
0.0
  (Down)
[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Up)
[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Up)
[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Left)
[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Right)
SFFF
[41mF[0

[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Right)
[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Right)
[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Right)
S[41mF[0mFF
FHFH
FFFH
HFFG
0.0
  (Left)
[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Up)
S[41mF[0mFF
FHFH
FFFH
HFFG
0.0
  (Right)
SFFF
F[41mH[0mFH
FFFH
HFFG
0.0
1.0
2
Winning moves:  [2, 2, 0, 0, 0, 2, 2, 3, 2, 1, 3]
Winning positions:  [[0], [4], [8], [8], [4], [8], [9], [13], [14], [14], [14]]
Previous Observation:  5
  (Left)
[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Up)
S[41mF[0mFF
FHFH
FFFH
HFFG
0.0
  (Up)
SF[41mF[0mF
FHFH
FFFH
HFFG
0.0
  (Down)
S[41mF[0mFF
FHFH
FFFH
HFFG
0.0
  (Down)
SFFF
F[41mH[0mFH
FFFH
HFFG
0.0
1.0
1
Winning moves:  [2, 2, 0, 0, 0, 2, 2, 3, 2, 1, 3]
Winning positions:  [[0], [4], [8], [8], [4], [8], [9], [13], [14], [14], [14]]
Previous Observation:  5
  (Up)
S[41mF[0mFF
FHFH
FFFH
HFFG
0.0
  (Down)
SFFF
F[41mH[0mFH
FFFH
HFFG
0.0
1.0
1
Winning moves:  [2, 2, 0, 0, 0, 2, 2, 3, 2, 1, 3]
Winning positions:  [[0], [4], [8], [8], [4], [

  (Right)
SFFF
[41mF[0mHFH
FFFH
HFFG
0.0
  (Right)
[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Right)
S[41mF[0mFF
FHFH
FFFH
HFFG
0.0
  (Right)
SFFF
F[41mH[0mFH
FFFH
HFFG
0.0
2.0
2
Winning moves:  [2, 2, 0, 0, 0, 2, 2, 3, 2, 1, 3, 0, 2, 2, 1, 1, 2, 0, 1, 1, 3]
Winning positions:  [[0], [4], [8], [8], [4], [8], [9], [13], [14], [14], [14], [0], [0], [0], [1], [2], [6], [10], [14], [13], [14]]
Previous Observation:  5
  (Right)
SFFF
[41mF[0mHFH
FFFH
HFFG
0.0
  (Left)
[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Down)
S[41mF[0mFF
FHFH
FFFH
HFFG
0.0
  (Up)
[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Left)
SFFF
[41mF[0mHFH
FFFH
HFFG
0.0
  (Down)
SFFF
F[41mH[0mFH
FFFH
HFFG
0.0
2.0
1
Winning moves:  [2, 2, 0, 0, 0, 2, 2, 3, 2, 1, 3, 0, 2, 2, 1, 1, 2, 0, 1, 1, 3]
Winning positions:  [[0], [4], [8], [8], [4], [8], [9], [13], [14], [14], [14], [0], [0], [0], [1], [2], [6], [10], [14], [13], [14]]
Previous Observation:  5
  (Left)
SFFF
[41mF[0mHFH
FFFH
HFFG
0.0
  (Right)
SFFF
FHFH
[41mF[0mFFH
HFFG
0.0
  

  (Right)
[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Left)
[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Left)
[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Right)
S[41mF[0mFF
FHFH
FFFH
HFFG
0.0
  (Down)
SFFF
F[41mH[0mFH
FFFH
HFFG
0.0
3.0
1
Winning moves:  [2, 2, 0, 0, 0, 2, 2, 3, 2, 1, 3, 0, 2, 2, 1, 1, 2, 0, 1, 1, 3, 1, 2, 1, 2, 1, 2, 3]
Winning positions:  [[0], [4], [8], [8], [4], [8], [9], [13], [14], [14], [14], [0], [0], [0], [1], [2], [6], [10], [14], [13], [14], [0], [4], [8], [9], [10], [14], [14]]
Previous Observation:  5
  (Left)
SFFF
[41mF[0mHFH
FFFH
HFFG
0.0
  (Up)
SFFF
[41mF[0mHFH
FFFH
HFFG
0.0
  (Left)
SFFF
[41mF[0mHFH
FFFH
HFFG
0.0
  (Up)
SFFF
[41mF[0mHFH
FFFH
HFFG
0.0
  (Up)
SFFF
F[41mH[0mFH
FFFH
HFFG
0.0
3.0
3
Winning moves:  [2, 2, 0, 0, 0, 2, 2, 3, 2, 1, 3, 0, 2, 2, 1, 1, 2, 0, 1, 1, 3, 1, 2, 1, 2, 1, 2, 3]
Winning positions:  [[0], [4], [8], [8], [4], [8], [9], [13], [14], [14], [14], [0], [0], [0], [1], [2], [6], [10], [14], [13], [14], [0], [4], [8], [9], [10], [14], [14]]
P

[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Up)
[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Down)
SFFF
[41mF[0mHFH
FFFH
HFFG
0.0
  (Left)
[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Up)
[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Right)
S[41mF[0mFF
FHFH
FFFH
HFFG
0.0
  (Up)
S[41mF[0mFF
FHFH
FFFH
HFFG
0.0
  (Up)
[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Right)
SFFF
[41mF[0mHFH
FFFH
HFFG
0.0
  (Right)
SFFF
FHFH
[41mF[0mFFH
HFFG
0.0
  (Up)
SFFF
[41mF[0mHFH
FFFH
HFFG
0.0
  (Right)
[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Up)
[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Up)
[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Up)
S[41mF[0mFF
FHFH
FFFH
HFFG
0.0
  (Up)
[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Down)
S[41mF[0mFF
FHFH
FFFH
HFFG
0.0
  (Right)
SFFF
F[41mH[0mFH
FFFH
HFFG
0.0
4.0
2
Winning moves:  [2, 2, 0, 0, 0, 2, 2, 3, 2, 1, 3, 0, 2, 2, 1, 1, 2, 0, 1, 1, 3, 1, 2, 1, 2, 1, 2, 3, 1, 1, 1, 0, 2, 0, 2, 0, 3, 2, 2, 2]
Winning positions:  [[0], [4], [8], [8], [4], [8], [9], [13], [14], [14], [14], [0], [0], [0], [1], [2], [6], [10], [14], [

S[41mF[0mFF
FHFH
FFFH
HFFG
0.0
  (Right)
SF[41mF[0mF
FHFH
FFFH
HFFG
0.0
  (Left)
S[41mF[0mFF
FHFH
FFFH
HFFG
0.0
  (Left)
S[41mF[0mFF
FHFH
FFFH
HFFG
0.0
  (Right)
SFFF
F[41mH[0mFH
FFFH
HFFG
0.0
5.0
2
Winning moves:  [2, 2, 0, 0, 0, 2, 2, 3, 2, 1, 3, 0, 2, 2, 1, 1, 2, 0, 1, 1, 3, 1, 2, 1, 2, 1, 2, 3, 1, 1, 1, 0, 2, 0, 2, 0, 3, 2, 2, 2, 2, 0, 3, 1, 3, 3, 0, 3, 3, 2, 1, 0, 0, 1]
Winning positions:  [[0], [4], [8], [8], [4], [8], [9], [13], [14], [14], [14], [0], [0], [0], [1], [2], [6], [10], [14], [13], [14], [0], [4], [8], [9], [10], [14], [14], [0], [4], [4], [4], [8], [9], [13], [14], [14], [13], [13], [14], [0], [1], [1], [2], [6], [2], [1], [0], [1], [2], [2], [6], [10], [14]]
Previous Observation:  5
  (Up)
S[41mF[0mFF
FHFH
FFFH
HFFG
0.0
  (Left)
[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Down)
SFFF
[41mF[0mHFH
FFFH
HFFG
0.0
  (Left)
[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Down)
SFFF
[41mF[0mHFH
FFFH
HFFG
0.0
  (Down)
SFFF
[41mF[0mHFH
FFFH
HFFG
0.0
  (Down)
SFFF
FHFH
[41mF[

SFFF
FHFH
[41mF[0mFFH
HFFG
0.0
  (Right)
SFFF
[41mF[0mHFH
FFFH
HFFG
0.0
  (Right)
SFFF
FHFH
[41mF[0mFFH
HFFG
0.0
  (Left)
SFFF
[41mF[0mHFH
FFFH
HFFG
0.0
  (Down)
SFFF
[41mF[0mHFH
FFFH
HFFG
0.0
  (Right)
SFFF
F[41mH[0mFH
FFFH
HFFG
0.0
5.0
2
Winning moves:  [2, 2, 0, 0, 0, 2, 2, 3, 2, 1, 3, 0, 2, 2, 1, 1, 2, 0, 1, 1, 3, 1, 2, 1, 2, 1, 2, 3, 1, 1, 1, 0, 2, 0, 2, 0, 3, 2, 2, 2, 2, 0, 3, 1, 3, 3, 0, 3, 3, 2, 1, 0, 0, 1]
Winning positions:  [[0], [4], [8], [8], [4], [8], [9], [13], [14], [14], [14], [0], [0], [0], [1], [2], [6], [10], [14], [13], [14], [0], [4], [8], [9], [10], [14], [14], [0], [4], [4], [4], [8], [9], [13], [14], [14], [13], [13], [14], [0], [1], [1], [2], [6], [2], [1], [0], [1], [2], [2], [6], [10], [14]]
Previous Observation:  5
  (Down)
[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Down)
SFFF
[41mF[0mHFH
FFFH
HFFG
0.0
  (Left)
SFFF
FHFH
[41mF[0mFFH
HFFG
0.0
  (Down)
SFFF
FHFH
FFFH
[41mH[0mFFG
0.0
5.0
1
Winning moves:  [2, 2, 0, 0, 0, 2, 2, 3, 2, 1, 3, 0, 2, 2, 1

Winning moves:  [2, 2, 0, 0, 0, 2, 2, 3, 2, 1, 3, 0, 2, 2, 1, 1, 2, 0, 1, 1, 3, 1, 2, 1, 2, 1, 2, 3, 1, 1, 1, 0, 2, 0, 2, 0, 3, 2, 2, 2, 2, 0, 3, 1, 3, 3, 0, 3, 3, 2, 1, 0, 0, 1]
Winning positions:  [[0], [4], [8], [8], [4], [8], [9], [13], [14], [14], [14], [0], [0], [0], [1], [2], [6], [10], [14], [13], [14], [0], [4], [8], [9], [10], [14], [14], [0], [4], [4], [4], [8], [9], [13], [14], [14], [13], [13], [14], [0], [1], [1], [2], [6], [2], [1], [0], [1], [2], [2], [6], [10], [14]]
Previous Observation:  5
  (Right)
S[41mF[0mFF
FHFH
FFFH
HFFG
0.0
  (Up)
SF[41mF[0mF
FHFH
FFFH
HFFG
0.0
  (Left)
SFFF
FH[41mF[0mH
FFFH
HFFG
0.0
  (Right)
SFFF
FHF[41mH[0m
FFFH
HFFG
0.0
5.0
2
Winning moves:  [2, 2, 0, 0, 0, 2, 2, 3, 2, 1, 3, 0, 2, 2, 1, 1, 2, 0, 1, 1, 3, 1, 2, 1, 2, 1, 2, 3, 1, 1, 1, 0, 2, 0, 2, 0, 3, 2, 2, 2, 2, 0, 3, 1, 3, 3, 0, 3, 3, 2, 1, 0, 0, 1]
Winning positions:  [[0], [4], [8], [8], [4], [8], [9], [13], [14], [14], [14], [0], [0], [0], [1], [2], [6], [10], [14], [13], [14],

  (Down)
SFFF
FHFH
FFFH
[41mH[0mFFG
0.0
6.0
1
Winning moves:  [2, 2, 0, 0, 0, 2, 2, 3, 2, 1, 3, 0, 2, 2, 1, 1, 2, 0, 1, 1, 3, 1, 2, 1, 2, 1, 2, 3, 1, 1, 1, 0, 2, 0, 2, 0, 3, 2, 2, 2, 2, 0, 3, 1, 3, 3, 0, 3, 3, 2, 1, 0, 0, 1, 0, 0, 0, 1, 2, 2, 1]
Winning positions:  [[0], [4], [8], [8], [4], [8], [9], [13], [14], [14], [14], [0], [0], [0], [1], [2], [6], [10], [14], [13], [14], [0], [4], [8], [9], [10], [14], [14], [0], [4], [4], [4], [8], [9], [13], [14], [14], [13], [13], [14], [0], [1], [1], [2], [6], [2], [1], [0], [1], [2], [2], [6], [10], [14], [0], [0], [4], [8], [9], [10], [14]]
Previous Observation:  12
  (Down)
SFFF
[41mF[0mHFH
FFFH
HFFG
0.0
  (Up)
SFFF
[41mF[0mHFH
FFFH
HFFG
0.0
  (Right)
SFFF
F[41mH[0mFH
FFFH
HFFG
0.0
6.0
2
Winning moves:  [2, 2, 0, 0, 0, 2, 2, 3, 2, 1, 3, 0, 2, 2, 1, 1, 2, 0, 1, 1, 3, 1, 2, 1, 2, 1, 2, 3, 1, 1, 1, 0, 2, 0, 2, 0, 3, 2, 2, 2, 2, 0, 3, 1, 3, 3, 0, 3, 3, 2, 1, 0, 0, 1, 0, 0, 0, 1, 2, 2, 1]
Winning positions:  [[0], [4], [8], [8], [4], [8

[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Down)
[41mS[0mFFF
FHFH
FFFH
HFFG
0.0
  (Down)
S[41mF[0mFF
FHFH
FFFH
HFFG
0.0
  (Right)
S[41mF[0mFF
FHFH
FFFH
HFFG
0.0
  (Right)
SF[41mF[0mF
FHFH
FFFH
HFFG
0.0
  (Right)
SFFF
FH[41mF[0mH
FFFH
HFFG
0.0
  (Up)
SFFF
F[41mH[0mFH
FFFH
HFFG
0.0
6.0
3
Winning moves:  [2, 2, 0, 0, 0, 2, 2, 3, 2, 1, 3, 0, 2, 2, 1, 1, 2, 0, 1, 1, 3, 1, 2, 1, 2, 1, 2, 3, 1, 1, 1, 0, 2, 0, 2, 0, 3, 2, 2, 2, 2, 0, 3, 1, 3, 3, 0, 3, 3, 2, 1, 0, 0, 1, 0, 0, 0, 1, 2, 2, 1]
Winning positions:  [[0], [4], [8], [8], [4], [8], [9], [13], [14], [14], [14], [0], [0], [0], [1], [2], [6], [10], [14], [13], [14], [0], [4], [8], [9], [10], [14], [14], [0], [4], [4], [4], [8], [9], [13], [14], [14], [13], [13], [14], [0], [1], [1], [2], [6], [2], [1], [0], [1], [2], [2], [6], [10], [14], [0], [0], [4], [8], [9], [10], [14]]
Previous Observation:  5
  (Up)
S[41mF[0mFF
FHFH
FFFH
HFFG
0.0
  (Down)
SF[41mF[0mF
FHFH
FFFH
HFFG
0.0
  (Right)
SFFF
FH[41mF[0mH
FFFH
HFFG
0.0
  (Down)
SF

SFFF
FHFH
[41mF[0mFFH
HFFG
0.0
  (Left)
SFFF
FHFH
[41mF[0mFFH
HFFG
0.0
  (Left)
SFFF
FHFH
[41mF[0mFFH
HFFG
0.0
  (Down)
SFFF
FHFH
[41mF[0mFFH
HFFG
0.0
  (Left)
SFFF
FHFH
[41mF[0mFFH
HFFG
0.0
  (Left)
SFFF
[41mF[0mHFH
FFFH
HFFG
0.0
  (Right)
SFFF
F[41mH[0mFH
FFFH
HFFG
0.0
8.0
2
Winning moves:  [2, 2, 0, 0, 0, 2, 2, 3, 2, 1, 3, 0, 2, 2, 1, 1, 2, 0, 1, 1, 3, 1, 2, 1, 2, 1, 2, 3, 1, 1, 1, 0, 2, 0, 2, 0, 3, 2, 2, 2, 2, 0, 3, 1, 3, 3, 0, 3, 3, 2, 1, 0, 0, 1, 0, 0, 0, 1, 2, 2, 1, 0, 0, 0, 2, 2, 1, 1, 0, 1, 2, 1, 3, 1, 3, 3, 2, 2, 2, 1, 0, 2, 2, 1, 2, 1]
Winning positions:  [[0], [4], [8], [8], [4], [8], [9], [13], [14], [14], [14], [0], [0], [0], [1], [2], [6], [10], [14], [13], [14], [0], [4], [8], [9], [10], [14], [14], [0], [4], [4], [4], [8], [9], [13], [14], [14], [13], [13], [14], [0], [1], [1], [2], [6], [2], [1], [0], [1], [2], [2], [6], [10], [14], [0], [0], [4], [8], [9], [10], [14], [0], [0], [0], [0], [4], [8], [9], [10], [14], [14], [14], [14], [0], [4], [0], [0], [



ValueError: Please provide as model inputs either a single array or a list of arrays. You passed: x=<map object at 0x10e0ae9b0>