In [1]:
import pickle
from copy import copy
from importlib import reload
from random import choice, randint, sample, shuffle

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

%matplotlib inline
size = plt.rcParams["figure.figsize"]
size[0] = 2
size[1] = 2
plt.rcParams["figure.figsize"] = size

import environment as env
import visual
import trainer as train
import agents.random_plus as rp
import agents.Q as Q
import agents.human as human
import agents.activated as activated
import util
import agents.perfect as perfect

In [28]:
# Re-import the project modules (same order as before) so that edits made to
# their source files are picked up by the running kernel.
for _module in (env, Q, visual, activated, util, train, perfect):
    reload(_module)

## Load and Play

In [9]:
# Q agent with layer sizes 16-100-100-16, restored from the "4x4_2.npz" file.
agent1 = Q.Q(
    [16, 100, 100, 16],
    gamma=0.6,
    epsilon=0.1,
    beta=0.0,
)
agent1.load("4x4_2.npz")

# Alternative second agent, currently disabled:
# agent2 = Q.Q([16, 100, 200, 16], gamma = .6, epsilon = 0.1, beta = 0.0)
# agent2.load("4x4_server_p2_17.npz")

# Opponents: a RandomAgentPlus instance and a Human instance.
RandomAgent = rp.RandomAgentPlus()
Human = human.Human()

# Environment constructed with argument 4 (presumably a 4x4 board, matching
# the 16-unit input layer above -- confirm against env.Env).
e = env.Env(4)

In [11]:
# Start a GameWithConfidences session on `e` with agent1 and the human player.
# NOTE(review): the meaning of the positional -1 is not visible here -- check
# visual.GameWithConfidences before changing it.
gui = visual.GameWithConfidences(
    e,
    agent1,
    Human,
    -1,
    piece_size=100,
)

In [19]:
# Pit agent1 against the random agent, passing a non-recording trainer episode
# for agent1. The third argument (1000) is presumably the number of games and
# 1e-6 presumably a learning rate -- TODO confirm against env.Env.play and
# trainer.Trainer.get_episode.
agent1_trainer = train.Trainer(agent1, record=False)
e.play(
    agent1,
    RandomAgent,
    1000,
    trainer_a1=agent1_trainer.get_episode(1e-6),
)

Playing ********** Done


## Train Against Each Other

In [None]:
# Create both agents
a1 = Q.Q([9, 100, 9], gamma = .5, epsilon = 0.1, beta = .01)
a2 = Q.Q([9, 100, 9], gamma = .5, epsilon = 0.1, beta = .01)
# Create the trainers
t1 = train.Trainer(a1)
t2 = train.Trainer(a2)
# Create the environment
train_env = env.Env(3)

In [None]:
# Train the agents
train_env.play(a1, a2, 1000, trainer_a1 = t1.get_episode(rotate = True),
               trainer_a2 = t2.get_episode(rotate = True), final_reward = True)

In [None]:
# Watch the agents play
gui = visual.GameWithConfidences(train_env, a1, a2, -1, piece_size = 100)

In [None]:
# Watch winning agent play random opponent
ra = rp.RandomAgentPlus()
gui = visual.GameWithConfidences(train_env, ra, a2, -1, piece_size = 100)

In [None]:
# Play human against winning agent
h = human.Human()
gui = visual.GameWithConfidences(train_env, a1, h, -1, piece_size = 100)

## Analyze Perfect Move Dictionaries

In [None]:
# Get move dictionary
moves = util.get_move_dict("agents/perfect/3.txt", size = 3)

In [None]:
# Re-import the project modules (same order as before) so that edits made to
# their source files are picked up by the running kernel.
for _module in (env, Q, visual, activated, util, train, perfect):
    reload(_module)

In [6]:
# Environment constructed with argument 5, plus a human and a random player.
e = env.Env(5)
h = human.Human()
r = rp.RandomAgentPlus()

# Two separately constructed Q agents sharing one architecture
# (layer sizes 25-100-200-500-100-25).
a1, a2 = (
    Q.Q([25, 100, 200, 500, 100, 25], gamma=0.6, epsilon=0.1, beta=0.0)
    for _ in range(2)
)

# One non-recording trainer per agent. The epsilon schedule evaluates to 1.0
# for 0 < x <= 10 and to 10/x afterwards; it divides by x, so it assumes x is
# never 0 (what x counts is decided inside trainer.Trainer -- TODO confirm).
t1, t2 = (
    train.Trainer(
        agent,
        learn_rate=1e-11,
        record=False,
        change_agent_epsilon=True,
        epsilon_func=lambda x: min(1.0, 10.0 / x),
    )
    for agent in (a1, a2)
)

# Perfect agent built from agents/perfect/3.txt with size argument 3.
# NOTE(review): this is size 3 while the environment above uses 5 -- confirm
# the two are not meant to be used together.
p = perfect.Perfect("agents/perfect/3.txt", 3)

In [7]:
# Have a1 and a2 play each other, each with an episode from its own trainer
# (get_episode called with its defaults).
episode_a1 = t1.get_episode()
episode_a2 = t2.get_episode()
e.play(
    a1,
    a2,
    100,
    trainer_a1=episode_a1,
    trainer_a2=episode_a2,
)

Playing ********** Done


In [None]:
def train_perfect_move(perfect_agent, agent_trainer):
    """Run one offline update that rewards the perfect agent's move.

    A random entry is drawn from ``perfect_agent.moves`` (its keys, if it is
    a dict -- TODO confirm), decoded with ``util.bin_to_array``, and the move
    returned by ``perfect_agent.get_move`` for that state is passed to
    ``agent_trainer.offline`` with a reward of 10.

    Args:
        perfect_agent: object exposing ``moves`` and ``get_move(state)``.
        agent_trainer: object exposing ``offline(states, moves, rewards, ...)``.
    """
    known_states = list(perfect_agent.moves)
    board = util.bin_to_array(choice(known_states))
    best_move = perfect_agent.get_move(board)
    agent_trainer.offline([board], [best_move], [10], silence=True, rotate=True)

def train_illegal_move(agent_trainer):
    """Run one offline update that penalizes a move onto an occupied cell.

    A random 3x3 board of 0/1 cells is generated, one occupied cell is picked
    as the (illegal) move, and the pair is passed to ``agent_trainer.offline``
    with a reward of -100.

    Args:
        agent_trainer: object exposing ``offline(states, moves, rewards, ...)``.
    """
    # Rounding samples tightly clustered around 0.5 gives each cell the value
    # 0 or 1.
    state = np.int32(np.round(np.random.normal(loc = .5, scale = .01, size = [3, 3])))
    # A completely full board is avoided by clearing one cell.
    if np.sum(state) >= 9:
        state[0, 0] = 0
    # An illegal move needs at least one occupied cell. On an all-zero board
    # the previous `[-0:]` slice selected every cell, so a legal move was
    # penalized; occupy one random cell instead.
    occupied = np.flatnonzero(state)
    if occupied.size == 0:
        state.flat[choice(range(state.size))] = 1
        occupied = np.flatnonzero(state)
    # Make illegal move: choose one of the occupied cells.
    arg = choice(occupied.tolist())
    action = np.zeros([state.size], dtype = np.int32)
    action[arg] = 1
    move = np.reshape(action, state.shape)
    reward = -100
    # Use the trainer that was passed in (the original referenced a global `t`).
    agent_trainer.offline([state], [move], [reward], silence = True, rotate = True)

In [None]:
# Number of offline updates to run.
ITERATIONS = 10000

# Illegal-move pass, currently disabled:
# for step in range(ITERATIONS):
#     train_illegal_move(t)

# NOTE(review): `t` is not defined in any cell of this notebook as shown; it
# must be a trainer created before this cell runs (t1/t2 exist, `t` does not).
for step in range(ITERATIONS):
    train_perfect_move(p, t)

In [None]:
# Play the human `h` against agent `a` on environment `e`.
# NOTE(review): `a` is not defined in any cell of this notebook as shown
# (only a1/a2/agent1 are) -- define it, or substitute the intended agent.
gui = visual.GameWithConfidences(
    e,
    a,
    h,
    -1,
    piece_size=100,
)