# 性能优化版

此版主要优化moves的生成。

以前moves的生成约需400行代码，现在只需约50行。性能似乎提高了约1000倍。

## 1. First load the core libraries

In [1]:
# -*- coding: utf-8 -*-
# %matplotlib inline

import numpy as np
np.set_printoptions(suppress=True)

from shutil import copyfile
import random


from keras.utils import plot_model

from game import Game, GameState
from agent import Agent
from memory import Memory
from model import Residual_CNN
from funcs import playMatches, playMatchesBetweenVersions

import loggers as lg

from settings import run_folder, run_archive_folder
import initialise
import pickle
import importlib
# Marker printed once every import in this cell has completed.
print('I am ready 2.')


Using TensorFlow backend.


I am ready 2.


## 2. Now run this block to start the learning process

This block loops forever, continually learning from new game data.

The current best model and memories are saved in the run folder so you can kill the process and restart from the last checkpoint.

In [2]:
# Write a banner so the start of this run is easy to find in the main log.
lg.logger_main.info('=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*')
lg.logger_main.info('=*=*=*=*=*=.      NEW LOG      =*=*=*=*=*')
lg.logger_main.info('=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*')

env = Game()

# If loading an existing neural network, copy the config file to root
if initialise.INITIAL_RUN_NUMBER is not None:
    copyfile(run_archive_folder + env.name + '/run' + str(initialise.INITIAL_RUN_NUMBER).zfill(4) + '/config.py', './config.py')

# Imported after the optional copy above so that a fresh import reads the
# archived settings rather than whatever ./config.py held before.
import config

######## LOAD MEMORIES IF NECESSARY ########

if initialise.INITIAL_MEMORY_VERSION is None:
    # No archived memory requested: start with an empty one.
    memory = Memory(config.MEMORY_SIZE)
else:
    print('LOADING MEMORY VERSION ' + str(initialise.INITIAL_MEMORY_VERSION) + '...')
    memory_path = (
        run_archive_folder + env.name
        + '/run' + str(initialise.INITIAL_RUN_NUMBER).zfill(4)
        + "/memory/memory" + str(initialise.INITIAL_MEMORY_VERSION).zfill(4) + ".p"
    )
    # NOTE(review): unpickling can execute arbitrary code; only load archives you trust.
    # The context manager closes the file handle even if unpickling fails.
    with open(memory_path, "rb") as memory_file:
        memory = pickle.load(memory_file)

######## LOAD MODEL IF NECESSARY ########

# Create two untrained neural network objects from the config file.
# An earlier version built the input shape as (32,) + env.grid_shape; that
# hard-coded 32 caused real trouble, so the shape now comes from env.input_shape.
current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape,   env.action_size, config.HIDDEN_CNN_LAYERS)
best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape,   env.action_size, config.HIDDEN_CNN_LAYERS)


# If loading an existing neural network, set the weights from that model
if initialise.INITIAL_MODEL_VERSION is not None:
    best_player_version = initialise.INITIAL_MODEL_VERSION
    print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
    m_tmp = best_NN.read(env.name, initialise.INITIAL_RUN_NUMBER, best_player_version)
    # Fetch the archived weights once and apply them to both networks.
    loaded_weights = m_tmp.get_weights()
    current_NN.model.set_weights(loaded_weights)
    best_NN.model.set_weights(loaded_weights)
# otherwise just ensure the weights on the two players are the same
else:
    best_player_version = 0
    best_NN.model.set_weights(current_NN.model.get_weights())

# copy the config file to the run folder
copyfile('./config.py', run_folder + 'config.py')
plot_model(current_NN.model, to_file=run_folder + 'models/model.png', show_shapes = True)

print('\n')

######## CREATE THE PLAYERS ########

# Both agents share the same search settings and differ only in the network they wrap.
current_player = Agent('current_player', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, current_NN)
best_player = Agent('best_player', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, best_NN)
#user_player = User('player1', env.state_size, env.action_size)
iteration = 0

# Main loop: self-play with the best network, retrain the current network once
# the long-term memory is full, then run a tournament to decide whether the
# current network replaces the best one. Runs until interrupted.
while True:

    iteration += 1
    # Re-execute the logger and config modules so on-disk edits apply to this iteration.
    importlib.reload(lg)
    importlib.reload(config)

    print('ITERATION NUMBER ' + str(iteration))

    lg.logger_main.info('BEST PLAYER VERSION: %d', best_player_version)
    print('BEST PLAYER VERSION ' + str(best_player_version))

    ######## SELF PLAY ########
    print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...')
    _, memory, _, _ = playMatches(best_player, best_player, config.EPISODES, lg.logger_main, turns_until_tau0 = config.TURNS_UNTIL_TAU0, memory = memory, goes_first=1)
    print('\n')

    memory.clear_stmemory()

    if len(memory.ltmemory) >= config.MEMORY_SIZE:

        ######## RETRAINING ########
        # Training starts once the long-term memory is full, i.e. once enough
        # positions (moves) have been collected. The value assigned to each
        # position (move) is the final result of the game it came from.
        print('RETRAINING...')
        current_player.replay(memory.ltmemory)
        print('')

        # Checkpoint the memory every fifth iteration; the context manager
        # guarantees the file is flushed and closed.
        if iteration % 5 == 0:
            with open(run_folder + "memory/memory" + str(iteration).zfill(4) + ".p", "wb") as memory_file:
                pickle.dump(memory, memory_file)

        memory_samp = random.sample(memory.ltmemory, min(1000, len(memory.ltmemory)))

        # Render a random sample of stored states to the memory logger.
        # NOTE(review): the prediction results below are never read in this cell;
        # the calls are kept unchanged in case get_preds is relied on elsewhere - confirm.
        for s in memory_samp:
            current_value, current_probs, _ = current_player.get_preds(s['state'])
            best_value, best_probs, _ = best_player.get_preds(s['state'])

            s['state'].render(lg.logger_memory)

        ######## TOURNAMENT ########
        print('锦标赛，联赛 TOURNAMENT...')
        scores, _, points, sp_scores = playMatches(current_player, best_player, config.EVAL_EPISODES, lg.logger_tourney, turns_until_tau0 = 0, memory = None, goes_first=1)
        print('\nSCORES')
        print(scores)
        print('\nSTARTING PLAYER / NON-STARTING PLAYER SCORES')
        print(sp_scores)
        #print(points)

        print('\n\n')

        # Promote the current network when its score exceeds the best player's
        # score scaled by the configured threshold, and persist the new version.
        if scores['current_player'] > scores['best_player'] * config.SCORING_THRESHOLD:
            best_player_version = best_player_version + 1
            best_NN.model.set_weights(current_NN.model.get_weights())
            best_NN.write(env.name, best_player_version)

    else:
        # Not enough data to train yet; report progress towards MEMORY_SIZE.
        print('MEMORY SIZE: ' + str(len(memory.ltmemory)))

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.

LOADING MODEL VERSION 157...


ITERATION NUMBER 1
BEST PLAYER VERSION 157
SELF PLAYING 100 EPISODES...
1 

KeyboardInterrupt: 

In [None]:
# performance tune
! pip install line-profiler
# ref: http://mortada.net/easily-profile-python-code-in-jupyter.html

In [8]:

env = Game()

# If loading an existing neural network, copy the config file to root
if initialise.INITIAL_RUN_NUMBER != None:
    copyfile(run_archive_folder + env.name + '/run' + str(initialise.INITIAL_RUN_NUMBER).zfill(4) + '/config.py', './config.py')

import config

######## LOAD MEMORIES IF NECESSARY ########

if initialise.INITIAL_MEMORY_VERSION == None:
    memory = Memory(config.MEMORY_SIZE)
else:
    print('LOADING MEMORY VERSION ' + str(initialise.INITIAL_MEMORY_VERSION) + '...')
    memory = pickle.load( open( run_archive_folder + env.name + '/run' + str(initialise.INITIAL_RUN_NUMBER).zfill(4) + "/memory/memory" + str(initialise.INITIAL_MEMORY_VERSION).zfill(4) + ".p",   "rb" ) )

######## LOAD MODEL IF NECESSARY ########

# create an untrained neural network objects from the config file
# current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (32,) + env.grid_shape,   env.action_size, config.HIDDEN_CNN_LAYERS)
# best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (32,) +  env.grid_shape,   env.action_size, config.HIDDEN_CNN_LAYERS)
# 上面的32的hard coding，很害人！！！
current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape,   env.action_size, config.HIDDEN_CNN_LAYERS)
best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape,   env.action_size, config.HIDDEN_CNN_LAYERS)


#If loading an existing neural netwrok, set the weights from that model
if initialise.INITIAL_MODEL_VERSION != None:
    best_player_version  = initialise.INITIAL_MODEL_VERSION
    print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
    m_tmp = best_NN.read(env.name, initialise.INITIAL_RUN_NUMBER, best_player_version)
    current_NN.model.set_weights(m_tmp.get_weights())
    best_NN.model.set_weights(m_tmp.get_weights())
#otherwise just ensure the weights on the two players are the same
else:
    best_player_version = 0
    best_NN.model.set_weights(current_NN.model.get_weights())

#copy the config file to the run folder
copyfile('./config.py', run_folder + 'config.py')
plot_model(current_NN.model, to_file=run_folder + 'models/model.png', show_shapes = True)

print('\n')

######## CREATE THE PLAYERS ########

current_player = Agent('current_player', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, current_NN)
best_player = Agent('best_player', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, best_NN)
#user_player = User('player1', env.state_size, env.action_size)
iteration = 0

importlib.reload(lg)
importlib.reload(config)

print('ITERATION NUMBER ' + str(iteration))

lg.logger_main.info('BEST PLAYER VERSION: %d', best_player_version)
print('BEST PLAYER VERSION ' + str(best_player_version))

######## SELF PLAY ########
EPISODE = 1
print('SELF PLAYING ' + str(EPISODE) + ' EPISODES...')

%timeit -n 1 playMatches(best_player, best_player, EPISODE, lg.logger_main, turns_until_tau0 = config.TURNS_UNTIL_TAU0, memory = memory, goes_first=1)
print ('done.')

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.



ITERATION NUMBER 0
BEST PLAYER VERSION 0
SELF PLAYING 1 EPISODES...
1 1 

KeyboardInterrupt: 

In [9]:
# Load the line_profiler IPython extension; a later cell profiles playMatches with %lprun.
%load_ext line_profiler


In [10]:

env = Game()

# If loading an existing neural network, copy the config file to root
if initialise.INITIAL_RUN_NUMBER != None:
    copyfile(run_archive_folder + env.name + '/run' + str(initialise.INITIAL_RUN_NUMBER).zfill(4) + '/config.py', './config.py')

import config

######## LOAD MEMORIES IF NECESSARY ########

if initialise.INITIAL_MEMORY_VERSION == None:
    memory = Memory(config.MEMORY_SIZE)
else:
    print('LOADING MEMORY VERSION ' + str(initialise.INITIAL_MEMORY_VERSION) + '...')
    memory = pickle.load( open( run_archive_folder + env.name + '/run' + str(initialise.INITIAL_RUN_NUMBER).zfill(4) + "/memory/memory" + str(initialise.INITIAL_MEMORY_VERSION).zfill(4) + ".p",   "rb" ) )

######## LOAD MODEL IF NECESSARY ########

# create an untrained neural network objects from the config file
# current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (32,) + env.grid_shape,   env.action_size, config.HIDDEN_CNN_LAYERS)
# best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (32,) +  env.grid_shape,   env.action_size, config.HIDDEN_CNN_LAYERS)
# 上面的32的hard coding，很害人！！！
current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape,   env.action_size, config.HIDDEN_CNN_LAYERS)
best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape,   env.action_size, config.HIDDEN_CNN_LAYERS)


#If loading an existing neural netwrok, set the weights from that model
if initialise.INITIAL_MODEL_VERSION != None:
    best_player_version  = initialise.INITIAL_MODEL_VERSION
    print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
    m_tmp = best_NN.read(env.name, initialise.INITIAL_RUN_NUMBER, best_player_version)
    current_NN.model.set_weights(m_tmp.get_weights())
    best_NN.model.set_weights(m_tmp.get_weights())
#otherwise just ensure the weights on the two players are the same
else:
    best_player_version = 0
    best_NN.model.set_weights(current_NN.model.get_weights())

#copy the config file to the run folder
copyfile('./config.py', run_folder + 'config.py')
plot_model(current_NN.model, to_file=run_folder + 'models/model.png', show_shapes = True)

print('\n')

######## CREATE THE PLAYERS ########

current_player = Agent('current_player', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, current_NN)
best_player = Agent('best_player', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, best_NN)
#user_player = User('player1', env.state_size, env.action_size)
iteration = 0

importlib.reload(lg)
importlib.reload(config)

print('ITERATION NUMBER ' + str(iteration))

lg.logger_main.info('BEST PLAYER VERSION: %d', best_player_version)
print('BEST PLAYER VERSION ' + str(best_player_version))

######## SELF PLAY ########
EPISODE = 5
print('SELF PLAYING ' + str(EPISODE) + ' EPISODES...')

%lprun -f playMatches playMatches(best_player, best_player, EPISODE, lg.logger_main, turns_until_tau0 = config.TURNS_UNTIL_TAU0, memory = memory, goes_first=1)
print ('done.')



ITERATION NUMBER 0
BEST PLAYER VERSION 0
SELF PLAYING 5 EPISODES...
1 2 3 4 5 done.


In [4]:
! pip3 install tensorflow==1.5.1

Collecting tensorflow==1.5.1
[?25l  Downloading https://files.pythonhosted.org/packages/9d/e0/d371c595fa93a709a88a1d6127ca8a5e145e42e379c9ef9490407bccea44/tensorflow-1.5.1-cp36-cp36m-manylinux1_x86_64.whl (44.7MB)
[K    100% |████████████████████████████████| 44.7MB 197kB/s ta 0:00:011 2% |▊                               | 1.1MB 184kB/s eta 0:03:57    2% |▉                               | 1.1MB 199kB/s eta 0:03:39    8% |██▊                             | 3.8MB 5.8MB/s eta 0:00:08    8% |██▊                             | 3.9MB 272kB/s eta 0:02:30    16% |█████▎                          | 7.4MB 226kB/s eta 0:02:46    17% |█████▋                          | 7.8MB 262kB/s eta 0:02:21    19% |██████▎                         | 8.7MB 205kB/s eta 0:02:56    20% |██████▋                         | 9.2MB 6.5MB/s eta 0:00:06    28% |█████████                       | 12.7MB 2.5MB/s eta 0:00:13    32% |██████████▍                     | 14.6MB 2.3MB/s eta 0:00:14    33% |██████████▋                 

In [3]:
! python -c 'import tensorflow as tf; print(tf.__version__)'
! python -c 'import keras; print(keras.__version__)'

1.5.1
Using TensorFlow backend.
2018-07-19 02:33:42.571966: I tensorflow/core/platform/cpu_feature_guard.cc:137] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2.0.9


In [None]:
! pip3 install keras==2.1.6


Collecting keras==2.1.6
[?25l  Downloading https://files.pythonhosted.org/packages/54/e8/eaff7a09349ae9bd40d3ebaf028b49f5e2392c771f294910f75bb608b241/Keras-2.1.6-py2.py3-none-any.whl (339kB)
[K    81% |██████████████████████████      | 276kB 11kB/s eta 0:00:060