# Сеть с двумя выходами — policy и value

In [1]:
# Copy the dlgo archive from Google Drive (if it is there)
!cp ./drive/MyDrive/colab1/dlgo.zip ./dlgo.zip

In [2]:
# Unpack the DL-Go modules
!unzip -o dlgo.zip

Archive:  dlgo.zip
   creating: dlgo/
   creating: dlgo/agent/
  inflating: dlgo/agent/base.py      
  inflating: dlgo/agent/helpers.py   
  inflating: dlgo/agent/naive.py     
  inflating: dlgo/agent/pg.py        
  inflating: dlgo/agent/predict.py   
  inflating: dlgo/agent/termination.py  
 extracting: dlgo/agent/__init__.py  
   creating: dlgo/agent/__pycache__/
  inflating: dlgo/agent/__pycache__/base.cpython-310.pyc  
  inflating: dlgo/agent/__pycache__/base.cpython-36.pyc  
  inflating: dlgo/agent/__pycache__/helpers.cpython-310.pyc  
  inflating: dlgo/agent/__pycache__/helpers.cpython-36.pyc  
  inflating: dlgo/agent/__pycache__/naive.cpython-310.pyc  
  inflating: dlgo/agent/__pycache__/naive.cpython-36.pyc  
  inflating: dlgo/agent/__pycache__/predict.cpython-310.pyc  
  inflating: dlgo/agent/__pycache__/predict.cpython-36.pyc  
  inflating: dlgo/agent/__pycache__/termination.cpython-310.pyc  
  inflating: dlgo/agent/__pycache__/termination.cpython-36.pyc  
  inflating: dlgo/

## 1 - init_ac_agent.py - инициализация бота

python init_ac_agent.py --board-size 9 ac_v1.hdf5

In [13]:
# Board side length handed to the encoder lookup (matches --board-size 9 above).
BOARD_SIZE = 9
# HDF5 file the freshly initialized agent is serialized to.
OUTPUT_FILE = 'ac_v1.hdf5'

In [1]:
import h5py

from keras.models import Model
from keras.layers import Conv2D, Dense, Flatten, Input

from dlgo import rl
from dlgo import encoders

In [5]:
# Look up the encoder registered under the name 'simple' for a BOARD_SIZE board.
encoder = encoders.get_encoder_by_name('simple', BOARD_SIZE)

In [6]:
# Network input layer; its shape is whatever the encoder reports.
board_input = Input(shape=encoder.shape(), name='board_input')

In [7]:
# Add as many convolutional layers as needed.
# Here: three stacked 3x3 convolutions with 64 filters each, 'same' padding
# and ReLU activation, chained from the board input.
conv1 = Conv2D(64, (3,3),
               padding='same',
               activation='relu')(board_input)
conv2 = Conv2D(64, (3,3),
               padding='same',
               activation='relu')(conv1)
conv3 = Conv2D(64, (3,3),
               padding='same',
               activation='relu')(conv2)

In [8]:
# Flatten the last convolution's output so it can feed dense layers.
flat = Flatten()(conv3)
# This example uses hidden layers of size 512.
# NOTE(review): no activation argument is passed to this Dense layer, unlike
# the policy/value hidden layers that consume it — confirm this is intended.
processed_board = Dense(512)(flat)

In [9]:
# This output corresponds to the policy function.
# A 512-unit ReLU hidden layer followed by a softmax with
# encoder.num_points() units.
policy_hidden_layer = Dense(512, activation='relu')(processed_board)
policy_output = Dense(encoder.num_points(), activation='softmax')(policy_hidden_layer)

In [10]:
# This output corresponds to the value function.
# A 512-unit ReLU hidden layer followed by a single tanh unit.
value_hidden_layer = Dense(512, activation='relu')(processed_board)
value_output = Dense(1, activation='tanh')(value_hidden_layer)

In [11]:
# Two-output model: one board input, with the policy and value outputs in that order.
model = Model(inputs=[board_input], outputs=[policy_output, value_output])

In [12]:
# Wrap the untrained model and its encoder in an rl.ACAgent.
new_agent = rl.ACAgent(model, encoder)

In [13]:
# Write the new agent to OUTPUT_FILE; the file is opened in 'w' mode and
# closed automatically when the block exits.
with h5py.File(OUTPUT_FILE, 'w') as agent_file:
    new_agent.serialize(agent_file)



In [14]:
!cp ./ac_v1.hdf5 ./drive/MyDrive/colab1/ac_v1.hdf5

## 2 - self_play_ac.py - генерация игр

python self_play_ac.py --board-size 9 --learning-agent ac_v1.hdf5 --num-games 5000 --experience-out exp_0001.hdf5

In [15]:
# Notebook stand-ins for the self_play_ac.py command-line arguments.
BOARD_SIZE = 9
LEARNING_AGENT = 'ac_v1.hdf5'
NUM_GAMES = 100  # originally 5000, but memory ran out at "Simulating game 377/5000..."
EXPERIENCE_OUT = 'exp_0001.hdf5'
#temperature = 0.0
#game_log_out = 'log_1.txt'

In [7]:
import datetime
from collections import namedtuple

from dlgo.rl.ac import load_ac_agent
from dlgo import scoring
from dlgo.goboard import GameState, Player, Point

In [8]:
# Column labels used under the printed board; the letter 'I' is not in the string.
COLS = 'ABCDEFGHJKLMNOPQRST'
# Character printed for each board point: empty, black stone, white stone.
STONE_TO_CHAR = {
    None: '.',
    Player.black: 'x',
    Player.white: 'o',
}

In [9]:
def avg(items):
    """Return the arithmetic mean of ``items`` as a float.

    A falsy ``items`` (for example an empty list or tuple) yields 0.0
    instead of raising ZeroDivisionError.
    """
    if items:
        return sum(items) / float(len(items))
    return 0.0


def print_board(board):
    """Print ``board`` as text, highest row first, with column letters below.

    Each point is looked up with ``board.get(Point(row, col))`` and rendered
    through STONE_TO_CHAR. The printed area is BOARD_SIZE x BOARD_SIZE, taken
    from the module-level constant rather than from ``board`` itself.
    """
    columns = range(1, BOARD_SIZE + 1)
    for row in reversed(range(1, BOARD_SIZE + 1)):
        cells = ''.join(
            STONE_TO_CHAR[board.get(Point(row=row, col=col))]
            for col in columns
        )
        print('%2d %s' % (row, cells))
    print('   ' + COLS[:BOARD_SIZE])

def name(player):
    """Return 'B' when ``player`` equals Player.black, otherwise 'W'."""
    return 'B' if player == Player.black else 'W'

In [10]:
class GameRecord(namedtuple('GameRecord', ['moves', 'winner', 'margin'])):
    """Immutable record of one finished game.

    Fields, in order: ``moves`` (the moves played), ``winner`` and ``margin``
    (both taken from the computed game result).
    """

In [11]:
def simulate_game(black_player, white_player):
    """Play one full game between two agents and return its GameRecord.

    Starts a new BOARD_SIZE game and, until ``game.is_over()``, asks the
    agent whose colour is ``game.next_player`` to ``select_move`` and applies
    that move. The final board and the computed result are printed.

    Returns a GameRecord holding the moves played, the result's ``winner``
    and its ``winning_margin``.
    """
    game = GameState.new_game(BOARD_SIZE)
    players_by_color = {
        Player.black: black_player,
        Player.white: white_player,
    }
    played = []
    while not game.is_over():
        mover = players_by_color[game.next_player]
        chosen = mover.select_move(game)
        played.append(chosen)
        game = game.apply_move(chosen)

    print_board(game.board)
    result = scoring.compute_game_result(game)
    print(result)

    return GameRecord(
        moves=played,
        winner=result.winner,
        margin=result.winning_margin,
    )

In [21]:
# Load two independent copies of the saved agent for self-play.
# The file is opened explicitly read-only and inside `with`, so the handle is
# closed once both agents are built; previously two handles were opened with
# an implicit mode and never closed.
# NOTE(review): assumes load_ac_agent reads everything it needs before
# returning and keeps no reference to the open file — confirm.
with h5py.File(LEARNING_AGENT, 'r') as agent_file:
    agent1 = load_ac_agent(agent_file)
    agent2 = load_ac_agent(agent_file)



In [22]:
# One experience collector per agent, so each side's experience is kept separately.
collector1 = rl.ExperienceCollector()
collector2 = rl.ExperienceCollector()

# Attach each collector to its agent.
agent1.set_collector(collector1)
agent2.set_collector(collector2)

In [23]:
# Self-play: agent1 is always passed as black and agent2 as white. After each
# game the winner's collector closes its episode with reward +1 and the
# loser's with reward -1 (any non-black winner counts as a win for agent2).
for i in range(NUM_GAMES):
    print('Simulating game %d/%d...' % (i + 1, NUM_GAMES))
    for collector in (collector1, collector2):
        collector.begin_episode()

    game_record = simulate_game(agent1, agent2)
    black_won = game_record.winner == Player.black
    winning_collector, losing_collector = (
        (collector1, collector2) if black_won else (collector2, collector1)
    )
    winning_collector.complete_episode(reward=1)
    losing_collector.complete_episode(reward=-1)

[1;30;43mВыходные данные были обрезаны до нескольких последних строк (5000).[0m
 9 xxooooo.o
 8 xxxo.o.oo
 7 x.xxooooo
 6 xxxxooo.o
 5 xxxxxxooo
 4 xox.xxxoo
 3 xoxxxxooo
 2 oooooxxxo
 1 .oo.oxxxx
   ABCDEFGHJ
W+14.5
Simulating game 60/100...
 9 o.o.ooxxx
 8 ooooo.oxx
 7 oooooooxx
 6 xxxxooxxx
 5 xxoooxxx.
 4 xxooxxxxx
 3 x.xoxxoxx
 2 xxxoxxooo
 1 xx.xxxo.o
   ABCDEFGHJ
B+1.5
Simulating game 61/100...
 9 oxxxxo.oo
 8 oxxxooooo
 7 ooxxxoo.o
 6 ooxxooooo
 5 o.xooooxx
 4 oxxxooxxx
 3 xxxxxxx.x
 2 .x.x.xxox
 1 xxxxxxxx.
   ABCDEFGHJ
B+5.5
Simulating game 62/100...
 9 x.xooxooo
 8 xxxoxxooo
 7 xx.oxx.oo
 6 xoxxx.xxx
 5 oooxxxxox
 4 ooox.xxoo
 3 o.oxxx.x.
 2 oooxxxxxo
 1 ooxx.xxxx
   ABCDEFGHJ
B+8.5
Simulating game 63/100...
 9 o.ooxxx.x
 8 oooxx.xxx
 7 oooxxxxxx
 6 ooooox.xx
 5 o.oooxxxx
 4 oooo.ox.x
 3 ooooooxxx
 2 o.o.ooxxx
 1 ooooxxx.x
   ABCDEFGHJ
W+10.5
Simulating game 64/100...
 9 .ooxxxxx.
 8 ooxxxxxox
 7 o.oox.xoo
 6 ooooxxxox
 5 .ooxxxoox
 4 oooooooxx
 3 o.oxxxoxx
 2 ooox.xox.
 1

In [24]:
# Merge both collectors' experience into one object and write it to EXPERIENCE_OUT.
experience = rl.combine_experience([collector1, collector2])
# 'w' mode: an existing file of the same name is overwritten.
with h5py.File(EXPERIENCE_OUT, 'w') as experience_outf:
    experience.serialize(experience_outf)

In [25]:
!cp ./exp_0001.hdf5 ./drive/MyDrive/colab1/exp_0001.hdf5

## 3 - train_ac.py - обучение бота

python train_ac.py --learning-agent bots/ac_v1.hdf5 --agent-out bots/ac_v2.hdf5 --lr 0.01 --bs 1024 exp_0001.hdf5

*Для обучения можно будет подавать несколько файлов сыгранных партий "exp_xxxx.hdf5"*

In [2]:
# Notebook stand-ins for the train_ac.py command-line arguments.
learning_agent = 'ac_v1.hdf5'  # agent file to load and train
agent_out = 'ac_v2.hdf5'  # file the trained agent is written to
lr = 0.01
bs = 1024
experience = ['exp_0001.hdf5']  # a list, so several experience files can be given


# Descriptive aliases for the values above.
learning_agent_filename = learning_agent
experience_files = experience
updated_agent_filename = agent_out
learning_rate = lr
batch_size = bs

In [3]:
# Load the agent, train it on every experience file in turn, then save it.
# Every HDF5 file is opened with an explicit mode inside `with`, so handles
# are closed deterministically; previously the input files were opened with an
# implicit mode and never closed.
# NOTE(review): assumes rl.load_ac_agent reads everything it needs before
# returning and keeps no reference to the open file — confirm.
with h5py.File(learning_agent_filename, 'r') as agent_file:
    learning_agent = rl.load_ac_agent(agent_file)

for exp_filename in experience_files:
    # Keep the experience file open for the whole training call, so this is
    # correct whether or not the loaded buffer copies the data into memory.
    with h5py.File(exp_filename, 'r') as exp_file:
        exp_buffer = rl.load_experience(exp_file)
        learning_agent.train(
            exp_buffer,
            lr=learning_rate,
            batch_size=batch_size)

# 'w' mode: an existing output file of the same name is overwritten.
with h5py.File(updated_agent_filename, 'w') as updated_agent_outf:
    learning_agent.serialize(updated_agent_outf)

  super().__init__(name, **kwargs)




In [4]:
!cp ./ac_v2.hdf5 ./drive/MyDrive/colab1/ac_v2.hdf5

## 4 - eval_ac_bot.py - сравнение нового бота со старым

python eval_ac_bot.py --agent1 bots/ac_v2.hdf5 --agent2 bots/ac_v1.hdf5 --num-games 100

In [14]:
# Notebook stand-ins for the eval_ac_bot.py command-line arguments.
BOARD_SIZE = 9
NUM_GAMES = 100
FILE_AGENT_1 = 'ac_v2.hdf5'  # loaded as agent1
FILE_AGENT_2 = 'ac_v1.hdf5'  # loaded as agent2

In [15]:
# Load the two agents to compare. Each file is opened explicitly read-only
# inside `with`, so its handle is closed as soon as the agent is built;
# previously both handles were opened with an implicit mode and never closed.
# NOTE(review): assumes load_ac_agent reads everything it needs before
# returning and keeps no reference to the open file — confirm.
with h5py.File(FILE_AGENT_1, 'r') as agent1_file:
    agent1 = load_ac_agent(agent1_file)
with h5py.File(FILE_AGENT_2, 'r') as agent2_file:
    agent2 = load_ac_agent(agent2_file)



In [16]:
# Head-to-head evaluation: agent1 alternates colours every game, starting as
# black. A game counts as a win only when the recorded winner equals agent1's
# colour for that game; every other outcome is counted as a loss.
wins = 0
losses = 0
color1 = Player.black
for i in range(NUM_GAMES):
    print('Simulating game %d/%d...' % (i + 1, NUM_GAMES))
    agent1_is_black = color1 == Player.black
    black_player = agent1 if agent1_is_black else agent2
    white_player = agent2 if agent1_is_black else agent1
    game_record = simulate_game(black_player, white_player)
    if game_record.winner == color1:
        wins += 1
    else:
        losses += 1
    # Swap agent1's colour for the next game.
    color1 = color1.other
print('Agent 1 record: %d/%d' % (wins, wins + losses))

[1;30;43mВыходные данные были обрезаны до нескольких последних строк (5000).[0m
 9 xxxxxx.xx
 8 .xx.xxxxo
 7 xxxxx.xoo
 6 xx.ooxxoo
 5 xxxxxoxo.
 4 .xxxx.xoo
 3 x.x.xxxoo
 2 xxxxxoxoo
 1 .xxxoooo.
   ABCDEFGHJ
B+27.5
Simulating game 59/100...
 9 xxxxxoooo
 8 .xooooo.o
 7 xxxoooooo
 6 .xxo.o.oo
 5 xxxxooooo
 4 x.xxxox.o
 3 xxxxxoxxo
 2 xx.xooooo
 1 xxxxox.oo
   ABCDEFGHJ
W+8.5
Simulating game 60/100...
 9 .ooxxx.o.
 8 oooooxo.o
 7 oo.ooooo.
 6 ooooxoooo
 5 ooox.oxoo
 4 oooooxxxo
 3 oooooxxoo
 2 o.ooooxxx
 1 ooooxxxx.
   ABCDEFGHJ
W+46.5
Simulating game 61/100...
 9 oooooo.o.
 8 oxooxoooo
 7 o.ooxxxxo
 6 ooooxx.xx
 5 .oooxoxxx
 4 ooooooxxx
 3 o.o.ooox.
 2 .oooxxxxx
 1 o.oxxx.xx
   ABCDEFGHJ
W+25.5
Simulating game 62/100...
 9 .oooooxxx
 8 oooooxxx.
 7 .o.oxxxxx
 6 oooox.x.x
 5 ooxxxxxxx
 4 xxxx.xxxx
 3 xxxxxx.xx
 2 xx.oxxxxx
 1 xxxxxxo.x
   ABCDEFGHJ
B+25.5
Simulating game 63/100...
 9 x.xxoxxx.
 8 xxxoooxxx
 7 x.xooooxx
 6 xxxxoooox
 5 xxxooo.oo
 4 xxo.oooo.
 3 xooox.xox
 2 oooooxooo
