In [1]:
%pylab inline
from tqdm import tqdm_notebook
import os.path

from rd_model import RazzleDazzleGame, Position
from rd_bots import PositionBot, PositionNet, IdiotBot
from rd_helpers import *

from multiprocessing import Pool

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
import random
import numpy as np

Populating the interactive namespace from numpy and matplotlib


ImportError: No module named rd_model

In [2]:
def correct_predictions(output,actual):
    return np.sum(1-np.floor(np.abs(output.flatten()-actual.flatten())))

def generate_game(bot1,bot2):
    game = RazzleDazzleGame(bot1, bot2)
    game.play()
    return game

def get_game_data(game_str):
    gd=[]
    if game_str.strip()[-1] == '8':  # get value
        v = 1
    else:
        v = -1
    pos = Position(score=v)
    white_move = True
    for ms in game_str.split(','):
        m = pos.parse_move(ms)
        if not white_move:
            m = pos.rotate_move(m)

        gd.append([pos.state.flatten(), pos.get_piece_on_square(m[0]), m[-1], v])
        pos=pos.move(m)
        pos.rotate()
    
    return gd

def generate_games(bot1,bot2, games, fname, path):
    game_lengths=[]
    wins=0
    losses=0
    print "Generating %d games of %s vs. %s, stored at %s" % (games, bot1.name,bot2.name, os.path.join(path,fname))
    games=[pool.apply_async(generate_game, args=(bot1,bot2)) for ii in range(games)]
    for game in tqdm_notebook(games):
        game=game.get()
        game_lengths.append(len(game))
        if game.winner>0:
            wins+=1
        else:
            losses+=1
        with open(os.path.join(path,fname),'a') as f:
            f.write(str(game)+'\n')
    print "Stats: mean game length = %d, win/loss = %d / %d" % (np.mean(game_lengths),wins,losses)

            
def generate_dataset(prefix,path, test_size, moves_before_end):
    fname=os.path.join(path,prefix + ".txt")
    wins=[]
    losses=[]
    
    gds=[]
    with open(fname,'r') as f:
        gds = [pool.apply_async(get_game_data,args=(gs,)) for gs in f]

    for g in tqdm_notebook(gds):
        g=g.get()
        for p in g[-moves_before_end:]:
            if p[-1]>0:
                wins.append(p)
            else:
                losses.append(p)

    num_scores=min(len(wins),len(losses))
    test_dataset=wins[:test_size]+losses[:test_size]
    dataset=wins[test_size:num_scores]+losses[test_size:num_scores]
    random.shuffle(dataset)
    random.shuffle(test_dataset)
    dataset = np.transpose(dataset)
    test_dataset = np.transpose(test_dataset)
    positions=np.vstack(dataset[0])
    test_positions=np.vstack(test_dataset[0])
    scores=np.array(dataset[-1],dtype=float)
    test_scores=np.array(test_dataset[-1],dtype=float)

    np.savez_compressed(os.path.join(path,prefix+"_dataset.npz"), 
                        positions=positions, scores=scores, 
                        test_positions=test_positions,test_scores=test_scores)
    
    return (positions,scores,test_positions,test_scores)

pool=Pool(processes=8)

In [3]:
def train_position_iteration(name,datapath,batch_size,epochs,lr=0.001,momentum=0.5,log_interval=10,cuda=False):
    
    index=current_file_index(datapath,prefix=name)
    fname = get_current_filename(datapath=datapath, prefix=name, suffix='.bot')
    fname_out = get_next_filename(datapath=datapath, prefix=name, suffix='.bot')

    with np.load(os.path.join(datapath,name+'_dataset.npz')) as f:
        positions,scores,test_positions,test_scores=f['positions'],f['scores'],f['test_positions'],f['test_scores']
    
    bot1 = PositionBot(fname,path=datapath)
    model = bot1.posnet
    print "Training: " + fname
    print "Training size: %d\t Test size: %d" % (len(scores),len(test_scores))

    loss_fn=nn.MSELoss()

    if cuda:
        model.cuda()
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)


    #test data/value
    test_data  = torch.from_numpy(test_positions).float()
    test_value = torch.from_numpy(test_scores).float()
    if cuda:
        test_data, test_value = test_data.cuda(), test_value.cuda()            
    test_data, test_value = Variable(test_data), Variable(test_value)
            
    for epoch in range(epochs):
        model.train()
        total=0
        correct=0
        for ii in range(len(scores)/batch_size):
            data  = torch.from_numpy(positions[ii*batch_size:(ii+1)*batch_size]).float()
            value = torch.from_numpy(scores[ii*batch_size:(ii+1)*batch_size]).float()
            if cuda:
                data, value = data.cuda(), value.cuda()            
            data, value = Variable(data), Variable(value)
            
            optimizer.zero_grad()
            output = model(data)
            loss = loss_fn(output, value)
            loss.backward()
            optimizer.step()

            correct+=correct_predictions(output.data.cpu().numpy(),value.data.cpu().numpy())
            total+=len(value.data.cpu().numpy())

        if epoch % log_interval == log_interval-1:
            print "Train:\t%.3f%% correct\tLoss: %.6f\t%d/%d" % (correct*100./total,loss.data[0],epoch+1,epochs)

        model.eval()
        test_output=model(test_data)
        test_loss=loss_fn(test_output,test_value)
        correct=correct_predictions(test_output.data.cpu().numpy(),test_value.data.cpu().numpy())
        total=len(test_value.data.cpu().numpy())    
        
        if epoch % log_interval == log_interval-1:
            print "Test:\t%.3f%% correct\tLoss: %.6f" % (correct*100./total,test_loss.data[0])
    
    
    print "Saving trained bot as: " + fname_out
    torch.save(model.state_dict(), os.path.join(datapath,fname_out))
    
    #return test_output,test_value

In [9]:
games=10000
name='iterative'
path='data/'

# Generate some seed data
ib1=IdiotBot()
ib2=IdiotBot()
generate_games(ib1,ib2,games,name+'.txt',path)
(positions,scores,test_positions,test_scores)=generate_dataset(name,'data/',test_size=1000, moves_before_end=10)
train_position_iteration(name=name,datapath='data/',batch_size=200,epochs=25,lr=0.01,momentum=0.5,log_interval=5,cuda=True)


Generating 10000 games of Idiot vs. Idiot, stored at data/iterative.txt



Stats: mean game length = 217, win/loss = 4765 / 5235



Failed to load: data/00000_iterative.bot
Loading new PositionNet model
Training: 00000_iterative.bot
Training size: 93300	 Test size: 2000
Train:	99.068% correct	Loss: 0.012161	5/25
Test:	71.150% correct	Loss: 0.928609
Train:	100.000% correct	Loss: 0.000936	10/25
Test:	72.400% correct	Loss: 0.896846
Train:	100.000% correct	Loss: 0.000412	15/25
Test:	72.800% correct	Loss: 0.904899
Train:	100.000% correct	Loss: 0.000253	20/25
Test:	72.800% correct	Loss: 0.909288
Train:	100.000% correct	Loss: 0.000179	25/25
Test:	72.800% correct	Loss: 0.912320
Saving trained bot as: 00000_iterative.bot


In [None]:
train_position_iteration(name=name,datapath='data/',batch_size=200,epochs=25,lr=0.01,momentum=0.5,log_interval=5,cuda=True)

In [10]:
games=100
name='iterative'
path='data/'
fname = get_current_filename(datapath='data/', prefix=name, suffix='.bot')
b1 = PositionBot(fname,path='data/')
b2 = PositionBot(fname,path='data/')
generate_games(b1,IdiotBot(),games,fname+'.txt',path)


Generating 100 games of 00000_iterative.bot vs. Idiot, stored at data/00000_iterative.bot.txt



Stats: mean game length = 229, win/loss = 43 / 57


In [None]:
games=1000
for ii in range(20):
    fname = get_current_filename(datapath='data/', prefix=name, suffix='.bot')
    b1 = PositionBot(fname,path='data/')
    b2 = PositionBot(fname,path='data/')
    generate_games(b1,b2,games,fname+'.txt',path)
    (positions,scores,test_positions,test_scores)=generate_dataset(fname,'data/',test_size=1000, moves_before_end=10)
    train_position_iteration(name=name,datapath='data/',batch_size=200,epochs=50,lr=0.001,momentum=0.5,log_interval=10,cuda=True)