# Generating a training set

We want to generate a bunch of positions, FEN -> Win probability (converted to centipawns)

Quantity or quality? Do we want to play out many with short time controls or play fewer with longer time control

How to chose positions: sample randomly from Lichess database...

For each position, play 1000 games with time control variance.

Store in sqlite for stability.


In [16]:
import sys
import random
import sqlite3

import chess
import chess.engine
import chess.pgn

engine = chess.engine.SimpleEngine.popen_uci("/home/jerome/projects/chess20/chess/build/apps/chess20")

LICHESS_FILE = "/home/jerome/projects/chess20/learn/lichess_db_standard_rated_2018-02.pgn"

In [11]:
def position_generator():
    """yield a FEN based on lichess games"""
    pgn = open(LICHESS_FILE)
    games = [chess.pgn.read_game(pgn) for _ in range(100)]
    while True:
        game = random.choice(games)
        i = 0 
        board = game.board()
        for move in game.mainline_moves():
            board.push(move)
            i += 1
        k = random.randint(0, i-1)
        i = 0
        board = game.board()
        for move in game.mainline_moves():
            board.push(move)
            i += 1
            if i == k:
                yield board.fen()
                break
                
rng = position_generator()



r1b2rk1/ppp2ppp/1b4q1/1N1PN2n/4PB2/2P3P1/PP2Q2P/R3K2B b Q - 12 19
2kr1bnr/ppR3pp/3p4/3Np3/8/4BQ2/Pq3PPP/5RK1 b - - 0 15
1k1r1b1r/ppR1n1pp/3p4/3Np3/8/4BQP1/Pq3P1P/5RK1 w - - 1 17
rnbqkb1r/1p3ppp/p2p1n2/4p3/4P3/1NN5/PPP1BPPP/R1BQK2R b KQkq - 1 7


In [7]:
# Database wrangling

DB = "db.sqlite3"


def create_table():
    conn = sqlite3.connect(DB)
    c = conn.cursor()
    c.execute("""CREATE TABLE positions (
        id integer PRIMARY KEY,
        fen text NOT NULL UNIQUE,
        plus int NOT NULL,
        minus int NOT NULL,
        playouts int NOT NULL
    )""")
    conn.commit()
    conn.close()


OperationalError: table positions already exists

In [13]:
def playout(fen):
    board = chess.Board(fen)
    while not board.is_game_over():
        timedelta = random.uniform(0, 0.02)
        result = engine.play(board, chess.engine.Limit(time=0.1+timedelta))
        board.push(result.move)
    if board.result() == "1-0":
        return (board, (1, 0))
    elif board.result() == "0-1":
        return (board, (0, 1))
    else:
        return (board, (0, 0))

In [14]:
def update_db(fen, playout_result):
    fen = " ".join(fen.split(" ")[0:-2])
    plus, minus = playout_result
    conn = sqlite3.connect(DB)
    c = conn.cursor()
    c.execute(f"SELECT * FROM positions WHERE fen = ?", (fen,))
    fetch = c.fetchall()
    if fetch:
        fetch = fetch[0]
        plus = plus + fetch[2]
        minus = minus + fetch[3]
        total = fetch[4] + 1
        c.execute(f"UPDATE positions SET plus=?, minus=?, playouts=? WHERE fen=? ", (plus, minus, total, fen))
    else:
        c.execute(f"INSERT INTO positions (fen, plus, minus, playouts) VALUES (?, ?, ?, ?)", (fen, plus, minus, 1))
    conn.commit()
    conn.close()
    


In [18]:
def do_one_cycle(fen):
    board, result = playout(fen)
    while True:
        try:
            board.pop()
        except:
            break
        update_db(board.fen(), result)
    

while True:
    fen = next(rng)
    for _ in range(100):
        do_one_cycle(fen)

    

EngineError: invalid uci (use 0000 for null moves): 'a1a1'