In [58]:
import chess
import chess.engine
import math

# Stockfish skill level -> ELO map.
# Keys are the "Skill Level" settings 0-20; each value is the ELO rating used
# for a Stockfish opponent playing at that level when ratings are updated.
STOCKFISH_LEVELS = dict(enumerate((
    1231, 1341, 1443, 1538, 1678, 1823, 1881,   # levels 0-6
    1976, 2067, 2129, 2221, 2320, 2406, 2483,   # levels 7-13
    2571, 2657, 2761, 2815, 2872, 2905, 3450,   # levels 14-20
)))

# Location of the Stockfish executable (note: the path contains a space).
STOCKFISH_PATH = "/Users/brenckle mark/Downloads/stockfish-10-mac/Mac/stockfish-10-64"

def play_benchmark_game(stkfsh_path, model, skill=1, model_options=None, engine_white=True):
    """Play one benchmark game between ``model`` and a Stockfish opponent.

    A fresh Stockfish process is started for the game and is always shut
    down again before returning, even if a move request fails. ``model`` is
    owned by the caller and is never closed here.

    parameters:
        stkfsh_path (str) -- path to stockfish engine executable
        model (chess.engine.SimpleEngine) -- engine being benchmarked
        skill (int) -- "Skill Level" passed to the stockfish opponent, 0-20
        model_options (dict) -- UCI options sent with every ``model`` move
            request; ``None`` (the default) means no options
        engine_white (bool, True) -- when True ``model`` plays white (moves
            first) and the stockfish opponent plays black; when False the
            colours are swapped

    output:
        result (str) -- ``board.result()`` of the finished game, e.g.
            "1-0", "0-1" or "1/2-1/2"
    """
    if model_options is None:
        # Avoid sharing one mutable default dict between calls.
        model_options = {}

    board = chess.Board()
    mdl_engine = {"eng": model,
                  "limit": chess.engine.Limit(time=0.1),
                  "options": model_options}

    opponent = chess.engine.SimpleEngine.popen_uci(stkfsh_path)
    try:
        stockfish = {"eng": opponent,
                     "limit": chess.engine.Limit(time=0.1),
                     "options": {"Skill Level": skill}}

        # Map each colour to the side that plays it, so the mover can be
        # looked up from board.turn instead of from a move counter.
        if engine_white:
            sides = {chess.WHITE: mdl_engine, chess.BLACK: stockfish}
        else:
            sides = {chess.WHITE: stockfish, chess.BLACK: mdl_engine}

        while not board.is_game_over():
            player = sides[board.turn]
            reply = player['eng'].play(board, player['limit'], options=player['options'])
            board.push(reply.move)

        return board.result()
    finally:
        # Always terminate the stockfish subprocess started above.
        opponent.quit()


def elo_win_probability(rating1, rating2):
    """Return the ELO expected score of the player rated ``rating2``.

    Mind the argument order: the result is for the SECOND rating given. It
    is 0.5 when both ratings are equal and approaches 1 as ``rating2`` grows
    relative to ``rating1``.

    parameters:
        rating1 -- rating of the opponent
        rating2 -- rating of the player whose expected score is returned
    returns:
        float strictly between 0 and 1
    """
    rating_gap = 1.0 * (rating1 - rating2)
    odds_against = math.pow(10, rating_gap / 400)
    return 1.0 / (1 + odds_against)
   
def EloRating(Ra, Rb, win, k=40):
    """Update ELO.

    parameters:
        Ra -- rating, player A
        Rb -- rating, player B
        win -- decision for player A: 1 for a win, 0.5 for a draw; any
            other value is scored as a loss for player A
        k -- K-factor, the maximum number of points a single game can move
            a rating (defaults to 40)
    returns:
        Ea -- new ELO of player A
        Eb -- new ELO of player B
        Both are rounded to the nearest whole number and returned as floats.
    """
    # elo_win_probability returns the expected score of its SECOND argument,
    # hence the swapped-looking argument order below.
    Pb = elo_win_probability(Ra, Rb)
    Pa = elo_win_probability(Rb, Ra)

    # Actual score of player A for this game; B gets the remainder.
    if win == 1:
        score_a = 1.0
    elif win == 0.5:
        score_a = 0.5
    else:
        score_a = 0.0
    score_b = 1.0 - score_a

    Ea = round(Ra + k * (score_a - Pa), 0)
    Eb = round(Rb + k * (score_b - Pb), 0)
    return (Ea, Eb)


In [59]:
# set up testing engine
# Reuse the shared STOCKFISH_PATH constant rather than repeating the literal.
engine = chess.engine.SimpleEngine.popen_uci(STOCKFISH_PATH)
engine_opts = {"Skill Level": 10}
elo_guess = 1000

# Benchmark schedule: skill levels 0-19 (level 20 is not played),
# ten games per level.
skill_levels = range(20)
games_per_skill = 10
total_games = len(skill_levels) * games_per_skill

results = []
# play games against a range of skills
idx = 0
try:
    for skill in skill_levels:
        for game in range(games_per_skill):
            # Alternate colours: the tested engine is white on even games.
            white = (game % 2 == 0)
            decision = play_benchmark_game(STOCKFISH_PATH, engine, skill=skill, model_options=engine_opts, engine_white=white)
            result = {"skill": skill, "white": white, "decision": decision}
            results.append(result)
            idx += 1
            if idx % 10 == 0:
                print("finished playing game", idx, "of", total_games)
finally:
    # Shut the tested engine down even if a game raised part-way through.
    engine.quit()

finished playing game 10 of 200
finished playing game 20 of 200
finished playing game 30 of 200
finished playing game 40 of 200
finished playing game 50 of 200
finished playing game 60 of 200
finished playing game 70 of 200
finished playing game 80 of 200
finished playing game 90 of 200
finished playing game 100 of 200
finished playing game 110 of 200
finished playing game 120 of 200
finished playing game 130 of 200
finished playing game 140 of 200
finished playing game 150 of 200
finished playing game 160 of 200
finished playing game 170 of 200
finished playing game 180 of 200
finished playing game 190 of 200
finished playing game 200 of 200


In [64]:
# start with low initial guess for ELO
elo = elo_guess

# update ELO based on results
for result in results:
    benchmark_skill = STOCKFISH_LEVELS[result['skill']]
    decision = result['decision']

    # Result strings are written from white's point of view, so which string
    # counts as a win for the tested engine depends on the colour it played.
    if result['white']:
        win_str, loss_str = "1-0", "0-1"
    else:
        win_str, loss_str = "0-1", "1-0"

    if decision == win_str:
        elo = EloRating(elo, benchmark_skill, 1)[0]
    elif decision == loss_str:
        elo = EloRating(elo, benchmark_skill, 0)[0]
    elif decision == "1/2-1/2":
        # A draw is worth half a win plus half a loss, so its rating change
        # is the mean of the two decisive updates (exact up to the rounding
        # EloRating applies to each of them).
        after_win = EloRating(elo, benchmark_skill, 1)[0]
        after_loss = EloRating(elo, benchmark_skill, 0)[0]
        elo = round((after_win + after_loss) / 2, 0)
    # Any other decision string (e.g. "*" for an unfinished game) carries no
    # result, so the rating is left unchanged.

print(elo)

2112.0


In [63]:
# Show the per-game records gathered by the benchmark loop; each entry is a
# dict with the Stockfish skill level, the colour flag and the game decision.
results

[{'decision': '1-0', 'skill': 0, 'white': True},
 {'decision': '0-1', 'skill': 0, 'white': False},
 {'decision': '1-0', 'skill': 0, 'white': True},
 {'decision': '0-1', 'skill': 0, 'white': False},
 {'decision': '1-0', 'skill': 0, 'white': True},
 {'decision': '0-1', 'skill': 0, 'white': False},
 {'decision': '1-0', 'skill': 0, 'white': True},
 {'decision': '0-1', 'skill': 0, 'white': False},
 {'decision': '1-0', 'skill': 0, 'white': True},
 {'decision': '0-1', 'skill': 0, 'white': False},
 {'decision': '1-0', 'skill': 1, 'white': True},
 {'decision': '0-1', 'skill': 1, 'white': False},
 {'decision': '1-0', 'skill': 1, 'white': True},
 {'decision': '0-1', 'skill': 1, 'white': False},
 {'decision': '1-0', 'skill': 1, 'white': True},
 {'decision': '0-1', 'skill': 1, 'white': False},
 {'decision': '1-0', 'skill': 1, 'white': True},
 {'decision': '0-1', 'skill': 1, 'white': False},
 {'decision': '1-0', 'skill': 1, 'white': True},
 {'decision': '0-1', 'skill': 1, 'white': False},
 {'decisio