# How well are fundamental endings converted depending on ending and rating bracket? 

## Step 1 : Apply a collection of cql files to extract the relevant endings

In [76]:
import subprocess
import os
import chess
import chess.pgn

cql_bin_dir = '/Users/elliottmacneil/chess/cql6/cql'
cql_scripts_dir = '/Users/elliottmacneil/python/reti/src/100endings'
output_dir = '/Users/elliottmacneil/python/reti/output/'
iterated = False
db_dir = '/Users/elliottmacneil/chess/pgn/lichess/lichess_BurntBread_2022-04-07.pgn'

def run_cql_scripts(cql_scripts_dir, db, count, matchlow=1):
	if os.path.isdir(cql_scripts_dir):
		for file in os.listdir(cql_scripts_dir):
			file_dir = '{}/{}'.format(cql_scripts_dir,file)
			file_noext = os.path.splitext(file)[0]
			dir_noext = os.path.join(output_dir,file_noext)
			pgn_file = dir_noext+str(count)+'.pgn'
			cql_command = '{} -i {} -o {} -matchcount {} 100 -threads 20 {}'.format(cql_bin_dir, db, pgn_file, matchlow, file_dir)
			try:
				subprocess.run(cql_command,shell=True)
			except:
				print('failed')
			count += 1
		print('Done.')
	else:
		file_noext = cql_scripts_dir.split('/')[-1][:-4]
		pgn_file = file_noext+str(count)+'.pgn'
		cql_command = '{} -i {} -o {} -matchcount {} 100 -threads 20 {}'.format(cql_bin_dir, db, pgn_file, matchlow, cql_scripts_dir)
		try:
			subprocess.run(cql_command,shell=True)
		except:
			print('failed')
		print('Done.')

In [77]:
# run each script in cql_scripts_dir on each pgn in db_dir
if iterated:
    count = 0
    for filename in os.listdir(db_dir):
        if filename.endswith('.pgn'):
            print(filename)
            db = db_dir + '/' + filename 
            run_cql_scripts(cql_scripts_dir,db,count)
            count += 1
else:
    run_cql_scripts(cql_scripts_dir,db_dir,count=1)

CQL version 6.1 (build 9.362) (c) Gady Costeff and Lewis Stiller
...
0 matches of 3231 games in 0.493 seconds using 20 threads to /Users/elliottmacneil/python/reti/output/99Qrp1.pgn
CQL version 6.1 (build 9.362) (c) Gady Costeff and Lewis Stiller
...
5 matches of 3231 games in 0.444 seconds using 20 threads to /Users/elliottmacneil/python/reti/output/77KPP2.pgn
CQL version 6.1 (build 9.362) (c) Gady Costeff and Lewis Stiller
...
7 matches of 3231 games in 0.414 seconds using 20 threads to /Users/elliottmacneil/python/reti/output/11Np3.pgn
CQL version 6.1 (build 9.362) (c) Gady Costeff and Lewis Stiller
...
54 matches of 3231 games in 0.5 seconds using 20 threads to /Users/elliottmacneil/python/reti/output/52_68RPr4.pgn
CQL version 6.1 (build 9.362) (c) Gady Costeff and Lewis Stiller
...
5 matches of 3231 games in 0.421 seconds using 20 threads to /Users/elliottmacneil/python/reti/output/98RPb5.pgn
CQL version 6.1 (build 9.362) (c) Gady Costeff and Lewis Stiller
...
46 matches of 3231 g

## Step 2 : Extract the first (for simplicity) ending of each game in the pgn, check valuation on tablebase, and append result, theoretical result and ratings to csv

In [3]:
import requests 
import pandas
import chess
import chess.pgn
from reti import utilities
import time
import matplotlib.pyplot as plt
import numpy as np

def check_tablebase(fen):
    r = requests.get('http://tablebase.lichess.ovh/standard?fen={}'.format(fen))
    return r.json()['category']

def compute_conversion(result1,result2):
    'result1 is the result from the tablebase, result2 is the result from the pgn'
    if result1 == result2:
        return 0
    elif result1 == '1-0' and result2 == '0-1':
        return -1
    elif result1 == '0-1' and result2 == '1-0':
        return 1
    elif result1 == '1/2-1/2' and result2 == '1-0':
        return 0.5
    elif result1 == '1/2-1/2' and result2 == '0-1':
        return -0.5
    elif result1 == '1-0' and result2 == '1/2-1/2':
        return -0.5
    elif result1 == '0-1' and result2 == '1/2-1/2':
        return 0.5

In [4]:
def pgn_to_data(path):
    '''
    TODO refactor the name, this sucks.
    '''
    fen_list = []
    side_to_move = []
    theoretical_result = []
    white_elo = []
    black_elo = []
    result = []
    white = []
    black = []
    conversion = []
    count = 0
    pgn = open(path, encoding='utf-8', errors='replace')

    with open(path, encoding="utf-8",errors='replace') as file:
        content = file.read()
    num_games = utilities.game_length_array(content)[0]
    print('number of games:',num_games)
    for _ in range(num_games):
        try:
            game = chess.pgn.read_game(pgn)
            board = game.board()
            for node in game.mainline():
                move = node.move
                comment = node.comment 
                board.push(move)
                if comment == 'CQL':
                    fen = board.fen()
                    fen_list.append(fen)
                    # work out what the result should be 
                    side_to_move = fen.split(' ')[1]
                    tablebase_eval = check_tablebase(fen)
                    if tablebase_eval == 'draw':
                        tablebase_eval = '1/2-1/2'
                    elif tablebase_eval == 'win' and side_to_move == 'w':
                        tablebase_eval = '1-0'
                    elif tablebase_eval == 'win' and side_to_move == 'b':
                        tablebase_eval = '0-1'
                    elif tablebase_eval == 'loss' and side_to_move == 'w':
                        tablebase_eval = '0-1'
                    elif tablebase_eval == 'loss' and side_to_move == 'b':
                        tablebase_eval = '1-0'
                    theoretical_result.append(tablebase_eval)
                    count += 1
                    try:
                        white_elo.append(float(game.headers['WhiteElo']))
                    except:
                        white_elo.append(0)
                    try:
                        black_elo.append(float(game.headers['BlackElo']))
                    except:
                        black_elo.append(0)
                    white.append(game.headers['White'])
                    black.append(game.headers['Black'])
                    result.append(game.headers['Result'])
                    conversion.append(compute_conversion(tablebase_eval,game.headers['Result']))
                    break
                # avoiding the rate limit on the api
                if count > 15:
                    count = 0 
                    time.sleep(60)
        except:
            pass
        else:
            continue
    return fen_list, white_elo, black_elo, white, black, theoretical_result, result, conversion

In [5]:
output_path = '/Users/elliottmacneil/python/reti/output/conversions/'
csv_path = '/Users/elliottmacneil/python/reti/output/csv/'
for file in os.listdir(output_path):
    if file.endswith('.pgn'):
        game = output_path+file
        print(game)
        fen_list, white_elo, black_elo, white, black, theoretical_result, result, conversion = pgn_to_data(game)
        df = pandas.DataFrame({'fen':fen_list, 'white_elo':white_elo, 'black_elo':black_elo, 'white':white, 'black':black, 'theoretical_result':theoretical_result, 'result':result,'conversion':conversion})
        df.to_csv(csv_path+file[:-4]+'.csv')

/Users/elliottmacneil/python/reti/output/conversions/77KPP2.pgn
number of games: 16
/Users/elliottmacneil/python/reti/output/conversions/78KPP20.pgn
number of games: 23
/Users/elliottmacneil/python/reti/output/conversions/69_76RPPr6.pgn
number of games: 72
/Users/elliottmacneil/python/reti/output/conversions/79_82KPp21.pgn
number of games: 62
/Users/elliottmacneil/python/reti/output/conversions/21_29Rvsp8.pgn
number of games: 83
/Users/elliottmacneil/python/reti/output/conversions/97RPb19.pgn
number of games: 58
/Users/elliottmacneil/python/reti/output/conversions/44_51ocBPPb28.pgn
number of games: 64
/Users/elliottmacneil/python/reti/output/conversions/8_9Rn12.pgn
number of games: 98
/Users/elliottmacneil/python/reti/output/conversions/42_43ocBPPb27.pgn
number of games: 16
/Users/elliottmacneil/python/reti/output/conversions/83_87KPPp24.pgn
number of games: 50
/Users/elliottmacneil/python/reti/output/conversions/56RPr26.pgn
number of games: 93
/Users/elliottmacneil/python/reti/output/

In [69]:
np.mean(df['white_elo'])

TypeError: unsupported operand type(s) for +: 'int' and 'str'