In [1]:
import os
import sys
import chess.pgn as pychess
from chess.pgn import read_game
from chess import square_name
from chess import SQUARE_NAMES, BB_SQUARES
import ChessFuncs
from ChessFuncs import end_board
import bz2
import time
from io import StringIO
from multiprocessing import Process
import time

In [2]:
# in_path = "/Volumes/SAMSUNG EVO SSD 500GB/pgn3.txt"
# temp_path = "/Volumes/SAMSUNG EVO SSD 500GB/pgn-chunk.txt"
# out_path = "/Volumes/SAMSUNG EVO SSD 500GB/chess-results-3.txt"

In [3]:
in_path = "test-pgns.txt"
out_path = "test-results.txt"

In [5]:
def end_board(parsed_pgn_file):
    current_board = parsed_pgn_file.board()
    for move in parsed_pgn_file.main_line():
        current_board.push(move)    
    
    return current_board

In [6]:
def main():
    with open(in_path) as bigfile:
        pgn = []
        for line in bigfile:  
            if line.startswith("[Event"):
                pgn.append(line)
            elif line.startswith("1. "):
                pgn.append(line)
                pgn = "".join(pgn)
                pgn = StringIO(pgn)
                pgn = read_game(pgn)
                result = pgn.headers["Result"]
                board = end_board(pgn)
                 
                if result in ["1-0","0-1"] and board.is_checkmate():
                    if result == "1-0":
                        king_num = board.king(0) # BLACK loses
                    if result == "0-1":
                        king_num = board.king(1) # WHITE loses
                    with open(out_path, "a") as results:
                        results.write("%i\n" % king_num)     
                else:
                    pass # if you want to save other data
                
                pgn = []
                
            else:
                pgn.append(line)

In [None]:
%%time
main()
# this took 7 min 1s to run a 90mb PGN file

In [None]:
multiplier = round(409700297/121332)
mins = 7 * multiplier
hours = mins / 60
days = hours / 24
days_to_run = round(days,1)
print(days_to_run)

# Test Multiprocessing
Code below parses two PGN files at once, for about a 2X speed increase

In [13]:
pgn1 = "test-pgns.txt"
pgn2 = "test-pgns-2.txt"

In [8]:
#the thing which will run simultaneously
def main(pgn_file):
    
    with open(pgn_file) as bigfile:
        pgn = []
        for line in bigfile:  
            if line.startswith("[Event"):
                pgn.append(line)
            elif line.startswith("1. "):
                pgn.append(line)
                pgn = "".join(pgn)
                pgn = StringIO(pgn)
                pgn = read_game(pgn)
                result = pgn.headers["Result"]
                board = end_board(pgn)
                 
                if result in ["1-0","0-1"] and board.is_checkmate():
                    if result == "1-0":
                        king_num = board.king(0) # BLACK loses
                    if result == "0-1":
                        king_num = board.king(1) # WHITE loses
                    with open(out_path, "a") as results:
                        results.write("%i\n" % king_num)     
                else:
                    pass # if you want to save other data
                
                pgn = []
                
            else:
                pgn.append(line)

In [15]:
def plain():
    start = time.time()
    print("starting 1")
    main(pgn1)
    print("starting 2")
    main(pgn2)
    processtime = time.time() - start
    return processtime

In [19]:
def multi():
    start = time.time()
    p1 = Process(target=main, args=(pgn1,))
    p2 = Process(target=main, args=(pgn2,))
    p1.start()
    p2.start()
    p1.join()
    p2.join()
    processtime = time.time() - start
    return processtime

In [11]:
speed_increase = plain() / multi()
print(f"Multiprocessing was {round(speed_increase,2)}X faster.")

Multiprocessing was 10264.79X faster.


In [21]:
%%time
plain()

starting 1
starting 2
CPU times: user 20 s, sys: 263 ms, total: 20.3 s
Wall time: 20.3 s


20.3317129611969

In [20]:
%%time
multi()

CPU times: user 3.58 ms, sys: 9.99 ms, total: 13.6 ms
Wall time: 11.3 s


11.303863048553467

In [66]:
def check_same_file():
    import pandas as pd
    pdf = pd.read_csv("test-results-plain.txt", names=["king"])
    mdf = pd.read_csv("test-results-multi.txt", names=["king"])
    return pdf['king'].mode() == mdf['king'].mode()