In [1]:
import chess.pgn
import csv
import pandas as pd
import os
import io

In [2]:
# Decode explicitly as UTF-8 (the encoding read_pgn_and_write_to_csv uses for
# PGN input) instead of relying on the platform's default text encoding.
pgn = open("russian_chess.pgn", encoding="utf-8")

In [3]:
# Parse every game in the file.  read_game() advances the handle on its own
# and returns None once nothing is left, so the handle must not also be
# iterated line by line: doing so discards one line of PGN text before every
# parse and can append a trailing None to the list.
pgn.seek(0)  # start from the top so re-running this cell yields the same list
games = []
while True:
    curr = chess.pgn.read_game(pgn)
    if curr is None:
        break  # end of file
    games.append(curr)

In [5]:
# Count the entries collected in `games`.
len(games)

23

In [6]:
# Pick the first entry of `games` for a closer look.
first_game = games[0]

In [7]:
# Replay the main line of first_game from its starting position, printing each
# move followed by the FEN of the position it leads to.
board = first_game.board()
for move in first_game.mainline_moves():
    board.push(move)
    print(move, board.fen(), sep="\n")

e2e4
rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR b KQkq - 0 1
e7e5
rnbqkbnr/pppp1ppp/8/4p3/4P3/8/PPPP1PPP/RNBQKBNR w KQkq - 0 2
g1f3
rnbqkbnr/pppp1ppp/8/4p3/4P3/5N2/PPPP1PPP/RNBQKB1R b KQkq - 1 2
b8c6
r1bqkbnr/pppp1ppp/2n5/4p3/4P3/5N2/PPPP1PPP/RNBQKB1R w KQkq - 2 3
f1b5
r1bqkbnr/pppp1ppp/2n5/1B2p3/4P3/5N2/PPPP1PPP/RNBQK2R b KQkq - 3 3
a7a6
r1bqkbnr/1ppp1ppp/p1n5/1B2p3/4P3/5N2/PPPP1PPP/RNBQK2R w KQkq - 0 4
b5a4
r1bqkbnr/1ppp1ppp/p1n5/4p3/B3P3/5N2/PPPP1PPP/RNBQK2R b KQkq - 1 4
g8f6
r1bqkb1r/1ppp1ppp/p1n2n2/4p3/B3P3/5N2/PPPP1PPP/RNBQK2R w KQkq - 2 5
e1g1
r1bqkb1r/1ppp1ppp/p1n2n2/4p3/B3P3/5N2/PPPP1PPP/RNBQ1RK1 b kq - 3 5
b7b5
r1bqkb1r/2pp1ppp/p1n2n2/1p2p3/B3P3/5N2/PPPP1PPP/RNBQ1RK1 w kq - 0 6
a4b3
r1bqkb1r/2pp1ppp/p1n2n2/1p2p3/4P3/1B3N2/PPPP1PPP/RNBQ1RK1 b kq - 1 6
f8c5
r1bqk2r/2pp1ppp/p1n2n2/1pb1p3/4P3/1B3N2/PPPP1PPP/RNBQ1RK1 w kq - 2 7
a2a4
r1bqk2r/2pp1ppp/p1n2n2/1pb1p3/P3P3/1B3N2/1PPP1PPP/RNBQ1RK1 b kq - 0 7
a8b8
1rbqk2r/2pp1ppp/p1n2n2/1pb1p3/P3P3/1B3N2/1PPP1PPP/RNBQ1RK1 w k - 1 8
c2c

In [13]:
def extract_combined_moves_and_comments(pgn_text):
    """Pair up the main-line moves of the first game found in ``pgn_text``.

    Consecutive half-moves are joined two at a time into one row (for example
    ``"e4 e5"``); a final unpaired half-move gets a row of its own.  Only the
    main line is followed (variation 0 at every node).

    Args:
        pgn_text: PGN source as a string.  Only the first game is read.

    Returns:
        A list of ``(move_number, player, moves, comment)`` tuples where

        * ``move_number`` counts the emitted rows, starting at 1;
        * ``player`` is ``"White"`` for odd ``move_number`` and ``"Black"``
          for even ``move_number``;
        * ``moves`` is the SAN of the one or two half-moves in the row,
          separated by a single space and without surrounding whitespace;
        * ``comment`` holds the comments attached to those half-moves, joined
          by a single space when both half-moves carry one.

        An empty list is returned when ``pgn_text`` contains no game.
    """
    game = chess.pgn.read_game(io.StringIO(pgn_text))
    if game is None:
        # read_game() yields None when there is nothing to parse.
        return []

    combined_moves_comments = []
    move_number = 1
    pending_move = ""      # SAN of a half-move still waiting for its partner
    pending_comment = ""   # comment attached to that pending half-move

    # Replay the game on one board instead of asking every node for board(),
    # which hands back a freshly built copy on each call.
    board = game.board()
    node = game
    while not node.is_end():
        node = node.variation(0)
        san = board.san(node.move)  # SAN must be computed before the move is played
        board.push(node.move)

        if not pending_move:
            # First half of a pair: hold it until the reply arrives.
            pending_move = san
            pending_comment = node.comment
            continue

        # NOTE(review): the label follows the parity of the row counter, not
        # the side that actually moved; kept as-is so existing CSV consumers
        # see the same values -- confirm what this column is meant to hold.
        player = "White" if move_number % 2 == 1 else "Black"
        # Keep both comments; previously the first one was discarded whenever
        # the second half-move had a comment of its own.
        combined_comment = " ".join(
            text for text in (pending_comment, node.comment) if text
        )
        combined_moves_comments.append(
            (move_number, player, f"{pending_move} {san}", combined_comment)
        )
        pending_move = ""
        pending_comment = ""
        move_number += 1

    # The main line ended on an unpaired half-move.
    if pending_move:
        player = "White" if move_number % 2 == 1 else "Black"
        combined_moves_comments.append(
            (move_number, player, pending_move, pending_comment)
        )

    return combined_moves_comments

# Example usage: replace the placeholder below with real PGN text.
pgn_text = """Your PGN text goes here"""
combined_moves_comments = extract_combined_moves_and_comments(pgn_text)

# Print combined moves and comments
for entry in combined_moves_comments:
    print(entry)


In [14]:
def read_pgn_and_write_to_csv(pgn_file_path, csv_file_path, encoding='utf-8', errors='strict'):
    """Convert every game in a PGN file into rows of a CSV file.

    The CSV gets the header ``Game Number, Move Number, Player, Move, Comment``
    followed by one row per entry returned by
    ``extract_combined_moves_and_comments`` for each game, prefixed with the
    1-based position of the game within the file.

    Args:
        pgn_file_path: Path of the PGN file to read.
        csv_file_path: Path of the CSV file to create (overwritten if present).
        encoding: Text encoding used to decode the PGN file.  Defaults to
            UTF-8; pass e.g. ``'cp1252'`` or ``'latin-1'`` for legacy files
            that are not valid UTF-8.
        errors: Decoding error policy forwarded to ``open()``.  The default
            ``'strict'`` raises ``UnicodeDecodeError`` on undecodable bytes.

    Raises:
        UnicodeDecodeError: If the PGN file cannot be decoded with
            ``encoding`` under the ``'strict'`` policy.
        OSError: If either file cannot be opened.
    """
    with open(pgn_file_path, 'r', encoding=encoding, errors=errors) as pgn, \
            open(csv_file_path, 'w', newline='', encoding='utf-8') as csvfile:
        csvwriter = csv.writer(csvfile)
        # Write the headers
        csvwriter.writerow(['Game Number', 'Move Number', 'Player', 'Move', 'Comment'])

        game_number = 1
        while True:
            game = chess.pgn.read_game(pgn)
            if game is None:
                break  # End of file or no more games

            # The extractor takes PGN text, so the parsed game is exported
            # back to a string before being handed over.
            rows = extract_combined_moves_and_comments(str(game))

            # Prepend the game number to every (move_number, player, move, comment) row.
            csvwriter.writerows([game_number, *row] for row in rows)

            game_number += 1

In [15]:
# Convert a single PGN file; the CSV path is relative, so it is written to the
# current working directory.
# NOTE(review): the input path is absolute and machine-specific.
read_pgn_and_write_to_csv('/Users/mzk/Documents/566 Project/russian_chess.pgn', 'russian_chess.csv')

In [16]:
def process_pgn_files(directory):
    """Convert each ``.pgn`` file found directly in ``directory`` to a CSV.

    Every ``<name>.pgn`` entry is handed to ``read_pgn_and_write_to_csv`` with
    ``<name>.csv`` in the same directory as the target, and one line is
    printed per converted file.  Entries without the ``.pgn`` suffix are
    ignored.
    """
    for filename in os.listdir(directory):
        if not filename.endswith('.pgn'):
            continue

        # Derive the CSV name once and reuse it for both the path and the message.
        stem = os.path.splitext(filename)[0]
        csv_name = stem + '.csv'
        source_path = os.path.join(directory, filename)
        target_path = os.path.join(directory, csv_name)

        read_pgn_and_write_to_csv(source_path, target_path)
        print(f"Processed {filename} to {csv_name}")

In [17]:
# Run the batch conversion over this directory.
# NOTE(review): the path is absolute and machine-specific.
directory = '/Users/mzk/Documents/566 Project'
process_pgn_files(directory)

Processed linares_2002.pgn to linares_2002.csv
Processed middleg.pgn to middleg.csv
Processed russian_chess.pgn to russian_chess.csv


UnicodeDecodeError: 'utf-8' codec can't decode byte 0x94 in position 6535: invalid start byte

In [157]:
# Load only the first 1000 rows of linares_2002.csv into a DataFrame.
df = pd.read_csv('linares_2002.csv', nrows=1000)