In [1]:
import os
import json
import chess.pgn

In [2]:
def parse_pgn_file(filename):
    games = []

    with open(filename, 'r', encoding='utf-8') as pgn_file:
        while True:
            try:
                game = chess.pgn.read_game(pgn_file)
                if game is None:
                    break

                headers = dict(game.headers)
                result = headers.get('Result')
                if result not in ['1-0', '0-1', '1/2-1/2']:
                    continue

                moves = []
                node = game
                board = game.board()
                while not node.is_end():
                    next_node = node.variation(0)
                    move = node.board().san(next_node.move)
                    moves.append(move)
                    board.push(next_node.move)
                    node = next_node

                if len(moves) > 0:
                    games.append({
                        'headers': headers,
                        'result': result,
                        'moves': moves
                    })
            except Exception as e:
                print(f'Failed to parse game: {e}')

    return games

In [3]:
if __name__ == '__main__':
    input_folder = 'c:/Jscode/data/board_data'
    output_file = 'games.json'
    games = []

    for filename in os.listdir(input_folder):
        if filename.endswith('.pgn'):
            filepath = os.path.join(input_folder, filename)
            print(f'Processing {filepath}')
            games.extend(parse_pgn_file(filepath))

    with open(output_file, 'w', encoding='utf-8') as json_file:
        json.dump(games, json_file, ensure_ascii=False, indent=4)

    print('완료')

Processing c:/Jscode/data/board_data\1948-fide-world-championship-tournament.pgn
Processing c:/Jscode/data/board_data\1993-karpov-timman.pgn
Processing c:/Jscode/data/board_data\1996-karpov-kamsky-fide.pgn
Processing c:/Jscode/data/board_data\2005-fide-world-championship-tournament.pgn
Processing c:/Jscode/data/board_data\2006-topalov-kramnik.pgn
Processing c:/Jscode/data/board_data\2007-fide-world-championship-tournament.pgn
Processing c:/Jscode/data/board_data\2015-wowbc.pgn
Processing c:/Jscode/data/board_data\2015-wyccc-u18.pgn
Processing c:/Jscode/data/board_data\2017-fide-world-school-chess-championships.pgn
Processing c:/Jscode/data/board_data\2017-womens-world-championship.pgn
Processing c:/Jscode/data/board_data\2018-fide-womens-world-championship.pgn
Processing c:/Jscode/data/board_data\2018-fide-world-junior-open-championships-u20.pgn


error during pgn parsing
Traceback (most recent call last):
  File "C:\Anaconda3\lib\site-packages\chess\pgn.py", line 1685, in read_game
    move = visitor.parse_san(board_stack[-1], token)
  File "C:\Anaconda3\lib\site-packages\chess\pgn.py", line 1059, in parse_san
    return board.parse_san(san)
  File "C:\Anaconda3\lib\site-packages\chess\__init__.py", line 3062, in parse_san
    raise IllegalMoveError(f"illegal san: {san!r} in {self.fen()}")
chess.IllegalMoveError: illegal san: 'Ree7' in rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1


Processing c:/Jscode/data/board_data\2020-fide-online-world-rapid-championship-u16.pgn
Processing c:/Jscode/data/board_data\2020-fide-online-world-youth-rapid-championship-africa.pgn
Processing c:/Jscode/data/board_data\2020-fide-online-world-youth-rapid-championship-america.pgn
Processing c:/Jscode/data/board_data\2020-fide-online-world-youth-rapid-championship-europe.pgn
Processing c:/Jscode/data/board_data\2020-fide-womens-world-championship-match.pgn
Processing c:/Jscode/data/board_data\2021-fide-online-world-corporate-chess-championship-finals.pgn
Processing c:/Jscode/data/board_data\2021-fide-women-world-blitz-chess-championship.pgn
Processing c:/Jscode/data/board_data\2021-fide-women-world-rapid-chess-championship.pgn
Processing c:/Jscode/data/board_data\2021-fide-world-blitz-chess-championship.pgn
Processing c:/Jscode/data/board_data\2021-fide-world-championship-for-people-with-disabilities.pgn
Processing c:/Jscode/data/board_data\2021-fide-world-chess-championship.pgn
Processi

error during pgn parsing
Traceback (most recent call last):
  File "C:\Anaconda3\lib\site-packages\chess\pgn.py", line 1685, in read_game
    move = visitor.parse_san(board_stack[-1], token)
  File "C:\Anaconda3\lib\site-packages\chess\pgn.py", line 1059, in parse_san
    return board.parse_san(san)
  File "C:\Anaconda3\lib\site-packages\chess\__init__.py", line 3062, in parse_san
    raise IllegalMoveError(f"illegal san: {san!r} in {self.fen()}")
chess.IllegalMoveError: illegal san: 'Bg7' in rnbqkb1r/pppppppp/5n2/8/2PP4/8/PP2PPPP/RNBQKBNR b KQkq - 0 2


Processing c:/Jscode/data/board_data\2021-world-womens-team-championship-knockout.pgn
Processing c:/Jscode/data/board_data\2021-world-womens-team-championship-pool-a.pgn
Processing c:/Jscode/data/board_data\2021-world-womens-team-championship-pool-b.pgn
Processing c:/Jscode/data/board_data\2022-fide-womens-world-blitz-chess-championship.pgn
Processing c:/Jscode/data/board_data\2022-fide-womens-world-rapid-chess-championship.pgn
Processing c:/Jscode/data/board_data\2022-fide-world-blitz-chess-championship.pgn
Processing c:/Jscode/data/board_data\2022-fide-world-rapid-chess-championship.pgn
Processing c:/Jscode/data/board_data\2022-fide-world-senior-chess-championship.pgn
Processing c:/Jscode/data/board_data\2022-fide-world-senior-team-chess-championship.pgn
Processing c:/Jscode/data/board_data\2022-fide-world-team-championship-playoffs.pgn
Processing c:/Jscode/data/board_data\2022-fide-world-team-championship-pool-stage.pgn
완료


In [4]:
import pandas as pd
import json

In [5]:
with open('C:/JsCode/project/games.json','r', encoding='utf-8') as f:
    data = json.load(f)

In [6]:
data

[{'headers': {'Event': 'FIDE World Chess Championship Tournament 1948',
   'Site': 'https://www.chess.com',
   'Date': '2021.07.05',
   'Round': '01',
   'White': 'Euwe, Max',
   'Black': 'Keres, Paul',
   'Result': '0-1',
   'BlackClock': '0:00:10',
   'TimeControl': '5400+30',
   'WhiteClock': '0:09:20'},
  'result': '0-1',
  'moves': ['e4',
   'e5',
   'Nf3',
   'Nc6',
   'Bb5',
   'a6',
   'Ba4',
   'd6',
   'c3',
   'Bd7',
   'd4',
   'Nge7',
   'Bb3',
   'h6',
   'Nbd2',
   'Ng6',
   'Nc4',
   'Be7',
   'O-O',
   'O-O',
   'Ne3',
   'Bf6',
   'Nd5',
   'exd4',
   'Nxd4',
   'Re8',
   'Nxf6+',
   'Qxf6',
   'f3',
   'Nf4',
   'Nxc6',
   'Bxc6',
   'Be3',
   'Rad8',
   'Qd2',
   'Ng6',
   'Bd4',
   'Qe7',
   'Rae1',
   'Qd7',
   'c4',
   'Ba4',
   'Bxa4',
   'Qxa4',
   'Qc3',
   'f6',
   'f4',
   'Kh7',
   'b3',
   'Qd7',
   'Qf3',
   'b5',
   'Qd3',
   'bxc4',
   'Qxc4',
   'Rxe4',
   'Rxe4',
   'd5',
   'Qxa6',
   'dxe4',
   'Be3',
   'Qg4',
   'Qc4',
   'Rd3',
   'Bc1',
   'Nh4'

In [7]:
rows = []

for game in data:
    headers = game.get('headers', {})
    event = headers.get('Event', '')
    date = headers.get('Date', '')
    result = headers.get('Result', '')
    moves = ' '.join(game['moves'])
    opening = moves.split()[:10]
    opening = ' '.join(opening)
    rows.append({'event': event, 'date': date, 'result': result, 'moves': moves, 'opening': opening})

In [8]:
df = pd.DataFrame(rows, columns=['event', 'date', 'result', 'moves', 'opening'])

print(df.head())

                                           event        date   result  \
0  FIDE World Chess Championship Tournament 1948  2021.07.05      0-1   
1  FIDE World Chess Championship Tournament 1948  2021.07.05  1/2-1/2   
2  FIDE World Chess Championship Tournament 1948  2021.07.05      1-0   
3  FIDE World Chess Championship Tournament 1948  2021.07.05      1-0   
4  FIDE World Chess Championship Tournament 1948  2021.07.05      1-0   

                                               moves  \
0  e4 e5 Nf3 Nc6 Bb5 a6 Ba4 d6 c3 Bd7 d4 Nge7 Bb3...   
1  e4 e5 Nf3 Nc6 Bb5 a6 Ba4 Nf6 O-O Be7 Re1 b5 Bb...   
2  d4 d5 c4 e6 Nf3 Nf6 Nc3 c6 e3 Nbd7 Bd3 Bb4 a3 ...   
3  c4 Nf6 Nf3 c6 Nc3 d5 e3 g6 d4 Bg7 cxd5 Nxd5 Bc...   
4  Nf3 Nf6 c4 b6 d3 g6 e4 d6 Nc3 Bg7 d4 O-O Be2 B...   

                                opening  
0    e4 e5 Nf3 Nc6 Bb5 a6 Ba4 d6 c3 Bd7  
1  e4 e5 Nf3 Nc6 Bb5 a6 Ba4 Nf6 O-O Be7  
2    d4 d5 c4 e6 Nf3 Nf6 Nc3 c6 e3 Nbd7  
3     c4 Nf6 Nf3 c6 Nc3 d5 e3 g6 d4 Bg7  
4     Nf3 Nf6 

In [9]:
df.to_csv('chess_data.csv', index=False)

In [10]:
opening_type = df.groupby(['opening']).count()
print(opening_type)

                                        event  date  result  moves
opening                                                           
Nc3 Nf6 Nf3 Nc6 d4 d5 Bf4 a6 e3 h6          1     1       1      1
Nc3 Nf6 d4 d5 Bf4 Bf5 Nf3 e6 Qd2 Bb4        1     1       1      1
Nc3 Nf6 d4 d5 Bf4 e6 e3 Bb4 Ne2 O-O         1     1       1      1
Nc3 Nf6 e4 e5 Nf3 Nc6 Be2 d5 exd5 Nxd5      1     1       1      1
Nc3 Nf6 e4 e5 g3 c6 Bg2 d5 d3 dxe4          1     1       1      1
...                                       ...   ...     ...    ...
g4 d5 Bg2 Bxg4 c4 e6 Qb3 Nf6 cxd5 Nxd5      1     1       1      1
g4 d5 Bg2 c6 g5 e5 d4 exd4 Qxd4 Be6         1     1       1      1
g4 d5 e3 e5 Bg2 c6 d3 Bd6 h3 Ne7            1     1       1      1
g4 e5 Bg2 d5 e4 Nf6 exd5 Bxg4 f3 Bh5        1     1       1      1
h4 d5 Nf3 Nf6 d4 e6 Bg5 c5 e3 Nc6           1     1       1      1

[7993 rows x 4 columns]


In [11]:
opening_counts = df['opening'].value_counts()
print(opening_counts)

e4 c5 Nf3 d6 d4 cxd4 Nxd4 Nf6 Nc3 a6     395
e4 e5 Nf3 Nc6 Bb5 a6 Ba4 Nf6 O-O Be7     232
e4 c5 Nf3 d6 d4 cxd4 Nxd4 Nf6 Nc3 Nc6     90
e4 c5 Nf3 e6 d4 cxd4 Nxd4 Nc6 Nc3 Qc7     86
e4 c5 Nf3 Nc6 d4 cxd4 Nxd4 Nf6 Nc3 e5     85
                                        ... 
c4 e6 g3 d5 Bg2 d4 Nf3 c5 O-O Nc6          1
e4 e6 d3 d5 Qe2 Nf6 Nf3 dxe4 dxe4 Nc6      1
e4 e6 d4 d5 Nd2 a6 Bd3 c5 c3 Nc6           1
e4 e5 Nf3 Nc6 Bc4 Bc5 c3 d6 d3 Nf6         1
Nf3 d5 e3 Nf6 b3 Bf5 Be2 h6 Ba3 Nbd7       1
Name: opening, Length: 7993, dtype: int64


In [12]:
openingTop100 = opening_counts[:100]
print(openingTop100)

e4 c5 Nf3 d6 d4 cxd4 Nxd4 Nf6 Nc3 a6        395
e4 e5 Nf3 Nc6 Bb5 a6 Ba4 Nf6 O-O Be7        232
e4 c5 Nf3 d6 d4 cxd4 Nxd4 Nf6 Nc3 Nc6        90
e4 c5 Nf3 e6 d4 cxd4 Nxd4 Nc6 Nc3 Qc7        86
e4 c5 Nf3 Nc6 d4 cxd4 Nxd4 Nf6 Nc3 e5        85
                                           ... 
e4 e5 Nf3 Nc6 Bc4 Nf6 d3 h6 O-O d6           16
d4 Nf6 c4 e6 Nf3 d5 Nc3 Nbd7 cxd5 exd5       16
e4 c5 Nf3 Nc6 d4 cxd4 Nxd4 g6 c4 Nf6         15
e4 c5 Nf3 d6 Bb5+ Bd7 Bxd7+ Qxd7 O-O Nc6     15
e4 c5 Nf3 d6 d4 cxd4 Nxd4 Nf6 f3 e5          15
Name: opening, Length: 100, dtype: int64


In [19]:
openingTop10 = openingTop100[:10]
openingTop10

e4 c5 Nf3 d6 d4 cxd4 Nxd4 Nf6 Nc3 a6     395
e4 e5 Nf3 Nc6 Bb5 a6 Ba4 Nf6 O-O Be7     232
e4 c5 Nf3 d6 d4 cxd4 Nxd4 Nf6 Nc3 Nc6     90
e4 c5 Nf3 e6 d4 cxd4 Nxd4 Nc6 Nc3 Qc7     86
e4 c5 Nf3 Nc6 d4 cxd4 Nxd4 Nf6 Nc3 e5     85
d4 Nf6 c4 g6 Nc3 Bg7 e4 d6 Nf3 O-O        80
e4 c5 Nf3 e6 d4 cxd4 Nxd4 Nf6 Nc3 Nc6     73
e4 e5 Nf3 Nc6 Bb5 a6 Ba4 Nf6 O-O b5       70
e4 c5 Nf3 d6 d4 cxd4 Nxd4 Nf6 Nc3 g6      69
d4 Nf6 c4 g6 Nc3 d5 cxd5 Nxd5 e4 Nxc3     59
Name: opening, dtype: int64