In [3]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline

In [9]:
raw = open('sample-data.txt', 'r').read().splitlines()
raw

['[Event "Rated Bullet game"]',
 '[Site "https://lichess.org/qjWLfHje"]',
 '[Date "2024.06.01"]',
 '[Round "-"]',
 '[White "simple-bot"]',
 '[Black "simplexitor"]',
 '[Result "1/2-1/2"]',
 '[UTCDate "2024.06.01"]',
 '[UTCTime "00:00:00"]',
 '[WhiteElo "1872"]',
 '[BlackElo "1744"]',
 '[WhiteRatingDiff "-2"]',
 '[BlackRatingDiff "+2"]',
 '[WhiteTitle "BOT"]',
 '[BlackTitle "BOT"]',
 '[ECO "A15"]',
 '[Opening "English Opening: Anglo-Indian Defense, King\'s Knight Variation"]',
 '[TimeControl "60+1"]',
 '[Termination "Normal"]',
 '',
 '1. c4 { [%clk 0:01:00] } 1... Nf6 { [%clk 0:01:00] } 2. Nf3 { [%clk 0:01:00] } 2... c5 { [%clk 0:00:59] } 3. e3 { [%clk 0:00:59] } 3... g6 { [%clk 0:00:58] } 4. Nc3 { [%clk 0:00:58] } 4... Bg7 { [%clk 0:00:57] } 5. Bd3 { [%clk 0:00:57] } 5... d5 { [%clk 0:00:56] } 6. cxd5 { [%clk 0:00:57] } 6... Nxd5 { [%clk 0:00:55] } 7. Nxd5 { [%clk 0:00:56] } 7... Qxd5 { [%clk 0:00:54] } 8. Qb3 { [%clk 0:00:55] } 8... Qxb3 { [%clk 0:00:53] } 9. axb3 { [%clk 0:00:54] } 9.

In [40]:
from collections import namedtuple

RawGame = namedtuple('RawGame', ['metadata', 'moves'])

def is_metadata_line(line):
    return line[0] == "[" and line[-1] == "]"

def is_moves_line(line):
    return line[-3:] == "1-0" or line[-3:] == "0-1" or line[-7:] == "1/2-1/2"

raw_games = []
current_metadata = []

for line in raw:
    if line == '':
        continue
    
    if is_metadata_line(line):
        current_metadata.append(line)
        continue
    
    if is_moves_line(line):
        raw_game = RawGame(current_metadata, line)
        raw_games.append(raw_game)
        current_metadata = []
        continue
        
raw_games = list(filter(lambda raw_game: len(raw_game.moves.strip()) > 7, raw_games))

In [43]:
i = 1
print(raw_games[i])

RawGame(metadata=['[Event "Rated Blitz game"]', '[Site "https://lichess.org/u5a9yCCK"]', '[Date "2024.06.01"]', '[Round "-"]', '[White "bicanbican"]', '[Black "browniemeister"]', '[Result "1-0"]', '[UTCDate "2024.06.01"]', '[UTCTime "00:00:00"]', '[WhiteElo "849"]', '[BlackElo "878"]', '[WhiteRatingDiff "+6"]', '[BlackRatingDiff "-6"]', '[ECO "C25"]', '[Opening "Vienna Game"]', '[TimeControl "300+3"]', '[Termination "Normal"]'], moves='1. e4 { [%clk 0:05:00] } 1... e5 { [%clk 0:05:00] } 2. Nc3 { [%clk 0:05:00] } 2... f6 { [%clk 0:05:01] } 3. Nf3 { [%clk 0:04:57] } 3... c6 { [%clk 0:04:55] } 4. Bc4 { [%clk 0:04:59] } 4... Bc5 { [%clk 0:04:55] } 5. d3 { [%clk 0:04:58] } 5... Qb6 { [%clk 0:04:56] } 6. O-O { [%clk 0:04:32] } 6... Nh6 { [%clk 0:04:54] } 7. h3 { [%clk 0:04:29] } 7... d6 { [%clk 0:04:51] } 8. Bxh6 { [%clk 0:04:25] } 8... gxh6 { [%clk 0:04:53] } 9. g3 { [%clk 0:04:25] } 9... Bxh3 { [%clk 0:04:54] } 10. Kh2 { [%clk 0:04:24] } 10... Bxf1 { [%clk 0:04:55] } 11. Qxf1 { [%clk 0:04:

In [59]:
import re

move_pattern = re.compile(r'(\d+\.)\s*([BNRQK]?[a-h]?[1-8]?x?[a-h][1-8](?:=[BNRQ])?(?:e\.p\.)?[+#]?|O-O(?:-O)?)\s*(?:\{[^}]*\})?\s*(?:(\d+)\.{3})?\s*([BNRQK]?[a-h]?[1-8]?x?[a-h][1-8](?:=[BNRQ])?(?:e\.p\.)?[+#]?|O-O(?:-O)?)?')
result_pattern = re.compile(r'(1-0|0-1|1/2-1/2)')

def process_chess_moves(input_string):
    # Extract moves
    moves = move_pattern.findall(input_string)
    
    # Process moves
    processed_moves = []
    for move in moves:
        if move[1]:  # White's move
            processed_moves.append(move[1])
        if move[3]:  # Black's move
            processed_moves.append(move[3])
    
    # Extract result
    result = result_pattern.search(input_string)
    result = result.group(1) if result else ""
    
    # Combine moves and result
    output = " ".join(processed_moves + [result]).strip()
    
    return output

games = [process_chess_moves(raw_game.moves) for raw_game in raw_games]
print(process_chess_moves(raw_games[i].moves))
games

e4 e5 Nc3 f6 Nf3 c6 Bc4 Bc5 d3 Qb6 O-O Nh6 h3 d6 Bxh6 gxh6 g3 Bxh3 Kh2 Bxf1 Qxf1 Bxf2 Be6 Nd7 Na4 Qe3 Kg2 h5 c3 Qd2 1-0


['c4 Nf6 Nf3 c5 e3 g6 Nc3 Bg7 Bd3 d5 cxd5 Nxd5 Nxd5 Qxd5 Qb3 Qxb3 axb3 O-O Ra5 Nd7 Bb5 Rd8 Bxd7 Bxd7 Rxc5 b6 Rc7 e5 O-O e4 Ng5 Bb5 Rd1 Bd3 Nxf7 Rdc8 Re7 Rc2 Nd6 Bf8 Rxe4 Bxd6 Rd4 Be2 Re1 Rd8 f4 Bb5 f5 gxf5 g3 Kf7 Rd1 Be7 Rxd8 Bxd8 Kg2 Bc6+ Kf2 Be7 Ke2 Bb5+ Kf3 Bc6+ Ke2 Ke8 Rg1 Bb5+ Kf3 Bc6+ Kf4 a5 Kxf5 Bd7+ Ke4 Bc6+ Kd3 Rc5 b4 Rd5+ Kc2 Ba4+ b3 Bc6 bxa5 Rxa5 Re1 Be4+ Kc3 Bf6+ Kb4 Be7+ Kc3 Rc5+ Kd4 Bb1 e4 Kd7 e5 Rc6 Ba3 Bc5+ Bxc5 bxc5+ Kc4 Bg6 Rf1 Re6 Kd5 Ke7 Ra1 Bf5 Ra7+ Ke8 Ra8+ Ke7 Ra7+ Ke8 Ra4 h5 Rf4 Bg4 Rxg4 Rxe5+ Kxe5 hxg4 Kd5 Kd7 Kxc5 Kc8 Kc6 Kb8 b4 Ka7 b5 Kb8 b6 Kc8 b7+ Kb8 Kb6 1/2-1/2',
 'e4 e5 Nc3 f6 Nf3 c6 Bc4 Bc5 d3 Qb6 O-O Nh6 h3 d6 Bxh6 gxh6 g3 Bxh3 Kh2 Bxf1 Qxf1 Bxf2 Be6 Nd7 Na4 Qe3 Kg2 h5 c3 Qd2 1-0',
 'e4 e5 Nf3 d6 Nc3 Nc6 Bc4 Be6 Bxe6 fxe6 O-O h5 d3 h4 Bg5 Bxh4 Nh6 h3 Be7 Bxe7 Qxe7 Re1 g5 b3 Nd2 hxg4 f3 Kf2 Qh6 a4 Rh1 Rxh8 Ke1 Ke2 Ke3 Qf4+ Kf2 Rh2 Ne2 Ke3 Nxd4 exd4+ Kxd4 e5+ Ke3 0-1',
 'e4 c5 Bc4 Nc6 Bxf7+ Kxf7 Qh5+ g6 Qxc5 e5 Qe3 Bh6 Qe2 Qg5 f3 Nd4 Qd1 Qxg2 c3 Qxh1 cxd