### Import libraries

In [128]:
import chessdotcom
from chessdotcom import ChessDotComClient
import chess.engine
import re
import pandas as pd
pd.set_option('display.max_colwidth', None)
import time
import random
import numpy as np

<br>

## Gathering random chess.com players from each of the top 20 countries with the most players
>#### The code below will gather 200 total players. For the data file I used in my analysis I gathered 100,000 total players
>#### The more players you want, the longer the program will take to run

In [49]:
# Client used for every chess.com API request in this notebook
client = ChessDotComClient(user_agent = "My Python Application...")

# Country codes of the 20 countries sampled
country_list = ['IN', 'US', 'PH', 'GB', 'BR', 'ID', 'DE', 'FR', 'RU', 'UA', 'CA', 'TR', 'IT', 'ES', 'VN', 'PL', 'EG', 'MX', 'AU', 'MY']

# Number of random players drawn per country (20 countries x 10 = 200 players).
# Raise this value to gather a larger data set.
players_per_country = 10

big_list = []
for country in country_list:
    response = client.get_country_players(country)
    country_players = response.players
    # random.sample raises ValueError when asked for more items than exist,
    # so clamp the sample size to the number of players actually returned
    sample_size = min(players_per_country, len(country_players))
    big_list.extend(random.sample(country_players, sample_size))

len(big_list)

200

<br>

## Filtering the list to only contain players between 900 and 1100 skill rating for rapid games

In [51]:
# Keep only the sampled players whose latest rapid rating is between 900 and 1100 (inclusive)
rapid_players = []
for username in big_list:
    try:
        rapid_stats = client.get_player_stats(username).stats.chess_rapid
        # Players without rapid stats are skipped
        if rapid_stats is not None and 900 <= rapid_stats.last.rating <= 1100:
            rapid_players.append(username)
    except chessdotcom.errors.ChessDotComClientError:
        # The stats request failed for this player; move on to the next one
        continue

len(rapid_players)

28

<br>

## Gathering the most recent rapid game that meets specific criteria from the last 50 games each user played
>#### The game must not end in a draw, the game must have 10 minute time controls, the players must both be within the skill range

In [63]:
# For each player, walk the monthly archives from newest to oldest and keep the
# first game that passes every filter, giving up after more than 50 rejected games.
big_rapid_list = []
non_draws = ['win', 'resigned', 'checkmated', 'timeout']
for username in rapid_players:
    try:
        archive_urls = client.get_player_game_archives(username).archives
        # The last 7 characters of each archive URL are read as "YYYY/MM"
        months_list = [archive_url[-7:] for archive_url in archive_urls]

        counter = 0  # rejected games seen so far for this player
        for recent_month in reversed(months_list):
            year, month = int(recent_month[:4]), int(recent_month[5:])
            monthly_games = client.get_player_games_by_month(username, year, month).games
            game_appended = False
            # NOTE(review): games are scanned in the order the API returns them; if that
            # order is oldest-first, the game kept is the earliest match in the month
            # rather than the most recent one -- confirm against the API.
            for game in monthly_games:
                # The checks short-circuit in this order, so game.pgn is only sliced
                # once it is known to be non-empty
                if (
                    game.pgn
                    and game.white.result in non_draws
                    and game.time_class == 'rapid'
                    and game.time_control == '600'
                    and 900 <= game.white.rating <= 1100
                    and 900 <= game.black.rating <= 1100
                    and game.pgn[:20] in ('[Event "Live Chess"]', '[Event "Let\'s Play"]')
                ):
                    big_rapid_list.append(game)
                    game_appended = True
                    break
                counter += 1
                if counter > 50:
                    break
            if game_appended or counter > 50:
                break
    except Exception:
        # Any failure while processing a player skips that player entirely
        continue

len(big_rapid_list)

14

<br>

## Function to extract every move that was played in a given game and store them in a list
>#### Regular Expressions were used because the move data was stored in a large text string for each game

In [65]:
def game_moves(i):
    """Return every move found in the PGN text of ``rapid_games[i]``, in order.

    A move is the token that follows a move number written as ``N.``, ``N..``
    or ``N...`` plus one whitespace character. Castling is returned as
    ``'O-O'`` / ``'O-O-O'``. Callers in this notebook treat even indexes of
    the result as white's moves and odd indexes as black's.
    """
    numbered_move = r'(?:\d+\.\.?\.?\s)(O-O-O|O-O|\w+\d?\+?\=?\w?\#?)'
    return re.findall(numbered_move, rapid_games[i].pgn)

<br>

## Filtering the list of rapid games to remove any games that had 12 or fewer turns

In [67]:
rapid_games = big_rapid_list
# Indexes of games with fewer than 26 extracted moves (13 per side)
delete_list = [idx for idx in range(len(rapid_games)) if len(game_moves(idx)) < 26]

# np.delete returns a new NumPy array, so rapid_games is an array (not a list) from here on
rapid_games = np.delete(rapid_games, delete_list)
len(rapid_games)

10

<br>

## Function to create a binary variable for game result

In [19]:
def result_to_binary(result):
    """Return 1 when ``result`` equals ``'win'``, otherwise 0."""
    return 1 if result == 'win' else 0

<br>

## Function to extract the remaining time at the 12th move and calculate the number of seconds since the game started

In [21]:
def elapsed_game_time(i, color):
    """Return the seconds a player had used once their 12th move was recorded.

    The ``MM:SS`` clock value is read from the 7 characters that precede
    ``]} 12... `` (when ``color`` is ``'white'``) or ``]} 13. `` (any other
    ``color``) in the PGN of ``rapid_games[i]``, and subtracted from a
    10-minute starting clock.

    Returns ``'N/A'`` when the game index, the marker or the clock value
    cannot be found.
    """
    # The text right before the next move marker belongs to the player's 12th move
    marker_pattern = r'.{7}]} 12\.{3} ' if color == 'white' else r'.{7}]} 13\. '
    try:
        pgn_text = rapid_games[i].pgn
        clock_context = re.findall(marker_pattern, pgn_text)[0]
        remaining = re.findall(r'\d{2}:\d{2}', clock_context)[0]
    except IndexError:
        return 'N/A'

    mins, secs = remaining.split(':')
    seconds_left = int(mins) * 60 + int(secs)
    # Games were filtered to the '600' time control, i.e. 10 minutes per side
    return 10 * 60 - seconds_left

<br>

## Function to retrieve the game opening variations
>#### Regular Expressions are used to capture the names of the openings because they are stored in a URL

In [23]:
def variation(num):
    """Return the opening variation name of ``rapid_games[num]``.

    The name is the last path segment of the PGN's ``ECOUrl`` value, cut at
    the first ``-<digit>``, ``with-<digit>`` or ``...`` and with the
    remaining hyphens replaced by spaces. Returns ``'N/A'`` when the game
    index or the URL cannot be found.
    """
    try:
        eco_url = re.findall(r'ECOUrl\s*"([^"]+)"', rapid_games[num].pgn)[0]
        # Everything after the final slash of the URL
        slug = re.findall(r'[^/]+$', eco_url)[0]
    except IndexError:
        return 'N/A'

    # Drop the trailing move sequence so only the opening name remains
    name_only = re.sub(r'-\d.*$|with-\d.*$|\.{3}.*$', "", slug)
    return name_only.replace('-', " ")

<br>

## Function to categorize each variation into one of the common types of Chess Openings 
>#### N/A values represent opening variations that are rare/infrequent

In [25]:
def opening(num):
    """Return the opening family of game ``num``.

    The variation name from ``variation(num)`` is compared against the
    prefixes below in order, and the first matching prefix is returned.
    Returns ``'N/A'`` when nothing matches.
    """
    # Order matters where one name is a prefix of another
    # (e.g. 'Nimzowitsch Larsen' must be tried before 'Nimzowitsch').
    families = (
        'London',
        'Alekhine',
        'Benko Gambit',
        'Benoni',
        'Bird',
        'Bishop',
        'Bogo Indian',
        'Caro Kann',
        'Catalan',
        'Danish Gambit',
        'Dutch',
        'English',
        'Four Knights Game',
        'French',
        'Grunfeld',
        'Kings Fianchetto',
        'Budapest Gambit',
        'Indian Game',
        'Kings Indian Defense',
        'Italian Game',
        'Modern',
        'Nimzo Indian',
        'Nimzowitsch Larsen',
        'Nimzowitsch',
        'Old Indian',
        'Owen',
        'Philidor',
        'Polish',
        'Ponziani',
        'Colle',
        'Queens Pawn',
        'Queens Gambit',
        'Queens Indian',
        'Petrov',
        'Scandinavian',
        'Scotch Game',
        'Semi Slav',
        'Slav',
        'Alapin Sicilian',
        'Closed Sicilian',
        'Sicilian',
        'Ruy Lopez',
        'Tarrasch',
        'Three Knights',
        'Trompowsky',
        'Vienna Game',
        'Reti',
        'Kings Pawn',
        'Uncommon',
        'Pirc',
        'Center Game',
    )
    try:
        variation_name = variation(num)
        for family in families:
            if variation_name.startswith(family):
                return family
        return 'N/A'
    except TypeError:
        return 'N/A'

<br>

## Import Stockfish chess engine (You will need to download Stockfish first)

In [27]:
# Browser-like request headers (not referenced by any code visible in this notebook)
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0 Safari/537.36"}
# Location of the Stockfish executable; change this to wherever Stockfish is installed.
# A raw string keeps the Windows backslashes literal without relying on invalid
# escape sequences such as "\j" or "\O", which emit warnings in current Python.
stockfish_path = r"C:\Users\james\OneDrive\Documents\MSA Program\Side Projects\stockfish\stockfish-windows-x86-64-avx2"
# Start the Stockfish executable as a UCI engine process shared by the evaluation functions.
# NOTE(review): the engine is never shut down in this notebook; call engine.quit() once
# all evaluations are finished so the process does not linger.
engine = chess.engine.SimpleEngine.popen_uci(stockfish_path)

# Function to get the evaluation of the current board position
# 0.1 seconds of processing time per position strikes a good balance of speed and accuracy
def get_move_evaluation(board, engine):
    """Analyse ``board`` with ``engine`` for 0.1 seconds and return the score as a number.

    Mate scores are converted using a value of 10000.

    NOTE(review): the score is taken with ``.relative``, i.e. from the point
    of view of the side to move in ``board`` -- confirm that this is the
    perspective the analysis expects.
    """
    search_limit = chess.engine.Limit(time=0.1)
    pov_score = engine.analyse(board, search_limit)['score']
    return pov_score.relative.score(mate_score=10000)

<br>

## Function to find the position evaluation at move 12

In [29]:
def move_12_eval(i, color):
    """Return the engine evaluation of the position right after a player's 12th move.

    The game is replayed from the starting position up to and including the
    12th move of ``color`` (``'white'``, or black for any other value), and
    only that final position is sent to the engine. Intermediate positions
    are not analysed because their evaluations are never used.

    NOTE(review): ``get_move_evaluation`` scores the position relative to the
    side to move, which after the player's own move is the opponent --
    confirm that this is the intended perspective.

    Returns ``'Error'`` if the game has too few moves, a move cannot be
    played, or the engine fails.
    """
    try:
        # White's 12th move is the 23rd half-move of the game, black's is the 24th
        plies_needed = 23 if color == 'white' else 24
        moves_to_play = game_moves(i)[:plies_needed]
        if len(moves_to_play) < plies_needed:
            return 'Error'

        # "Play the game" until the player's 12th move has been made
        board = chess.Board()
        for move_str in moves_to_play:
            board.push_san(move_str)

        return get_move_evaluation(board, engine)

    except Exception:
        # Deliberate best-effort: a bad game becomes an 'Error' cell instead of aborting the run
        return 'Error'

<br>

## Creating a binary variable for whether the player castled in the first 12 moves
>#### Also creating a secondary variable for the side of the board it was on

In [31]:
def castle_check(i, color):
    """Report whether a player castled within their first 12 moves of game ``i``.

    Returns a ``(flag, side)`` tuple: ``(1, 'Queenside')``, ``(1, 'Kingside')``
    or ``(0, 'N/A')``. ``color`` selects white's moves when it is ``'white'``
    and black's moves otherwise.
    """
    first_index = 0 if color == 'white' else 1
    first_twelve = game_moves(i)[first_index::2][:12]

    if 'O-O-O' in first_twelve:
        return 1, 'Queenside'
    if 'O-O' in first_twelve:
        return 1, 'Kingside'
    return 0, 'N/A'

<br>

## Function to identify what piece was moved using the first letter of chess notation

In [33]:
def piece_lookup(move):
    """Return the name of the piece moved, based on the first character of ``move``.

    ``'O'`` (castling) maps to ``'Castled'``; any character that is not a
    piece letter is treated as a pawn move.
    """
    names_by_letter = {
        'N': 'Knight',
        'B': 'Bishop',
        'K': 'King',
        'Q': 'Queen',
        'R': 'Rook',
        'O': 'Castled',
    }
    return names_by_letter.get(move[0], 'Pawn')

<br>

## Creating a binary variable for whether the queen was moved in the first 12 moves

In [35]:
def queen_check(i, color):
    """Return 1 if the player moved their queen within their first 12 moves of game ``i``, else 0.

    ``color`` selects white's moves when it is ``'white'`` and black's moves
    otherwise. The moves are sliced rather than indexed, so a game with fewer
    than 12 moves for that player is evaluated on the moves it has instead of
    raising ``IndexError`` (the same approach ``castle_check`` uses).
    """
    first_index = 0 if color == 'white' else 1
    moves = game_moves(i)[first_index::2]
    return int(any(piece_lookup(move) == 'Queen' for move in moves[:12]))

<br>

## Creating a binary variable for whether a rook was moved in the first 12 moves
>#### Castling does not count as moving a rook

In [37]:
def rook_check(i, color):
    """Return 1 if the player moved a rook within their first 12 moves of game ``i``, else 0.

    Castling is reported by ``piece_lookup`` as ``'Castled'``, so it does not
    count as a rook move. ``color`` selects white's moves when it is
    ``'white'`` and black's moves otherwise. The moves are sliced rather than
    indexed, so a game with fewer than 12 moves for that player is evaluated
    on the moves it has instead of raising ``IndexError``.
    """
    first_index = 0 if color == 'white' else 1
    moves = game_moves(i)[first_index::2]
    return int(any(piece_lookup(move) == 'Rook' for move in moves[:12]))

<br>

## Creating a variable for the total number of minor pieces (Knights or Bishops) moved by move 12 

In [39]:
def minor_piece_count(i, color):
    """Count how many of a player's four minor pieces were developed in their first 12 moves.

    Each knight and bishop has a list of move strings that are taken to be
    its first move off the back rank. A piece is counted the first time one
    of its moves appears, so the result is between 0 and 4. ``color`` selects
    white when it is ``'white'`` and black otherwise.

    The moves are sliced rather than indexed, so a game with fewer than 12
    moves for that player is evaluated on the moves it has instead of raising
    ``IndexError``.
    """
    if color == 'white':
        moves = game_moves(i)[::2]
        left_knight_moves = ['Na3', 'Nc3', 'Nd2', 'Nbd2', 'Nxa3', 'Nxc3', 'Nxd2', 'Nbxd2']
        right_knight_moves = ['Ne2', 'Nf3', 'Nh3', 'Nge2', 'Nxe2', 'Nxf3', 'Nxh3', 'Ngxe2']
        left_bishop_moves = ['Bb2', 'Ba3', 'Ba3+', 'Bd2', 'Be3', 'Bf4', 'Bg5', 'Bg5+', 'Bh6', 'Bxb2', 'Bxa3', 'Bxa3+', 'Bxd2', 'Bxe3', 'Bxf4', 'Bxg5', 'Bxg5+', 'Bxh6']
        right_bishop_moves = ['Bg2', 'Bh3', 'Bh3+', 'Be2', 'Bd3', 'Bc4', 'Bc4+', 'Bb5', 'Bb5+', 'Ba6', 'Bxg2', 'Bxh3', 'Bxh3+', 'Bxe2', 'Bxd3', 'Bxc4', 'Bxc4+', 'Bxb5', 'Bxb5+', 'Bxa6']
    else:
        moves = game_moves(i)[1::2]
        left_knight_moves = ['Na6', 'Nc6', 'Nd7', 'Nbd7', 'Nxa6', 'Nxc6', 'Nxd7', 'Nbxd7']
        right_knight_moves = ['Ne7', 'Nf6', 'Nh6', 'Nge7', 'Nxe7', 'Nxf6', 'Nxh6', 'Ngxe7']
        left_bishop_moves = ['Bb7', 'Ba6', 'Ba6+', 'Bd7', 'Be6', 'Bf5', 'Bg4', 'Bg4+', 'Bh3', 'Bxb7', 'Bxa6', 'Bxa6+', 'Bxd7', 'Bxe6', 'Bxf5', 'Bxg4', 'Bxg4+', 'Bxh3']
        right_bishop_moves = ['Bg7', 'Bh6', 'Bh6+', 'Be7', 'Bd6', 'Bc5', 'Bc5+', 'Bb4', 'Bb4+', 'Ba3', 'Bxg7', 'Bxh6', 'Bxh6+', 'Bxe7', 'Bxd6', 'Bxc5', 'Bxc5+', 'Bxb4', 'Bxb4+', 'Bxa3']

    # One entry per minor piece, checked in this order for every move
    piece_move_lists = [left_knight_moves, right_knight_moves, left_bishop_moves, right_bishop_moves]

    developed = 0
    for move in moves[:12]:
        for first_moves in piece_move_lists:
            if move in first_moves:
                developed += 1
                # Empty the list so this piece cannot be counted a second time
                first_moves.clear()
                break

    return developed

<br>

## Creating a variable for the sum of the vertical positions of each piece by move 12
>#### This is a way to measure aggressiveness

In [41]:
def piece_depth(i, color):
    """Return the sum of the destination ranks of a player's first 12 moves in game ``i``.

    The rank is the last digit of the move (ignoring a trailing ``+``). For
    white it is used as-is; for any other ``color`` it is mirrored as
    ``9 - rank`` so that the value grows as the piece advances. Castling
    counts as rank 1.

    Returns ``'N/A'`` when the player has fewer than 12 moves or a move has
    no trailing rank digit.
    """
    try:
        first_index = 0 if color == 'white' else 1
        own_moves = game_moves(i)[first_index::2]

        ranks = []
        for ply in range(12):
            san = own_moves[ply]
            if san in ('O-O', 'O-O-O'):
                ranks.append(1)
                continue
            rank = int(re.findall(r'(\d)\+?$', san)[0])
            ranks.append(rank if color == 'white' else 9 - rank)

        return sum(ranks)

    except IndexError:
        return 'N/A'

<br>

## Creating a variable for the total number of moves in the 16 center squares by move 12

In [43]:
def center_count(i, color):
    """Count how many of a player's first 12 moves in game ``i`` land on a center square.

    The center is the 16-square block from c3 to f6. The destination is the
    file letter and rank digit at the end of the move (ignoring a trailing
    ``+``). Castling moves are skipped.

    Returns ``'N/A'`` when the player has fewer than 12 moves or a move has
    no trailing square.
    """
    center_squares = ['c3', 'c4', 'c5', 'c6', 'd3', 'd4', 'd5', 'd6', 'e3', 'e4', 'e5', 'e6', 'f3', 'f4', 'f5', 'f6']
    try:
        first_index = 0 if color == 'white' else 1
        own_moves = game_moves(i)[first_index::2]

        hits = 0
        for ply in range(12):
            san = own_moves[ply]
            if san in ('O-O', 'O-O-O'):
                continue
            destination = re.findall(r'([a-z]\d)\+?$', san)[0]
            if destination in center_squares:
                hits += 1

        return hits

    except IndexError:
        return 'N/A'

<br>

## Creating the final dataframe

In [71]:
# One list per output column; every processed game appends one value to each list
url, player, result, result_binary = [], [], [], []
piece_color, rating_difference, elapsed_time, openings = [], [], [], []
evaluation_move_12, castle_binary, castle_side = [], [], []
queen_binary, rook_binary = [], []
development, aggressiveness, center_moves = [], [], []

# Build one row per game, from the perspective of whichever player is in rapid_players
for i in range(len(rapid_games)):
    game = rapid_games[i]

    # White is checked first, so a game between two sampled players is recorded for white
    if game.white.username.lower() in rapid_players:
        color, opening_prefix, me, opponent = 'white', 'White - ', game.white, game.black
    elif game.black.username.lower() in rapid_players:
        color, opening_prefix, me, opponent = 'black', 'Black - ', game.black, game.white
    else:
        # Neither player is in the sample; the game contributes no row
        continue

    url.append(game.url)
    player.append(me.username)
    result.append(me.result)
    result_binary.append(result_to_binary(me.result))
    piece_color.append(color)
    rating_difference.append(me.rating - opponent.rating)
    elapsed_time.append(elapsed_game_time(i, color))
    openings.append(opening_prefix + opening(i))
    evaluation_move_12.append(move_12_eval(i, color))
    castled, side = castle_check(i, color)
    castle_binary.append(castled)
    castle_side.append(side)
    queen_binary.append(queen_check(i, color))
    rook_binary.append(rook_check(i, color))
    development.append(minor_piece_count(i, color))
    aggressiveness.append(piece_depth(i, color))
    center_moves.append(center_count(i, color))
# Create final dataframe: column name -> list of values, in output order
column_data = {
    'URL': url,
    'Player': player,
    'Result': result,
    'Result_Binary': result_binary,
    'Piece_Color': piece_color,
    'Rating_Difference': rating_difference,
    'Elapsed_Time': elapsed_time,
    'Opening': openings,
    'Evaluation': evaluation_move_12,
    'Castle_Present': castle_binary,
    'Castle_Side': castle_side,
    'Queen_Moved': queen_binary,
    'Rook_Moved': rook_binary,
    'Minor_Pieces_Developed': development,
    'Aggressiveness': aggressiveness,
    'Center_Moves': center_moves,
}
columns = list(column_data)
chess_games = pd.DataFrame(columns=columns)
# Fill the empty frame one column at a time
for column_name, values in column_data.items():
    chess_games[column_name] = values

chess_games.tail(5)

Unnamed: 0,URL,Player,Result,Result_Binary,Piece_Color,Rating_Difference,Elapsed_Time,Opening,Evaluation,Castle_Present,Castle_Side,Queen_Moved,Rook_Moved,Minor_Pieces_Developed,Aggressiveness,Center_Moves
5,https://www.chess.com/game/live/121970335954,bruno24800GG,win,1,white,70,101,White - Four Knights Game,60,1,Kingside,1,0,3,47,9
6,https://www.chess.com/game/live/121874710612,ANDRESLOPEZGARRIDO,timeout,0,black,-38,45,Black - Philidor,-430,0,,0,0,2,49,7
7,https://www.chess.com/game/live/60613150045,aldom99,win,1,white,-17,66,White - Closed Sicilian,-218,1,Kingside,1,0,3,41,8
8,https://www.chess.com/game/live/121234829076,Banan1005,checkmated,0,white,-51,76,White - Kings Fianchetto,358,0,,0,1,3,32,5
9,https://www.chess.com/game/live/122011343288,Aeskeladd,win,1,black,22,64,Black - Italian Game,81,0,,1,0,3,40,10


In [41]:
# Save the dataframe as an Excel file in the current working directory.
# index=False leaves the row index out of the file.
chess_games.to_excel('Chess_Game_Data.xlsx', index=False)

In [43]:
# Persist the filtered rapid_games collection to a pickle file for future use
import pickle
with open("rapid_games_0108.pkl", "wb") as games_file:
    pickle.dump(rapid_games, games_file)

In [45]:
# Persist the rapid_players list to a pickle file for future use
with open("rapid_players_0108.pkl", "wb") as players_file:
    pickle.dump(rapid_players, players_file)