In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from copy import deepcopy

# Clear any previously downloaded archives/CSVs, then fetch and unpack the Kaggle dataset.
# NOTE(review): on a clean directory `rm` reports "No such file or directory" (harmless);
# `rm -f` would silence it. Presumably the Kaggle CLI must already be configured — confirm.
!rm *.zip *.csv
!kaggle datasets download -d ronakbadhe/chess-evaluations
!unzip chess-evaluations.zip


rm: cannot remove '*.zip': No such file or directory
rm: cannot remove '*.csv': No such file or directory
Dataset URL: https://www.kaggle.com/datasets/ronakbadhe/chess-evaluations
License(s): other
Downloading chess-evaluations.zip to /content
100% 199M/200M [00:06<00:00, 41.2MB/s]
100% 200M/200M [00:06<00:00, 34.3MB/s]
Archive:  chess-evaluations.zip
  inflating: chessData.csv           
  inflating: random_evals.csv        
  inflating: tactic_evals.csv        


# Introduction
Chess is a popular game of strategic thinking and tactical prowess that has been played for centuries and is still widely played today. The dataset we will be using comprises approximately 16 million unique chess positions, each evaluated by the Stockfish chess engine at a depth of 22. Stockfish, a state-of-the-art chess analysis tool, provides precise and detailed evaluations of positions, making this dataset highly valuable for research in artificial intelligence, game theory, and machine learning. The search depth of 22 ensures a deep and thorough analysis of each position, offering insights into optimal moves and strategies. This dataset can be instrumental in training advanced machine learning models, developing new chess algorithms, and conducting comprehensive studies on chess strategy and position evaluation.

In [None]:
# Load the main evaluation table from the unzipped CSV; the trailing
# expression makes the notebook display the resulting DataFrame.
chessData_df = pd.read_csv('chessData.csv')
chessData_df

Unnamed: 0,FEN,Evaluation
0,rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR ...,-10
1,rnbqkbnr/pppp1ppp/4p3/8/4P3/8/PPPP1PPP/RNBQKBN...,+56
2,rnbqkbnr/pppp1ppp/4p3/8/3PP3/8/PPP2PPP/RNBQKBN...,-9
3,rnbqkbnr/ppp2ppp/4p3/3p4/3PP3/8/PPP2PPP/RNBQKB...,+52
4,rnbqkbnr/ppp2ppp/4p3/3p4/3PP3/8/PPPN1PPP/R1BQK...,-26
...,...,...
12958030,r1bqkb1r/pp3ppp/1nn1p3/3pP3/3P1P2/1B3N2/PP2Q1P...,+6
12958031,r2qkb1r/pp1b1ppp/1nn1p3/3pP3/3P1P2/1B3N2/PP2Q1...,+84
12958032,r2qkb1r/pp1b1ppp/1nn1p3/3pP3/3P1P2/1BN2N2/PP2Q...,0
12958033,r2qkb1r/pp1b1ppp/1n2p3/n2pP3/3P1P2/1BN2N2/PP2Q...,+115


## Features to Add
### Unofficial list
- Material Advantage/Disadvantage
- Development of Pieces
- ~~Close to Pawn Promotion (?)~~
- ~~Castle Ability~~ (Already provided by FEN string)
- ~~Pawn Structure (?)~~

# Mobility Mapping (Incredibly Ugly Code)
#### Hidden for your eyes' safety

In [128]:
# Mobility board to visualize where pieces can go and defend.
def mobility_board(board):
  """Annotate every square of `board` with the pieces that can reach it.

  Squares are looked up through `Board.pos`, so `board` is indexed as
  board[rank][file] with rank index 0 = rank 1 and file index 0 = the a-file.
  Every cell is a dict with the keys 'Move/Capture', 'Defend', 'Pawn_Control'
  (lists of piece letters) and 'Occupied' (a piece letter, or '' when empty).
  Upper-case letters are treated as white pieces, everything else as black.

  For each pawn, knight, bishop, rook and queen on the board:
    * an empty reachable square gets the piece appended to 'Move/Capture'
      (for a pawn's diagonal squares: to 'Pawn_Control' instead),
    * a reachable square holding an enemy piece gets it in 'Move/Capture',
    * a reachable square holding a friendly piece gets it in 'Defend'.
  Sliding pieces stop at the first occupied square (which is still recorded).
  Kings are not processed.

  Args:
    board: 8x8 nested list of square dicts as described above.

  Returns:
    The same `board` object, mutated in place.
  """
  diagonal_steps = ((1, 1), (1, -1), (-1, 1), (-1, -1))
  straight_steps = ((1, 0), (-1, 0), (0, 1), (0, -1))
  knight_jumps = ((1, 2), (1, -2), (-1, 2), (-1, -2),
                  (2, 1), (2, -1), (-2, 1), (-2, -1))
  # Ray directions per sliding piece; the queen combines bishop and rook rays.
  slider_steps = {
    'b': diagonal_steps,
    'r': straight_steps,
    'q': diagonal_steps + straight_steps,
  }

  def shift(pos, d_col, d_row):
    # Build the square name d_col files / d_row ranks away from `pos`.
    # Off-board names (e.g. 'i4', 'a0', 'a10') are rejected by Board.pos.
    return f'{chr(ord(pos[0]) + d_col)}{int(pos[1]) + d_row}'

  def record(coord, piece, color, empty_key):
    # Register `piece` on one square: `empty_key` list if the square is empty,
    # 'Defend' if it holds a piece of the same color, 'Move/Capture' otherwise.
    occupant = coord['Occupied']
    if occupant == '':
      coord[empty_key].append(piece)
    elif occupant.islower() == (color == 'b'):
      coord['Defend'].append(piece)
    else:
      coord['Move/Capture'].append(piece)

  def update(possible_moves, piece, color):
    # Register `piece` on every on-board square in `possible_moves`.
    for move in possible_moves:
      coord = Board.pos(move, board)
      if coord is not None:
        record(coord, piece, color, 'Move/Capture')

  def knight(pos, color):
    piece = 'n' if color == 'b' else 'N'
    update([shift(pos, d_col, d_row) for d_col, d_row in knight_jumps], piece, color)

  def slider(pos, color, kind):
    # Shared ray-casting for bishop ('b'), rook ('r') and queen ('q').
    piece = kind if color == 'b' else kind.upper()
    possible_moves = []
    for d_col, d_row in slider_steps[kind]:
      for distance in range(1, 8):
        move = shift(pos, d_col * distance, d_row * distance)
        coord = Board.pos(move, board)
        if coord is None:
          break  # ran off the board
        possible_moves.append(move)
        if coord['Occupied'] != '':
          break  # the blocker's square is reachable, nothing behind it is
    update(possible_moves, piece, color)

  def pawn(pos, color):
    piece = 'p' if color == 'b' else 'P'
    forward = -1 if color == 'b' else 1
    start_row = '7' if color == 'b' else '2'

    ### Move ###
    # Guard against None: a pawn on its last rank has no square ahead of it.
    ahead = Board.pos(shift(pos, 0, forward), board)
    if ahead is not None and ahead['Occupied'] == '':
      ahead['Move/Capture'].append(piece)
      if pos[1] == start_row:
        two_ahead = Board.pos(shift(pos, 0, 2 * forward), board)
        if two_ahead is not None and two_ahead['Occupied'] == '':
          two_ahead['Move/Capture'].append(piece)

    ### Capture/Defend ###
    for d_col in (-1, 1):
      target = Board.pos(shift(pos, d_col, forward), board)
      if target is not None:
        record(target, piece, color, 'Pawn_Control')

  # Visit squares file by file (a1, a2, ..., a8, b1, ...); this order fixes
  # the order in which pieces are appended to each square's lists.
  for file_idx in range(8):
    for rank_idx in range(8):
      square = f'{chr(file_idx + 97)}{rank_idx + 1}'
      piece = Board.pos(square, board)['Occupied']
      color = 'w' if piece.isupper() else 'b'
      kind = piece.lower()
      if kind == 'p':
        pawn(square, color)
      elif kind == 'n':
        knight(square, color)
      elif kind in slider_steps:
        slider(square, color, kind)

  return board

{'Move/Capture': ['Q', 'B', 'N'],
 'Defend': [],
 'Pawn_Control': [],
 'Occupied': ''}

# Data Exploration
### Since our dataset's only independent variable does not work well as either a continuous value or categorical variable, we will be extracting information from it.
##### Board Object contains the board representation of the FEN string, mobility map of all pieces except the King, and other minor information that can be extracted from the position.

In [None]:
# Convert FEN to an index-based board
def FEN_to_board(fen):
  """Parse the piece-placement field of a FEN string.

  Only the text before the first space is used. Ranks are kept in FEN order,
  i.e. index 0 is the first rank written in the string.

  Args:
    fen: FEN string (or just its piece-placement field).

  Returns:
    A tuple (ranks, squares):
      ranks   - list of 8 strings, one per rank, with '_' for empty squares.
      squares - 8x8 list of dicts with empty 'Move/Capture', 'Defend' and
                'Pawn_Control' lists and 'Occupied' set to the piece letter
                ('' for an empty square).
  """
  placement = fen.split(' ')[0]
  empty_run_digits = {str(count) for count in range(1, 9)}
  squares = [
    [{'Move/Capture': [], 'Defend': [], 'Pawn_Control': [], 'Occupied': ''} for _ in range(8)]
    for _ in range(8)
  ]
  ranks = [''] * 8

  for rank_idx, rank_text in enumerate(placement.split('/')):
    file_idx = 0
    for symbol in rank_text:
      if symbol in empty_run_digits:
        # A digit stands for that many consecutive empty squares.
        gap = int(symbol)
        ranks[rank_idx] += '_' * gap
        file_idx += gap
      else:
        ranks[rank_idx] += symbol
        squares[rank_idx][file_idx]['Occupied'] = symbol
        file_idx += 1

  return ranks, squares

# Board object that will contain all data processing functions.
class Board:
  """A chess position parsed from a FEN string.

  Attributes:
    board: list of 8 strings, index 0 = rank 1, '_' marking empty squares.
    mobility: 8x8 list of square dicts (index 0 = rank 1) as filled in by
      `mobility_board`.
    fen: the original FEN string.
    moves: integer parsed from the sixth space-separated FEN field
      (the fullmove counter in standard FEN).
    black_pieces / white_pieces: material value per piece letter.
  """

  def __init__(self, fen):
    """Parse `fen` and build the board and mobility map.

    Raises:
      IndexError: if `fen` has fewer than six space-separated fields.
      ValueError: if the sixth field is not an integer.
    """
    self.board, self.mobility = FEN_to_board(fen)
    # FEN_to_board keeps FEN order; reverse so that index 0 corresponds to
    # rank 1, which is what `pos` (row = int(rank) - 1) expects.
    self.board.reverse()
    self.mobility.reverse()
    self.mobility = mobility_board(self.mobility)
    self.fen = fen
    self.moves = int(fen.split(' ')[5])
    self.black_pieces = {'r': 5, 'n': 3, 'b': 3, 'q': 9, 'p': 1, 'k': 0}
    self.white_pieces = {p.upper() : v for p, v in self.black_pieces.items()}

  def __repr__(self):
    """Return a text diagram of the board, rank 8 at the top."""
    # NOTE: read `self.board`, the attribute __init__ actually sets. The former
    # `self.__board` is name-mangled to `_Board__board`, which never exists.
    board_str = ''
    for row in reversed(range(len(self.board))):
      board_str += str(row + 1) + ' | '
      for c in self.board[row]:
        board_str += c + ' '
      board_str += '\n'
    board_str += '   ----------------\n    a b c d e f g h'
    return board_str

  def __str__(self):
    """Return the FEN string this board was built from."""
    return self.fen

  # Use chess position terminology (i.e. e4, h8)
  @classmethod
  def pos(cls, pos, board):
    """Look up a square such as 'e4' in an index-based board.

    Args:
      pos: two-character square name; the file letter is case-insensitive.
      board: nested sequence indexed as board[rank - 1][file index].

    Returns:
      board[row][col] for a valid square name, otherwise None.
    """
    if len(pos) != 2:
      #print('Not a valid chess position')
      return None
    valid_letters = 'abcdefgh'
    valid_num = '12345678'
    col = pos[0].lower()
    row = pos[1]
    if col not in valid_letters or row not in valid_num:
      #print('Not a valid chess position')
      return None
    col = ord(col) - 97
    row = int(row) - 1
    return board[row][col]

  # Calculate material advantage of the position (positive value for white, negative value for black)
  def advantage(self):
    """Return white's material total minus black's material total."""
    white = 0
    black = 0
    for row in self.board:
      for p in row:
        if p == '_':
          continue
        elif p.islower():
          black += self.black_pieces[p]
        else:
          white += self.white_pieces[p]

    return white - black

# TESTING PURPOSES

In [None]:
# Take the first five rows as a sample and replace each raw FEN string
# with a Board object built from it (the column keeps the name 'FEN').
test = chessData_df.head()
fen_strs = np.array(test['FEN'])
boards = pd.DataFrame(np.array([Board(fen_text) for fen_text in fen_strs]), columns=['FEN'])
test = pd.concat([test.drop('FEN', axis=1), boards], axis=1)
test

Unnamed: 0,Evaluation,FEN
0,-10,rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR ...
1,56,rnbqkbnr/pppp1ppp/4p3/8/4P3/8/PPPP1PPP/RNBQKBN...
2,-9,rnbqkbnr/pppp1ppp/4p3/8/3PP3/8/PPP2PPP/RNBQKBN...
3,52,rnbqkbnr/ppp2ppp/4p3/3p4/3PP3/8/PPP2PPP/RNBQKB...
4,-26,rnbqkbnr/ppp2ppp/4p3/3p4/3PP3/8/PPPN1PPP/R1BQK...


In [None]:
# Material balance of every Board in the sample (white total minus black
# total, as computed by Board.advantage), collected into a NumPy array.
advantages = np.array([board.advantage() for board in boards['FEN']])
advantages

array([0, 0, 0, 0, 0])

In [None]:
# Attach the material balance as a new `material_advantage` column.
# The full-dataset variant is left disabled below; only the small `test`
# sample is updated here.
# chessData_df = chessData_df.assign(material_advantage=advantages)
# chessData_df
test = test.assign(material_advantage=advantages)
test

Unnamed: 0,Evaluation,FEN,material_advantage
0,-10,rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR ...,0
1,56,rnbqkbnr/pppp1ppp/4p3/8/4P3/8/PPPP1PPP/RNBQKBN...,0
2,-9,rnbqkbnr/pppp1ppp/4p3/8/3PP3/8/PPP2PPP/RNBQKBN...,0
3,52,rnbqkbnr/ppp2ppp/4p3/3p4/3PP3/8/PPP2PPP/RNBQKB...,0
4,-26,rnbqkbnr/ppp2ppp/4p3/3p4/3PP3/8/PPPN1PPP/R1BQK...,0


# Development of Pieces
#### The following function checks how well "developed" a player's core pieces are. Many factors<sup>[[^1]]</sup> contribute to this attribute, so our evaluation will certainly not be the most accurate. We also use our own arbitrary weights, since we cannot be completely sure how much "better developed" a piece is relative to other types of pieces. The criteria we will keep in mind for our evaluation function are the following.
- Queen
  - Penalty for early development (first 5 moves)
  - **Queen mobility**
- Pawn
  - ~~Pawn structure~~ (Difficult and computationally expensive to evaluate)
  - **Pawn Center** (d4 or e4 defended by pawns or creating a double pawn on either d or e columns)
  - Penalty for "d" and "e" pawns being blocked at their starting squares
  - Late game: Penalty for pawns that are still near start position
- Knight
  - Less value if there are fewer pawns
  - **Knight mobility**
  - Penalty if undefended
- Bishop
  - **Bishop mobility** (greater emphasis on forward mobility)
  - Bishop pair is considered marginally stronger than Bishop Knight and Knight Knight
  - ~~Color Weakness (missing a bishop and poor pawn structure)~~ (See Pawn)
  - Penalty if undefended

> **Mobility omits squares controlled by enemy pawns**


[^1]: https://www.chessprogramming.org/Evaluation_of_Pieces