In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import rcParams
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

In [2]:
# raise the cap on how many DataFrame columns pandas renders before it
# starts eliding the middle ones with '...'
pd.options.display.max_columns = 500

In [3]:
# apply seaborn's default theme, then pick the 'paper' plotting context
# (swap in 'talk' for slightly larger elements)
# background reading: https://seaborn.pydata.org/tutorial/aesthetics.html
sns.set()
sns.set_context('paper')

# default size for every new figure: (width, height) = (9, 7) inches
rcParams.update({'figure.figsize': (9, 7)})

In [4]:
# labels for the non-move fields of each record, in file order;
# the chess moves themselves are loaded separately.
# Names ending in ' - DELETE ME' are placeholders that are dropped once the
# metadata and the moves have been merged.
misc_columnNames = [
    'PNG_File_Pos - DELETE ME',
    'Date of Game',
    'Game Result',
    'W-ELO',
    'B-ELO',
    'Num Moves',
    'miscDate - DELETE ME',
    'result - DELETE ME',
    'wELO - DELETE ME',
    'bELO - DELETE ME',
    'event date - DELETE ME',
    'setup - DELETE ME',
    'fen - DELETE ME',
    'flag - DELETE ME',
    'oyrange - DELETE ME',
    'bad len - DELETE ME',
]


In [5]:
# Read every field except the chess moves. comment='#' makes the parser
# ignore the rest of a line from the first '#' onwards, which cuts each
# record off at the '###' marker that the moves follow.
# (infer_datetime_format was removed from this call: it is deprecated in
# pandas 2.x and has no effect unless parse_dates is also given.)
misc_chess_data = pd.read_csv(
    'https://raw.githubusercontent.com/abecsumb/DataScienceProject/main/Chess_Data.txt',
    comment = '#',
    header = None,
    sep = ' ',
    on_bad_lines = 'skip',
)

# Discard the 17th parsed column (position 16) so the frame has exactly one
# column per entry in misc_columnNames.
# NOTE(review): presumably this is the empty field created by the space that
# precedes the '###' marker -- confirm against the raw file.
misc_chess_data = misc_chess_data.drop(columns = misc_chess_data.columns[16])
misc_chess_data.columns = misc_columnNames


In [6]:
# Isolate game moves from everything else by splitting each line at the
# '###' marker: column 0 holds the metadata text, column 1 the move list.
# A separator longer than one character is interpreted as a regular
# expression, which only the python parsing engine supports. Requesting that
# engine explicitly yields the same parse but avoids the ParserWarning pandas
# raises when it has to fall back from the default C engine.
game_moves = pd.read_csv(
    'https://raw.githubusercontent.com/abecsumb/DataScienceProject/main/Chess_Data.txt',
    sep = '###',
    engine = 'python',
    on_bad_lines = 'skip',
    header = None,
)


  game_moves = pd.read_csv('https://raw.githubusercontent.com/abecsumb/DataScienceProject/main/Chess_Data.txt', sep = '###', on_bad_lines = 'skip', header = None)


In [7]:
# the leading column of game_moves is the misc chess data, not moves:
# keep everything except that column
game_moves = game_moves.drop(columns = game_moves.columns[0])


In [8]:
# turn the single text column of moves into one column per move:
# strip the leading whitespace, then split on single spaces and expand the
# pieces into separate columns
game_moves = (
    game_moves.iloc[:, 0]
    .str.lstrip()
    .str.split(pat = ' ', expand = True)
)


In [9]:
# place the misc data and the per-move columns side by side in one frame,
# then discard every placeholder column that was flagged for deletion
chess_data = pd.concat([misc_chess_data, game_moves], axis = 1).drop(
    columns = [
        'PNG_File_Pos - DELETE ME',
        'miscDate - DELETE ME',
        'result - DELETE ME',
        'wELO - DELETE ME',
        'bELO - DELETE ME',
        'event date - DELETE ME',
        'setup - DELETE ME',
        'fen - DELETE ME',
        'flag - DELETE ME',
        'oyrange - DELETE ME',
        'bad len - DELETE ME',
    ]
)


In [10]:
# show the merged frame; as the cell's last expression it is rendered as the
# cell output (pandas elides the middle rows of a long frame with '...')
chess_data

Unnamed: 0,Date of Game,Game Result,W-ELO,B-ELO,Num Moves,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313
0,2000.03.14,1-0,2851,,67,W1.d4,B1.d5,W2.c4,B2.e6,W3.Nc3,B3.Nf6,W4.cxd5,B4.exd5,W5.Bg5,B5.Be7,W6.e3,B6.Ne4,W7.Bxe7,B7.Nxc3,W8.Bxd8,B8.Nxd1,W9.Bxc7,B9.Nxb2,W10.Rb1,B10.Nc4,W11.Bxc4,B11.dxc4,W12.Ne2,B12.O-O,W13.Nc3,B13.b6,W14.d5,B14.Na6,W15.Bd6,B15.Rd8,W16.Ba3,B16.Bb7,W17.e4,B17.f6,W18.Ke2,B18.Nc7,W19.Rhd1,B19.Ba6,W20.Ke3,B20.Kf7,W21.g4,B21.g5,W22.h4,B22.h6,W23.Rh1,B23.Re8,W24.f3,B24.Bb7,W25.hxg5,B25.fxg5,W26.d6,B26.Nd5+,W27.Nxd5,B27.Bxd5,W28.Rxh6,B28.c3,W29.d7,B29.Re6,W30.Rh7+,B30.Kg8,W31.Rbh1,B31.Bc6,W32.Rh8+,B32.Kf7,W33.Rxa8,B33.Bxd7,W34.Rh7+,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2000.03.14,1-0,2851,,53,W1.e4,B1.d5,W2.exd5,B2.Qxd5,W3.Nc3,B3.Qa5,W4.d4,B4.Nf6,W5.Nf3,B5.c6,W6.Ne5,B6.Bf5,W7.g4,B7.Be4,W8.f3,B8.Bd5,W9.a3,B9.Nbd7,W10.Be3,B10.Nxe5,W11.dxe5,B11.Nxg4,W12.Bd4,B12.e6,W13.b4,B13.Qd8,W14.Nxd5,B14.Qxd5,W15.c4,B15.Ne3,W16.cxd5,B16.Nxd1,W17.dxc6,B17.bxc6,W18.Rxd1,B18.Be7,W19.Ba6,B19.O-O,W20.Ke2,B20.Rab8,W21.Rc1,B21.Rfd8,W22.Rhd1,B22.c5,W23.Bxc5,B23.Rxd1,W24.Rxd1,B24.Bxc5,W25.bxc5,B25.g6,W26.c6,B26.Rb2+,W27.Rd2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,1999.11.20,1-0,2851,,57,W1.e4,B1.e5,W2.Nf3,B2.Nc6,W3.Bc4,B3.Bc5,W4.c3,B4.Nf6,W5.d3,B5.d6,W6.Bb3,B6.O-O,W7.Nbd2,B7.Be6,W8.O-O,B8.Qd7,W9.Re1,B9.Rfe8,W10.Nf1,B10.Ne7,W11.Ng3,B11.Bg4,W12.h3,B12.Be6,W13.Bg5,B13.Kh8,W14.Bxf6,B14.gxf6,W15.d4,B15.exd4,W16.cxd4,B16.Bb4,W17.Re3,B17.Rg8,W18.d5,B18.Bxh3,W19.Qd4,B19.Rg6,W20.Qxb4,B20.c5,W21.Qc3,B21.Bg4,W22.Bc2,B22.Rh6,W23.Nh2,B23.b5,W24.b4,B24.Rc8,W25.Bd3,B25.c4,W26.Bc2,B26.Bh5,W27.Nxh5,B27.Rxh5,W28.Qxf6+,B28.Kg8,W29.Bd1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,1999.11.20,1-0,2851,,49,W1.e4,B1.d5,W2.exd5,B2.Qxd5,W3.Nc3,B3.Qa5,W4.d4,B4.e6,W5.Nf3,B5.c6,W6.Bd3,B6.Nf6,W7.O-O,B7.Be7,W8.Re1,B8.Nbd7,W9.Ne5,B9.O-O,W10.Bg5,B10.Qd8,W11.Qf3,B11.Re8,W12.Rad1,B12.Nf8,W13.Ne4,B13.Ng6,W14.h4,B14.Nxe5,W15.dxe5,B15.Nxe4,W16.Bxe4,B16.Qc7,W17.Bxe7,B17.Qxe7,W18.h5,B18.Bd7,W19.h6,B19.gxh6,W20.Qf4,B20.h5,W21.Qh6,B21.f5,W22.exf6,B22.Qf7,W23.Re3,B23.Kh8,W24.Rg3,B24.Rg8,W25.Rg7,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,2000.02.20,1/2-1/2,2851,2633,97,W1.e4,B1.e5,W2.Nf3,B2.Nc6,W3.Bb5,B3.a6,W4.Ba4,B4.Nf6,W5.O-O,B5.Be7,W6.Re1,B6.b5,W7.Bb3,B7.d6,W8.c3,B8.O-O,W9.h3,B9.Na5,W10.Bc2,B10.c5,W11.d4,B11.Qc7,W12.Nbd2,B12.Bd7,W13.Nf1,B13.cxd4,W14.cxd4,B14.Rac8,W15.Ne3,B15.Nc6,W16.d5,B16.Nb4,W17.Bb1,B17.a5,W18.a3,B18.Na6,W19.b4,B19.Ra8,W20.Bd2,B20.Rfc8,W21.Bd3,B21.Qb7,W22.g4,B22.g6,W23.Nf1,B23.axb4,W24.axb4,B24.Bd8,W25.Ng3,B25.Nc7,W26.Qe2,B26.Rxa1,W27.Rxa1,B27.Ra8,W28.Qe1,B28.Nfe8,W29.Qc1,B29.Ng7,W30.Rxa8,B30.Qxa8,W31.Bh6,B31.Nce8,W32.Qb2,B32.Qa4,W33.Kg2,B33.Bb6,W34.Bc2,B34.Qa7,W35.Bd3,B35.Qa4,W36.Ne2,B36.Nc7,W37.Nxe5,B37.dxe5,W38.Qxe5,B38.Nce8,W39.Bxg7,B39.Qd1,W40.Bh6,B40.Qxd3,W41.Qe7,B41.Ng7,W42.Ng3,B42.Qc2,W43.Qf6,B43.Nf5,W44.Qxb6,B44.Nh4+,W45.Kh2,B45.Nf3+,W46.Kg2,B46.Nh4+,W47.Kh2,B47.Nf3+,W48.Kg2,B48.Nh4+,W49.Kh2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29995,2007.05.06,1/2-1/2,2640,2652,69,W1.d4,B1.d5,W2.c4,B2.c6,W3.Nc3,B3.Nf6,W4.e3,B4.e6,W5.Nf3,B5.a6,W6.a3,B6.Nbd7,W7.Qc2,B7.dxc4,W8.Bxc4,B8.c5,W9.dxc5,B9.Bxc5,W10.Ba2,B10.Qc7,W11.Bd2,B11.b6,W12.b4,B12.Bd6,W13.Rc1,B13.Bb7,W14.Ne4,B14.Nxe4,W15.Qxc7,B15.Bxc7,W16.Rxc7,B16.Rb8,W17.Bb1,B17.Nxd2,W18.Kxd2,B18.Kd8,W19.Rc3,B19.Ke7,W20.Rhc1,B20.Rhc8,W21.Bd3,B21.Rxc3,W22.Rxc3,B22.Rc8,W23.Rxc8,B23.Bxc8,W24.Nd4,B24.Ne5,W25.Be2,B25.Kd6,W26.Kc3,B26.Nc6,W27.Nb3,B27.a5,W28.Nd2,B28.axb4+,W29.axb4,B29.f6,W30.f4,B30.Ne7,W31.e4,B31.Ng6,W32.g3,B32.e5,W33.Nc4+,B33.Kc7,W34.Ne3,B34.Bb7,W35.fxe5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
29996,2006.06.29,1-0,2640,2514,67,W1.e4,B1.c6,W2.d4,B2.d5,W3.e5,B3.Bf5,W4.Nf3,B4.e6,W5.Be2,B5.c5,W6.Be3,B6.cxd4,W7.Nxd4,B7.Ne7,W8.Bg5,B8.Qa5+,W9.Nc3,B9.Bg6,W10.b4,B10.Qb6,W11.Ndb5,B11.Nc8,W12.O-O,B12.a6,W13.Na4,B13.Qc6,W14.c4,B14.axb5,W15.cxd5,B15.exd5,W16.Rc1,B16.bxa4,W17.Bb5,B17.Be7,W18.Qxd5,B18.O-O,W19.Bxc6,B19.Nxc6,W20.Bxe7,B20.N8xe7,W21.Qc5,B21.Nf5,W22.Rfd1,B22.Rfd8,W23.Rxd8+,B23.Rxd8,W24.b5,B24.Ncd4,W25.Qd5,B25.Rf8,W26.Rc4,B26.h5,W27.h3,B27.a3,W28.Kh2,B28.Ne6,W29.Qxb7,B29.Nfd4,W30.Rc3,B30.Kh7,W31.Rxa3,B31.Rd8,W32.Rc3,B32.Ne2,W33.Rc8,B33.Rd2,W34.Ra8,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
29997,1998.??.??,1-0,2640,2185,63,W1.e4,B1.e6,W2.d4,B2.d5,W3.Nd2,B3.Nf6,W4.e5,B4.Nfd7,W5.Bd3,B5.c5,W6.c3,B6.Nc6,W7.Ne2,B7.cxd4,W8.cxd4,B8.Qb6,W9.Nf3,B9.f6,W10.exf6,B10.Nxf6,W11.O-O,B11.Bd6,W12.Nc3,B12.O-O,W13.Be3,B13.Kh8,W14.Rc1,B14.Bd7,W15.a3,B15.Rae8,W16.Na4,B16.Qd8,W17.Nc5,B17.Bc8,W18.Bb5,B18.Re7,W19.b4,B19.Nb8,W20.Ne5,B20.Rc7,W21.Bd3,B21.b6,W22.Nb3,B22.Bd7,W23.Qf3,B23.Rxc1,W24.Rxc1,B24.Be8,W25.Qh3,B25.Bf7,W26.Bg5,B26.h6,W27.Bxh6,B27.gxh6,W28.Qxh6+,B28.Kg8,W29.Qg5+,B29.Kh8,W30.Rc8,B30.Qe7,W31.Rxf8+,B31.Qxf8,W32.Qxf6+,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
29998,1991.??.??,1/2-1/2,2640,2575,108,W1.d4,B1.Nf6,W2.c4,B2.e6,W3.Nf3,B3.b6,W4.Nc3,B4.Bb4,W5.Bg5,B5.Bb7,W6.e3,B6.h6,W7.Bh4,B7.Bxc3+,W8.bxc3,B8.d6,W9.Nd2,B9.g5,W10.Bg3,B10.Qe7,W11.h4,B11.Rg8,W12.hxg5,B12.hxg5,W13.Qa4+,B13.Qd7,W14.Qd1,B14.Qe7,W15.Rh6,B15.Nbd7,W16.Qa4,B16.a5,W17.c5,B17.dxc5,W18.Bxc7,B18.Nd5,W19.Ne4,B19.Nxc7,W20.Nf6+,B20.Kf8,W21.Qxd7,B21.Qxd7,W22.Nxd7+,B22.Ke7,W23.Nxb6,B23.Rab8,W24.dxc5,B24.Nd5,W25.Rc1,B25.Nxb6,W26.cxb6,B26.Ba8,W27.Rb1,B27.Rgc8,W28.Kd2,B28.Rc6,W29.f3,B29.Rcxb6,W30.Rxb6,B30.Rxb6,W31.Kc2,B31.Rb8,W32.Bc4,B32.Bc6,W33.Rh5,B33.Kf6,W34.Rh1,B34.Ba4+,W35.Bb3,B35.Bc6,W36.Rd1,B36.g4,W37.Rd4,B37.gxf3,W38.gxf3,B38.Rh8,W39.Kd3,B39.Ke7,W40.f4,B40.f5,W41.Ba4,B41.Be4+,W42.Kc4,B42.Rh3,W43.Rd7+,B43.Kf6,W44.Kd4,B44.Rh2,W45.Ra7,B45.Rxa2,W46.Be8,B46.Rd2+,W47.Kc4,B47.e5,W48.fxe5+,B48.Kxe5,W49.Rxa5+,B49.Bd5+,W50.Kc5,B50.Rc2,W51.Ra3,B51.Ke4,W52.Bb5,B52.Ra2,W53.Rxa2,B53.Bxa2,W54.Bd7,B54.Bb1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
