# Chess Tree

In [1]:
import pandas as pd
from pgnhelper.eco import create_eco_db

### Import and preprocess data

### Optimisation
- Convert `fen` to string
- Multiply `depth` by 2 and convert it to a small int
- Convert `opening` to string
- Clear `opening` and `variation`
- Make `total_games` a smaller int
- Make `white`, `black`, `draw` and `move_perc` less precise floats
- Make `move` a very small string
- Make `made_move` a boolean and reflect this in any functions that use it
- For the time being, drop all irrelevant columns. We only need:
    - depth
    - pgn
    - play
    - made_move
    - move
    - parent_index

In [2]:
# Load the raw results and discard the 'id' column.
df = pd.read_pickle('raw_results.pkl').drop(columns=['id'])

# Empty placeholder columns; df2 below takes its own copy of them.
for placeholder in ('pgn', 'parent_index'):
    df[placeholder] = ''

# Working frame: only the columns needed to build the tree.
tree_columns = ['parent_index', 'depth', 'made_move', 'play', 'move', 'pgn', 'opening', 'variation']
df2 = (
    df[tree_columns]
    .copy()
    .assign(
        # 'w' becomes True, anything else False
        made_move=lambda frame: frame['made_move'].eq('w'),
        # doubled so that depth can be stored as a small unsigned integer
        depth=lambda frame: frame['depth'] * 2,
    )
    .astype({'depth': 'uint8', 'move': 'str'})
    .sort_values(by='depth')
    .reset_index(drop=True)
)
#df2 = df2.convert_dtypes()

In [3]:
def get_pgn(row, parent=False):
    """Build the move text for ``row``, optionally appended to its parent's text.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide ``move`` (the move text), ``made_move`` (truthy when the
        move should carry a move-number prefix) and ``depth`` (an integer;
        the move number is derived from it as ``(depth + 1) // 2``).
    parent : mapping, pandas.Series or falsy value, optional
        Row of the preceding position. When given, its ``pgn`` value is put
        in front of the text built for ``row``. ``False``, ``None`` and
        empty containers all mean "no parent".

    Returns
    -------
    str
        The move text, always ending in a single space.
    """
    current_move = str(row['move']) + " "
    if row['made_move']:
        # Convert to a plain int first so a narrow numpy integer cannot
        # overflow on the +1, then use integer division for the move number.
        move_num = str((int(row['depth']) + 1) // 2) + ". "
        current_move = move_num + current_move

    try:
        has_parent = bool(parent)
    except ValueError:
        # pandas objects refuse plain truth-testing ("truth value ... is
        # ambiguous"); treat any non-empty row as a real parent.
        has_parent = len(parent) > 0

    if has_parent:
        return parent['pgn'] + current_move
    return current_move

In [4]:
def process_game(x):
    """Fill in ``parent_index``, ``opening``, ``variation`` and ``pgn`` for one row.

    Meant to be used as ``df2.apply(process_game, axis=1)``. Nothing is
    returned: the results are written straight into the module-level ``df2``
    with ``.at``, so the ``apply`` call itself yields ``None`` for every row.

    Relies on two module-level names: ``df2`` (the frame being annotated) and
    ``eco_db`` (the position lookup built with ``create_eco_db``).

    Parameters
    ----------
    x : pandas.Series
        One row of ``df2``; ``x.name`` is its index label. The row must
        provide ``depth`` and ``play`` (the sequence of UCI move strings
        leading to the position).

    Raises
    ------
    IndexError
        If a row with ``depth != 1`` has no parent row in ``df2``.
    ValueError
        If the PGN text generated for the row cannot be read back as a game.
    """
    # Imported once here instead of separately in each nested helper.
    import io

    import chess
    import chess.pgn

    def vectorised_pgn(x):
        """Store the index label of this row's parent in ``df2['parent_index']``.

        The parent is the first row of ``df2`` whose ``play`` equals this
        row's ``play`` without its last move. Rows at depth 1 are left as is.
        """
        index = x.name
        if index % 500 == 0:
            print("Row: " + str(index))
        if x['depth'] == 1:
            return

        parent_play = x['play'][:-1]
        # One full scan of df2 per row; the whole pass is therefore quadratic
        # in the number of rows.
        is_parent = df2['play'].apply(lambda y: y == parent_play)
        parent_labels = df2[is_parent].index
        if len(parent_labels) == 0:
            raise IndexError(
                "no row in df2 holds the parent line " + repr(parent_play)
                + " of row " + str(index)
            )
        df2.at[index, 'parent_index'] = int(parent_labels[0])

    def add_openings(x):
        """Replay the row's moves, classify the line and store the results in ``df2``."""
        index = x.name
        board = chess.Board()
        game = chess.pgn.Game()
        game.headers["White"] = "Ed"
        game.headers["Black"] = "The internet"
        game.setup(board)

        node = game
        for raw_move in x['play']:
            # Entries may still carry quote characters or spaces around the UCI text.
            move = chess.Move.from_uci(raw_move.strip("' '"))
            node = node.add_variation(move)  # extend the main line
            board.push(move)                 # mirror the move on the board as well

        pgn = add_eco2(str(game))
        if pgn is None:
            raise ValueError("could not read back the PGN generated for row " + str(index))

        headers = pgn.headers
        # Direct match first, transposition match second; missing or empty
        # headers are skipped.
        openings = [headers[key] for key in ('Opening', 'OpeningT')
                    if key in headers and headers[key]]
        variations = [headers[key] for key in ('Variation', 'VariationT')
                      if key in headers and headers[key]]

        df2.at[index, 'opening'] = openings        # set openings
        df2.at[index, 'variation'] = variations    # set variations
        # Stores the game object returned by add_eco2, not its text.
        df2.at[index, 'pgn'] = pgn                 # set pgn

    def add_eco2(inpgn: str, ply: int = 4, maxply: int = 24):
        """Parse ``inpgn`` and attach ECO / opening / variation headers to the game.

        Every main-line position is looked up in ``eco_db`` by its EPD.
        Matches found up to ``ply`` plies go to the ``ECO`` / ``Opening`` /
        ``Variation`` headers (the latest such match wins); matches found
        later go to the ``ECOT`` / ``OpeningT`` / ``VariationT`` headers.
        Scanning stops once ``maxply`` plies have been visited.

        Returns the parsed game, or ``None`` when ``inpgn`` holds no game.
        """
        global eco_db

        game = chess.pgn.read_game(io.StringIO(inpgn))
        if game is None:
            return None

        first_eco = first_opening = first_variation = None
        eco_t = opening_t = variation_t = None

        for node in game.mainline():
            board = node.board()
            gply = board.ply()
            epd = board.epd()

            # Only positions after the first move are checked against the eco db.
            if gply >= 1:
                if epd in eco_db:
                    entry = eco_db[epd]
                    if gply <= ply:
                        # Update the direct classification up to the given ply only.
                        first_eco = entry['eco']
                        first_opening = entry['opening']
                        first_variation = entry['variation']
                    else:
                        # Anything later counts as a classification by transposition.
                        eco_t = entry['eco']
                        opening_t = entry['opening']
                        variation_t = entry['variation']

                if gply >= maxply:
                    break

        if first_eco is not None:
            game.headers['ECO'] = first_eco
            game.headers['Opening'] = first_opening
            if first_variation is not None:
                game.headers['Variation'] = first_variation
        if eco_t is not None:
            game.headers['ECOT'] = eco_t
            game.headers['OpeningT'] = opening_t
            if variation_t is not None:
                game.headers['VariationT'] = variation_t
        return game

    vectorised_pgn(x)
    add_openings(x)

In [5]:
counter = 0

# Restrict the frame to rows whose depth is below 11 before the slow row-wise pass.
df2 = df2[df2['depth'] < 11]
total = df2.shape[0]

# Position lookup that process_game reads as a module-level name.
eco_db = create_eco_db('eco.pgn')

# process_game writes into df2 itself; the value displayed here is a Series of None.
df2.apply(process_game, axis=1)


Row: 0
Row: 500
Row: 1000
Row: 1500
Row: 2000
Row: 2500
Row: 3000
Row: 3500
Row: 4000
Row: 4500
Row: 5000
Row: 5500
Row: 6000
Row: 6500
Row: 7000
Row: 7500
Row: 8000
Row: 8500
Row: 9000
Row: 9500
Row: 10000
Row: 10500
Row: 11000
Row: 11500
Row: 12000
Row: 12500
Row: 13000
Row: 13500
Row: 14000
Row: 14500
Row: 15000
Row: 15500
Row: 16000
Row: 16500
Row: 17000
Row: 17500
Row: 18000
Row: 18500
Row: 19000
Row: 19500
Row: 20000
Row: 20500
Row: 21000
Row: 21500
Row: 22000
Row: 22500
Row: 23000
Row: 23500
Row: 24000
Row: 24500
Row: 25000
Row: 25500
Row: 26000
Row: 26500
Row: 27000
Row: 27500
Row: 28000
Row: 28500
Row: 29000
Row: 29500
Row: 30000
Row: 30500
Row: 31000
Row: 31500
Row: 32000
Row: 32500
Row: 33000
Row: 33500
Row: 34000
Row: 34500
Row: 35000
Row: 35500
Row: 36000
Row: 36500
Row: 37000
Row: 37500
Row: 38000
Row: 38500
Row: 39000
Row: 39500
Row: 40000
Row: 40500
Row: 41000
Row: 41500
Row: 42000
Row: 42500
Row: 43000
Row: 43500
Row: 44000
Row: 44500
Row: 45000
Row: 45500
Row: 46000
R

0         None
1         None
2         None
3         None
4         None
          ... 
153827    None
153828    None
153829    None
153830    None
153831    None
Length: 153832, dtype: object

In [9]:
# Preview the first five annotated rows.
df2.head(n=5)

Unnamed: 0,parent_index,depth,made_move,play,move,pgn,opening,variation
0,,1,True,[e2e4],e4,"[Event ""?""]\n[Site ""?""]\n[Date ""????.??.??""]\n...",[King's pawn game],[]
1,,1,True,[d2d4],d4,"[Event ""?""]\n[Site ""?""]\n[Date ""????.??.??""]\n...",[Queen's pawn game],[]
2,0.0,2,False,"[e2e4, e7e5]",e5,"[Event ""?""]\n[Site ""?""]\n[Date ""????.??.??""]\n...",[King's pawn game],[]
3,0.0,2,False,"[e2e4, c7c6]",c6,"[Event ""?""]\n[Site ""?""]\n[Date ""????.??.??""]\n...",[Caro-Kann],[]
4,1.0,2,False,"[d2d4, e7e6]",e6,"[Event ""?""]\n[Site ""?""]\n[Date ""????.??.??""]\n...",[Queen's pawn game],[]


In [6]:
# Checkpoint the annotated frame as CSV and as pickle.
# NOTE(review): both files are written again from final_df_ex in a later cell,
# which overwrites what is saved here.
df2.to_csv('graph_ready.csv')
df2.to_pickle('graph_ready.pkl')

In [13]:
# Columns that df2 already carries; dropping them from the raw frame leaves
# 'play' and the remaining raw columns for the merge below.
derived_columns = ['parent_index', 'depth', 'made_move', 'move', 'pgn', 'opening', 'variation']
initial_df = df.drop(columns=derived_columns)

In [14]:
# Turn 'play' into its string form in both frames so it can serve as a merge key.
for frame in (initial_df, df2):
    frame['play'] = frame['play'].astype('str')

In [28]:
# Join the annotated rows with the raw columns on the stringified move list.
final_df = df2.merge(initial_df, on='play')

In [29]:
# Preview the first five merged rows.
final_df.head(n=5)

Unnamed: 0,parent_index,depth,made_move,play,move,pgn,opening,variation,fen,total_games,white,draw,black,move_perc,move_total_games,move_white,move_draw,move_black
0,,1,True,['e2e4'],e4,"[Event ""?""]\n[Site ""?""]\n[Date ""????.??.??""]\n...",[King's pawn game],[],rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR ...,140441892,0.496258,0.041152,0.46259,0.639511,89814198,44571001,3696026,41547171
1,,1,True,['d2d4'],d4,"[Event ""?""]\n[Site ""?""]\n[Date ""????.??.??""]\n...",[Queen's pawn game],[],rnbqkbnr/pppppppp/8/8/3P4/8/PPP1PPPP/RNBQKBNR ...,140441892,0.505183,0.042948,0.45187,0.241264,33883522,17117366,1455221,15310935
2,0.0,2,False,"['e2e4', 'e7e5']",e5,"[Event ""?""]\n[Site ""?""]\n[Date ""????.??.??""]\n...",[King's pawn game],[],rnbqkbnr/pppp1ppp/8/4p3/4P3/8/PPPP1PPP/RNBQKBN...,89672875,0.509467,0.041142,0.44939,0.466781,41857620,21325081,1722127,18810412
3,0.0,2,False,"['e2e4', 'c7c6']",c6,"[Event ""?""]\n[Site ""?""]\n[Date ""????.??.??""]\n...",[Caro-Kann],[],rnbqkbnr/pp1ppppp/2p5/8/4P3/8/PPPP1PPP/RNBQKBN...,89672875,0.4711,0.043342,0.485558,0.06078,5450279,2567627,236226,2646426
4,1.0,2,False,"['d2d4', 'e7e6']",e6,"[Event ""?""]\n[Site ""?""]\n[Date ""????.??.??""]\n...",[Queen's pawn game],[],rnbqkbnr/pppp1ppp/4p3/8/3P4/8/PPP1PPPP/RNBQKBN...,33876402,0.5108,0.041486,0.447714,0.098809,3347308,1709805,138865,1498638


In [31]:
# One output row per entry of each 'opening' list; index labels repeat for
# rows that held more than one opening.
final_df_ex = final_df.explode(column='opening')

In [32]:
# Show the first 100 rows of the merged (not the exploded) frame.
final_df.head(n=100)

Unnamed: 0,parent_index,depth,made_move,play,move,pgn,opening,variation,fen,total_games,white,draw,black,move_perc,move_total_games,move_white,move_draw,move_black
0,,1,True,['e2e4'],e4,"[Event ""?""]\n[Site ""?""]\n[Date ""????.??.??""]\n...",[King's pawn game],[],rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR ...,140441892,0.496258,0.041152,0.462590,0.639511,89814198,44571001,3696026,41547171
1,,1,True,['d2d4'],d4,"[Event ""?""]\n[Site ""?""]\n[Date ""????.??.??""]\n...",[Queen's pawn game],[],rnbqkbnr/pppppppp/8/8/3P4/8/PPP1PPPP/RNBQKBNR ...,140441892,0.505183,0.042948,0.451870,0.241264,33883522,17117366,1455221,15310935
2,0,2,False,"['e2e4', 'e7e5']",e5,"[Event ""?""]\n[Site ""?""]\n[Date ""????.??.??""]\n...",[King's pawn game],[],rnbqkbnr/pppp1ppp/8/4p3/4P3/8/PPPP1PPP/RNBQKBN...,89672875,0.509467,0.041142,0.449390,0.466781,41857620,21325081,1722127,18810412
3,0,2,False,"['e2e4', 'c7c6']",c6,"[Event ""?""]\n[Site ""?""]\n[Date ""????.??.??""]\n...",[Caro-Kann],[],rnbqkbnr/pp1ppppp/2p5/8/4P3/8/PPPP1PPP/RNBQKBN...,89672875,0.471100,0.043342,0.485558,0.060780,5450279,2567627,236226,2646426
4,1,2,False,"['d2d4', 'e7e6']",e6,"[Event ""?""]\n[Site ""?""]\n[Date ""????.??.??""]\n...",[Queen's pawn game],[],rnbqkbnr/pppp1ppp/4p3/8/3P4/8/PPP1PPPP/RNBQKBN...,33876402,0.510800,0.041486,0.447714,0.098809,3347308,1709805,138865,1498638
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,11,4,False,"['e2e4', 'd7d5', 'g1f3', 'd5e4']",dxe4,"[Event ""?""]\n[Site ""?""]\n[Date ""????.??.??""]\n...",[Zukertort],"[Tennison (Lemberg, Zukertort) gambit]",rnbqkbnr/ppp1pppp/8/8/4p3/5N2/PPPP1PPP/RNBQKB1...,534809,0.484372,0.037197,0.478430,0.860632,460274,222944,17121,220209
96,23,4,False,"['e2e4', 'e7e6', 'f1c4', 'd7d5']",d5,"[Event ""?""]\n[Site ""?""]\n[Date ""????.??.??""]\n...",[French],[],rnbqkbnr/ppp2ppp/4p3/3p4/2B1P3/8/PPPP1PPP/RNBQ...,449130,0.430044,0.042735,0.527221,0.540701,242845,104434,10378,128033
97,45,4,False,"['d2d4', 'd7d5', 'b1c3', 'g8f6']",Nf6,"[Event ""?""]\n[Site ""?""]\n[Date ""????.??.??""]\n...",[Queen's pawn game],[Chigorin variation],rnbqkb1r/ppp1pppp/5n2/3p4/3P4/2N5/PPP1PPPP/R1B...,947879,0.490082,0.043280,0.466638,0.434037,411415,201627,17806,191982
98,42,4,False,"['e2e4', 'e7e6', 'g1f3', 'd7d5']",d5,"[Event ""?""]\n[Site ""?""]\n[Date ""????.??.??""]\n...",[French],[],rnbqkbnr/ppp2ppp/4p3/3p4/4P3/5N2/PPPP1PPP/RNBQ...,2626073,0.461902,0.045508,0.492590,0.560947,1473089,680423,67037,725629


In [35]:
# Write the exploded frame to the same file names used for the earlier df2
# checkpoint, replacing those files.
final_df_ex.to_csv('graph_ready.csv')
final_df_ex.to_pickle('graph_ready.pkl')

In [None]:
# Reload the saved frame; this rebinds `df`, which earlier held the raw results.
# pd.read_pickle should only be used on files from a trusted source.
# NOTE(review): the frame written to graph_ready.pkl shows no 'index1' column in
# its previews, so set_index('index1') looks like it would raise KeyError —
# confirm which column was meant to become the index.
df = pd.read_pickle('graph_ready.pkl')
df = df.set_index('index1')