In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly
import plotly.plotly as py
%matplotlib inline

In [2]:
# Load the per-move table, keep only rows whose ply number is at most 10,
# and narrow it to the columns that identify the game and describe the move.
moves = pd.read_hdf('/home/grobleke/OTB.h5', key='moves', mode='r')
moves = moves[moves['Ply'].le(10)][
    ['GameID', 'Ply', 'MovedPiece', 'SourceCol', 'SourceRow', 'DestCol', 'DestRow']
]

In [3]:
# Load the per-game table; only the game id and its outcome are used below.
games = pd.read_hdf('/home/grobleke/OTB.h5', key='games', mode='r')[['GameID', 'Outcome']]

In [4]:
def transform_col(i):
    """Convert a zero-based column index into a lowercase letter.

    Index 0 yields 'a', index 1 yields 'b', and so on along the character
    codes that follow 'a'.
    """
    first_letter_code = ord('a')
    return chr(first_letter_code + i)

def transform_row(i):
    """Convert a zero-based row index into a one-based row number."""
    one_based = i + 1
    return one_based

def add_suffixes(df, i, label):
    """Extract one ply from a moves table and tag its columns with a label.

    Keeps the rows of ``df`` whose ``Ply`` equals ``i``, drops the ``Ply``
    column, and renames each move column ``X`` to ``X_<label>``. The renamed
    source/destination column fields are passed through ``transform_col`` and
    the source/destination row fields through ``transform_row``.

    Parameters:
        df: DataFrame with a ``Ply`` column plus the five move columns.
        i: ply number to select.
        label: suffix appended (after an underscore) to each move column.

    Returns:
        A new DataFrame; all other columns of ``df`` (e.g. ``GameID``) are
        carried over unchanged.
    """
    move_fields = ['MovedPiece', 'SourceCol', 'SourceRow', 'DestCol', 'DestRow']
    suffixed = {field: field + '_' + label for field in move_fields}

    ply_rows = df[df.Ply == i].drop('Ply', axis=1)
    ply_rows = ply_rows.rename(columns=suffixed)

    # Board coordinates are converted from indices to their display form.
    for field in ('SourceCol', 'DestCol'):
        ply_rows[suffixed[field]] = ply_rows[suffixed[field]].map(transform_col)
    for field in ('SourceRow', 'DestRow'):
        ply_rows[suffixed[field]] = ply_rows[suffixed[field]].map(transform_row)
    return ply_rows

In [5]:
# White moves on the odd plies (1, 3, 5, 7, 9); each table is labelled w1..w5.
white_moves_1, white_moves_2, white_moves_3, white_moves_4, white_moves_5 = [
    add_suffixes(moves, 2 * turn - 1, 'w%d' % turn) for turn in range(1, 6)
]

# Black moves on the even plies (2, 4, 6, 8, 10); labelled b1..b5.
black_moves_1, black_moves_2, black_moves_3, black_moves_4, black_moves_5 = [
    add_suffixes(moves, 2 * turn, 'b%d' % turn) for turn in range(1, 6)
]

In [6]:
# Build the opening book one ply at a time: book k holds, per game, the first
# k plies side by side. Each step joins the next ply's table on GameID using
# merge's default join type, so only games present in both sides are kept.
common_attrs = ['GameID']
full_opening_book_1 = white_moves_1
full_opening_book_2 = full_opening_book_1.merge(black_moves_1, on=common_attrs)
full_opening_book_3 = full_opening_book_2.merge(white_moves_2, on=common_attrs)
full_opening_book_4 = full_opening_book_3.merge(black_moves_2, on=common_attrs)
full_opening_book_5 = full_opening_book_4.merge(white_moves_3, on=common_attrs)
full_opening_book_6 = full_opening_book_5.merge(black_moves_3, on=common_attrs)
full_opening_book_7 = full_opening_book_6.merge(white_moves_4, on=common_attrs)
full_opening_book_8 = full_opening_book_7.merge(black_moves_4, on=common_attrs)
full_opening_book_9 = full_opening_book_8.merge(white_moves_5, on=common_attrs)
full_opening_book_10 = full_opening_book_9.merge(black_moves_5, on=common_attrs)

In [64]:
# Support threshold: num_threshold is 1% (freq_threshold) of the number of
# distinct game ids in the games table.
num_games = games['GameID'].nunique()
freq_threshold = 0.01
num_threshold = freq_threshold * num_games

In [8]:
def get_move_str(row, n):
    """Render the first ``n`` plies stored in ``row`` as one readable string.

    Ply 1 is read from the ``*_w1`` fields, ply 2 from ``*_b1``, ply 3 from
    ``*_w2`` and so on. Each ply is formatted as
    ``<piece><source col><source row> -> <dest col><dest row>``; plies are
    joined with ", " and any newline characters are removed from the result.

    Parameters:
        row: mapping-like object (e.g. a DataFrame row) indexed by the
            suffixed column names.
        n: number of plies to include.

    Returns:
        The formatted string; empty when ``n`` is less than 1.
    """
    prefixes = ('MovedPiece_', 'SourceCol_', 'SourceRow_', 'DestCol_', 'DestRow_')
    rendered = []
    for ply in range(1, n + 1):
        # Even plies belong to black, odd plies to white; both sides share
        # the same move number, which is the ply rounded up to a full move.
        side = 'b' if ply % 2 == 0 else 'w'
        suffix = side + str((ply + 1) // 2)
        fields = tuple(row[prefix + suffix] for prefix in prefixes)
        rendered.append("%s%s%s -> %s%s" % fields)
    return ", ".join(rendered).replace("\n", '')

In [44]:
def get_bias(game_ids):
    """Return the white-minus-black win rate for a set of games.

    Outcomes are looked up in the module-level ``games`` table. The result is
    (number of matching games with Outcome 'w' minus number with Outcome 'b')
    divided by the number of ids passed in (``game_ids.size``), so games with
    any other outcome only contribute to the denominator.

    Parameters:
        game_ids: array-like of game ids exposing a ``.size`` attribute
            (e.g. a pandas Series).
    """
    outcomes = games.Outcome[games.GameID.isin(game_ids)]
    white_count = int((outcomes == 'w').sum())
    black_count = int((outcomes == 'b').sum())
    return (white_count - black_count) / game_ids.size

In [81]:
# Summarise every 1-ply opening played in more than num_threshold games:
# how often it occurs, its win bias, and a printable move string.
group_cols_1 = full_opening_book_1.columns.drop('GameID').values
group_1 = (
    full_opening_book_1
    .groupby(list(group_cols_1))
    .filter(lambda plays: plays.GameID.size > num_threshold)
    .groupby(list(group_cols_1))
)
win_bias_1 = (
    group_1
    .apply(lambda plays: get_bias(plays.GameID))
    .reset_index(name='bias')
)
# Both aggregations come from the same grouping, so their fresh integer
# indexes are expected to line up when the bias column is attached.
openings_1 = (
    group_1.size()
    .reset_index(name='count')
    .assign(bias=win_bias_1.bias)
    .sort_values('count', ascending=False)
)
openings_1['str'] = openings_1.apply(lambda opening: get_move_str(opening, 1), axis=1)

In [77]:
# Summarise every 2-ply opening played in more than num_threshold games:
# how often it occurs, its win bias, and a printable move string.
group_cols_2 = full_opening_book_2.columns.drop('GameID').values
group_2 = (
    full_opening_book_2
    .groupby(list(group_cols_2))
    .filter(lambda plays: plays.GameID.size > num_threshold)
    .groupby(list(group_cols_2))
)
win_bias_2 = (
    group_2
    .apply(lambda plays: get_bias(plays.GameID))
    .reset_index(name='bias')
)
# Both aggregations come from the same grouping, so their fresh integer
# indexes are expected to line up when the bias column is attached.
openings_2 = (
    group_2.size()
    .reset_index(name='count')
    .assign(bias=win_bias_2.bias)
    .sort_values('count', ascending=False)
)
openings_2['str'] = openings_2.apply(lambda opening: get_move_str(opening, 2), axis=1)

In [79]:
# Summarise every 3-ply opening played in more than num_threshold games:
# how often it occurs, its win bias, and a printable move string.
group_cols_3 = full_opening_book_3.columns.drop('GameID').values
group_3 = (
    full_opening_book_3
    .groupby(list(group_cols_3))
    .filter(lambda plays: plays.GameID.size > num_threshold)
    .groupby(list(group_cols_3))
)
win_bias_3 = (
    group_3
    .apply(lambda plays: get_bias(plays.GameID))
    .reset_index(name='bias')
)
# Both aggregations come from the same grouping, so their fresh integer
# indexes are expected to line up when the bias column is attached.
openings_3 = (
    group_3.size()
    .reset_index(name='count')
    .assign(bias=win_bias_3.bias)
    .sort_values('count', ascending=False)
)
openings_3['str'] = openings_3.apply(lambda opening: get_move_str(opening, 3), axis=1)

In [80]:
# Summarise every 4-ply opening played in more than num_threshold games:
# how often it occurs, its win bias, and a printable move string.
group_cols_4 = full_opening_book_4.columns.drop('GameID').values
group_4 = (
    full_opening_book_4
    .groupby(list(group_cols_4))
    .filter(lambda plays: plays.GameID.size > num_threshold)
    .groupby(list(group_cols_4))
)
win_bias_4 = (
    group_4
    .apply(lambda plays: get_bias(plays.GameID))
    .reset_index(name='bias')
)
# Both aggregations come from the same grouping, so their fresh integer
# indexes are expected to line up when the bias column is attached.
openings_4 = (
    group_4.size()
    .reset_index(name='count')
    .assign(bias=win_bias_4.bias)
    .sort_values('count', ascending=False)
)
openings_4['str'] = openings_4.apply(lambda opening: get_move_str(opening, 4), axis=1)

In [91]:
# Summarise every 5-ply opening played in more than num_threshold games:
# how often it occurs, its win bias, and a printable move string.
group_cols_5 = full_opening_book_5.columns.drop('GameID').values
group_5 = (
    full_opening_book_5
    .groupby(list(group_cols_5))
    .filter(lambda plays: plays.GameID.size > num_threshold)
    .groupby(list(group_cols_5))
)
win_bias_5 = (
    group_5
    .apply(lambda plays: get_bias(plays.GameID))
    .reset_index(name='bias')
)
# Both aggregations come from the same grouping, so their fresh integer
# indexes are expected to line up when the bias column is attached.
openings_5 = (
    group_5.size()
    .reset_index(name='count')
    .assign(bias=win_bias_5.bias)
    .sort_values('count', ascending=False)
)
openings_5['str'] = openings_5.apply(lambda opening: get_move_str(opening, 5), axis=1)

In [92]:
# Summarise every 6-ply opening played in more than num_threshold games:
# how often it occurs, its win bias, and a printable move string.
group_cols_6 = full_opening_book_6.columns.drop('GameID').values
group_6 = (
    full_opening_book_6
    .groupby(list(group_cols_6))
    .filter(lambda plays: plays.GameID.size > num_threshold)
    .groupby(list(group_cols_6))
)
win_bias_6 = (
    group_6
    .apply(lambda plays: get_bias(plays.GameID))
    .reset_index(name='bias')
)
# Both aggregations come from the same grouping, so their fresh integer
# indexes are expected to line up when the bias column is attached.
openings_6 = (
    group_6.size()
    .reset_index(name='count')
    .assign(bias=win_bias_6.bias)
    .sort_values('count', ascending=False)
)
openings_6['str'] = openings_6.apply(lambda opening: get_move_str(opening, 6), axis=1)

In [99]:
# Summarise every 7-ply opening played in more than num_threshold games:
# how often it occurs, its win bias, and a printable move string.
group_cols_7 = full_opening_book_7.columns.drop('GameID').values
group_7 = (
    full_opening_book_7
    .groupby(list(group_cols_7))
    .filter(lambda plays: plays.GameID.size > num_threshold)
    .groupby(list(group_cols_7))
)
win_bias_7 = (
    group_7
    .apply(lambda plays: get_bias(plays.GameID))
    .reset_index(name='bias')
)
# Both aggregations come from the same grouping, so their fresh integer
# indexes are expected to line up when the bias column is attached.
openings_7 = (
    group_7.size()
    .reset_index(name='count')
    .assign(bias=win_bias_7.bias)
    .sort_values('count', ascending=False)
)
openings_7['str'] = openings_7.apply(lambda opening: get_move_str(opening, 7), axis=1)

In [98]:
# Summarise every 8-ply opening played in more than num_threshold games:
# how often it occurs, its win bias, and a printable move string.
group_cols_8 = full_opening_book_8.columns.drop('GameID').values
group_8 = (
    full_opening_book_8
    .groupby(list(group_cols_8))
    .filter(lambda plays: plays.GameID.size > num_threshold)
    .groupby(list(group_cols_8))
)
win_bias_8 = (
    group_8
    .apply(lambda plays: get_bias(plays.GameID))
    .reset_index(name='bias')
)
# Both aggregations come from the same grouping, so their fresh integer
# indexes are expected to line up when the bias column is attached.
openings_8 = (
    group_8.size()
    .reset_index(name='count')
    .assign(bias=win_bias_8.bias)
    .sort_values('count', ascending=False)
)
openings_8['str'] = openings_8.apply(lambda opening: get_move_str(opening, 8), axis=1)

In [102]:
# Summarise every 9-ply opening played in more than num_threshold games:
# how often it occurs, its win bias, and a printable move string.
group_cols_9 = full_opening_book_9.columns.drop('GameID').values
group_9 = (
    full_opening_book_9
    .groupby(list(group_cols_9))
    .filter(lambda plays: plays.GameID.size > num_threshold)
    .groupby(list(group_cols_9))
)
win_bias_9 = (
    group_9
    .apply(lambda plays: get_bias(plays.GameID))
    .reset_index(name='bias')
)
# Both aggregations come from the same grouping, so their fresh integer
# indexes are expected to line up when the bias column is attached.
openings_9 = (
    group_9.size()
    .reset_index(name='count')
    .assign(bias=win_bias_9.bias)
    .sort_values('count', ascending=False)
)
openings_9['str'] = openings_9.apply(lambda opening: get_move_str(opening, 9), axis=1)

In [103]:
# Summarise every 10-ply opening played in more than num_threshold games:
# how often it occurs, its win bias, and a printable move string.
group_cols_10 = full_opening_book_10.columns.drop('GameID').values
group_10 = (
    full_opening_book_10
    .groupby(list(group_cols_10))
    .filter(lambda plays: plays.GameID.size > num_threshold)
    .groupby(list(group_cols_10))
)
win_bias_10 = (
    group_10
    .apply(lambda plays: get_bias(plays.GameID))
    .reset_index(name='bias')
)
# Both aggregations come from the same grouping, so their fresh integer
# indexes are expected to line up when the bias column is attached.
openings_10 = (
    group_10.size()
    .reset_index(name='count')
    .assign(bias=win_bias_10.bias)
    .sort_values('count', ascending=False)
)
openings_10['str'] = openings_10.apply(lambda opening: get_move_str(opening, 10), axis=1)

In [104]:
# Stack the per-depth summaries (1 through 10 plies) into one table holding
# only the count, the printable move string, and the win bias.
#
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and
# appending inside a loop re-copies the accumulated frame on every pass.
# Collecting the slices and concatenating once produces the same rows in the
# same order, with each frame's original index labels preserved.
opening_frames = []
for df in [openings_1, openings_2, openings_3, openings_4, openings_5,
           openings_6, openings_7, openings_8, openings_9, openings_10]:
    opening_frames.append(df[['count', 'str', 'bias']])
opening_freq = pd.concat(opening_frames)

In [105]:
# Persist the combined opening table (index column included) as CSV.
opening_freq.to_csv(path_or_buf='/home/grobleke/opening_freq.csv')

In [88]:
# Baseline win bias over all games: (white wins - black wins) / distinct games.
(len(games[games.Outcome == 'w']) - len(games[games.Outcome == 'b'])) / num_games

0.08331727311837517

In [90]:
# Number of games per outcome code.
games['Outcome'].value_counts()

w    2458650
b    1933817
s    1894877
Name: Outcome, dtype: int64