In [1]:
import sqlite3
import pandas as pd
import matplotlib as rc
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np


In [2]:
#------------------------------------------------------------------------------
# accept a dataframe, remove outliers, return cleaned data in a new dataframe
# see http://www.itl.nist.gov/div898/handbook/prc/section1/prc16.htm
#------------------------------------------------------------------------------
def remove_outlier(df_in, col_name, whisker=1.5):
    """Return the rows of ``df_in`` whose ``col_name`` value lies within the IQR fences.

    The fences are ``Q1 - whisker * IQR`` and ``Q3 + whisker * IQR``, where Q1/Q3
    are the 25% / 75% quantiles of the column and ``IQR = Q3 - Q1``.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Input frame; it is not modified.
    col_name : str
        Name of the column the fences are computed on.
    whisker : float, optional
        Multiplier applied to the IQR to build the fences (default 1.5).

    Returns
    -------
    pandas.DataFrame
        New frame containing only the rows inside the fences. Rows whose
        value is NaN are dropped because they compare false against both fences.
    """
    values = df_in[col_name]
    q1 = values.quantile(0.25)
    q3 = values.quantile(0.75)
    iqr = q3 - q1  # interquartile range
    fence_low = q1 - whisker * iqr
    fence_high = q3 + whisker * iqr
    # Inclusive bounds: a value sitting exactly on a fence is not beyond it,
    # and a column with IQR == 0 (e.g. constant data) must keep its rows
    # instead of being emptied by two strict comparisons against the same value.
    return df_in.loc[values.between(fence_low, fence_high)]

# Seaborn appearance: "whitegrid" axes style with the "paper" plotting context.
sns.set_style("whitegrid")
sns.set_context("paper")

# Switch matplotlib to the PGF backend and let LaTeX (pdflatex) handle text.
# NOTE: `rc` is the top-level matplotlib package (imported under that alias),
# not pyplot's `rc` function.
rc.use("pgf")
rc.rcParams["pgf.texsystem"] = "pdflatex"
rc.rcParams["font.family"] = "serif"
rc.rcParams["text.usetex"] = True
rc.rcParams["pgf.rcfonts"] = False


In [3]:
# Open the SQLite database holding the repeated bulk analysis results.
# The path is relative to the notebook's working directory.
DB_PATH = "../output/db/repeated_bulk_analysis_2019-08-28.db"
connection = sqlite3.connect(DB_PATH)


In [4]:
# Per-move score spread: for every (game_id, ply_number) group, take
# MAX - MIN of each score column. Each pair below is
# (column in the score table, label used as dataframe column name).
score_columns = [
    ("score_b0010", "10ms"),
    ("score_b0020", "20ms"),
    ("score_b0050", "50ms"),
    ("score_b0100", "100ms"),
    ("score_b0200", "200ms"),
    ("score_b0500", "500ms"),
    ("score_b1000", "1000ms"),
    ("score_b2000", "2000ms"),
    # NOTE(review): column is named score_b10 but labelled '15 d' — confirm the depth.
    ("score_b10", "15 d"),
    ("score_b20", "20 d"),
]
spread_exprs = ", ".join(
    f"MAX({column}) - MIN({column}) as '{label}'" for column, label in score_columns
)
score_query = (
    f"SELECT {spread_exprs} "
    "FROM score JOIN move ON score.move_id = move.id "
    "GROUP BY move.game_id, move.ply_number"
)
df_score = pd.read_sql(score_query, connection)

# Summary statistics of the spreads, shown as the cell output.
df_score.describe()

Unnamed: 0,10ms,20ms,50ms,100ms,200ms,500ms,1000ms,2000ms,15 d,20 d
count,311.0,311.0,311.0,311.0,311.0,311.0,311.0,311.0,311.0,311.0
mean,34.003215,32.588424,31.572347,32.37299,30.906752,27.14791,25.099678,23.546624,28.919614,24.601286
std,23.366574,20.509739,22.46561,23.516688,19.588533,19.435186,17.010614,17.201564,19.152107,16.00308
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,19.0,20.0,19.0,19.0,19.5,16.0,14.0,14.0,18.0,15.0
50%,30.0,30.0,30.0,29.0,29.0,24.0,23.0,22.0,26.0,23.0
75%,45.5,42.0,41.0,42.0,41.0,34.0,33.0,30.0,38.0,33.0
max,203.0,127.0,193.0,198.0,143.0,163.0,111.0,140.0,143.0,83.0


In [5]:
# Box plot of the score spread, one box per engine setting.
# The frame is reshaped to long form ("variable" = column label, "value" = spread).
score_long = df_score.melt()
fig_score, ax_score = plt.subplots(figsize=(7, 3))
boxplot_score = sns.boxplot(data=score_long, x="variable", y="value", ax=ax_score)
boxplot_score.set_xlabel("milliseconds and depth")
boxplot_score.set_ylabel("centipawn")
boxplot_score.set_title("Comparison between the different input parameters for the score evaluation.")

Text(0.5, 1.0, 'Comparison between the different input parameters for the score evaluation.')

In [6]:
# Export the score box plot as PGF and PDF files in the working directory.
fig = boxplot_score.get_figure()
fig.tight_layout()
for target in ("boxplot_score.pgf", "boxplot_score.pdf"):
    fig.savefig(target)


In [7]:
# Summary statistics (count, mean, std, quartiles, min/max) of every df_score
# column; this repeats the describe() call made right after df_score was loaded.
df_score.describe()


Unnamed: 0,10ms,20ms,50ms,100ms,200ms,500ms,1000ms,2000ms,15 d,20 d
count,311.0,311.0,311.0,311.0,311.0,311.0,311.0,311.0,311.0,311.0
mean,34.003215,32.588424,31.572347,32.37299,30.906752,27.14791,25.099678,23.546624,28.919614,24.601286
std,23.366574,20.509739,22.46561,23.516688,19.588533,19.435186,17.010614,17.201564,19.152107,16.00308
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,19.0,20.0,19.0,19.0,19.5,16.0,14.0,14.0,18.0,15.0
50%,30.0,30.0,30.0,29.0,29.0,24.0,23.0,22.0,26.0,23.0
75%,45.5,42.0,41.0,42.0,41.0,34.0,33.0,30.0,38.0,33.0
max,203.0,127.0,193.0,198.0,143.0,163.0,111.0,140.0,143.0,83.0


In [8]:
# Load every row of the timing_score table, renaming each column to the
# label used in the plots: (column in timing_score, dataframe column name).
# NOTE(review): presumably these are evaluation durations per setting — confirm.
timing_columns = [
    ("score_b0010", "10ms"),
    ("score_b0020", "20ms"),
    ("score_b0050", "50ms"),
    ("score_b0100", "100ms"),
    ("score_b0200", "200ms"),
    ("score_b0500", "500ms"),
    ("score_b1000", "1000ms"),
    ("score_b2000", "2000ms"),
    # NOTE(review): column is named score_b10 but labelled '15 d' — confirm the depth.
    ("score_b10", "15 d"),
    ("score_b20", "20 d"),
]
timing_select = ", ".join(f"{column} as '{label}'" for column, label in timing_columns)
timing_query = f"SELECT {timing_select} FROM timing_score"
df_timing_score = pd.read_sql(timing_query, connection)


In [11]:
# Box plot restricted to the two depth-based columns of df_timing_score,
# reshaped to long form ("variable" = column label, "value" = measurement).
timing_long = df_timing_score[['15 d', '20 d']].melt()
fig_timing, ax_timing = plt.subplots(figsize=(3, 3))
boxplot_timing = sns.boxplot(data=timing_long, x="variable", y="value", ax=ax_timing)
boxplot_timing.set_xlabel("depth")
boxplot_timing.set_ylabel("seconds")
boxplot_timing.set_title("Time comparison.")

Text(0.5, 1.0, 'Time comparison.')

In [12]:
# Export the timing box plot as PGF and PDF files in the working directory.
fig = boxplot_timing.get_figure()
fig.tight_layout()
for target in ("boxplot_timing.pgf", "boxplot_timing.pdf"):
    fig.savefig(target)