#Python Chess Engine Train

**libraries**

---

In [17]:
import glob
import os
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.utils import shuffle

**data**

---

import the chess games dataset from github

In [18]:
!git clone https://github.com/iAmEthanMai/chess-games-dataset.git

fatal: destination path 'chess-games-dataset' already exists and is not an empty directory.


In [19]:


path_fischer = './chess-games-dataset/Data/CSV_FISCHER'
path_morphy = './chess-games-dataset/Data/CSV_MORPHY'
path_capablanca = './chess-games-dataset/Data/CSV_CAPABLANCA'

files_fischer = glob.glob(path_fischer + "/*.csv")
#files_morphy = glob.glob(path_morphy + "/*.csv")
#files_capablanca = glob.glob(path_capablanca + "/*.csv")


li = []

for filename in files_fischer:
    df = pd.read_csv(filename, index_col=None, header=0)
    li.append(df)

train = pd.concat(li, axis=0, ignore_index=True)

In [20]:
train = shuffle(train)

In [21]:
train.shape

(830376, 193)

In [22]:
train.head()

Unnamed: 0,a1,b1,c1,d1,e1,f1,g1,h1,a2,b2,...,to_h7,to_a8,to_b8,to_c8,to_d8,to_e8,to_f8,to_g8,to_h8,good_move
736412,,,K,R,,,R,,P,P,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,False
223012,R,N,B,Q,K,B,N,R,P,P,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,False
78395,R,,,Q,,R,K,,P,P,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,False
277830,R,,,,,R,K,,,P,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,False
671736,,,,,,Q,B,,,K,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,False


**features**

---

In [23]:
features = list(train.iloc[:, 0:192].columns)

In [24]:
X = train[features]
y = train['good_move']

In [25]:
categorical_columns = list(X.iloc[:, 0:63].columns)
numerical_columns = list(X.iloc[:, 64:192].columns)
feature_columns = []

for feature_name in categorical_columns:
  vocabulary = X[feature_name].unique()
  feature_columns.append(tf.feature_column.categorical_column_with_vocabulary_list(feature_name, vocabulary))


for feature_name in numerical_columns:
  feature_columns.append(tf.feature_column.numeric_column(feature_name,dtype = tf.float32))

**input function**

---

In [26]:
def make_input_fn(data_df, label_df, num_epochs = 10, shuffle = True, batch_size = 32):
  def input_function():
    ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
    if shuffle:
      ds = ds.shuffle(1000)
    ds = ds.batch(batch_size).repeat(num_epochs)
    return ds
  return input_function

**split data into batches**

---

In [27]:
def split_into_batches(df, batch_size=100000):
  nb_rows = len(df.index)
  intervals = []

  for i in range(0, nb_rows + 1, batch_size):
    intervals.append(i)

  if(intervals[-1] != nb_rows):
    intervals.append(nb_rows)

  batches_X = []
  batches_y = []

  for i in range(0, len(intervals) - 1):
    batches_X.append(train.iloc[intervals[i]:intervals[i + 1], :][features])
    batches_y.append(train.iloc[intervals[i]:intervals[i + 1], :]['good_move'])

  return batches_X, batches_y

In [28]:
batches_X, batches_y = split_into_batches(train)

**model**

---

In [34]:
linear_est = tf.estimator.LinearClassifier(feature_columns = feature_columns, model_dir='./estimator/')

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': './estimator/', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


**train model**

---

In [None]:
input_functions = []
for df_X, df_y in zip(batches_X, batches_y):
  input_functions.append(make_input_fn(df_X, df_y))

In [None]:
len(input_functions)

9

In [33]:
# train the model on all the input functions
i = 1
j = 1

for input_function in input_functions:
  j += 1

print(j)

for input_function in input_functions:
  print('<======================================== NEW BATCH ========================================>')
  print('Batch: ' + str(i))
  i = i + 1
  linear_est.train(input_function)


# save the model
serving_input_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(
  tf.feature_column.make_parse_example_spec(feature_columns))

estimator_base_path = './estimator/'
estimator_path = linear_est.export_saved_model(estimator_base_path, serving_input_fn)

10
Batch: 1


TypeError: Expected binary or unicode string, got nan