## Deep Learning Linear Regression Model

By Evan Richardson

This notebook uses simple dense (fully connected) layers to predict the outcome of a chess game from a board position.  Starting with the simplest architecture will hopefully produce a better model than the linear regression model I trained last quarter.

In [0]:
import tensorflow
print(tensorflow.__version__)

import numpy as np
import pandas as pd
import tensorflow.keras
from google.colab import drive
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import RMSprop

1.13.0-rc0


In [0]:
# Mount Google Drive so files stored there are reachable from the Colab runtime.
drive.mount('/content/gdrive')

# Read the exported game positions into a single dataframe.
games_csv_path = "/content/gdrive/My Drive/Colab Notebooks/chess/201801_games_dl.csv"
df = pd.read_csv(games_csv_path)


Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
# Preview the first rows: a saved index column, Result, Move, Turn, then one
# column per board square (a1 ... h8) holding a numeric piece code.
df.head()

Unnamed: 0,Result,Move,Turn,a1,b1,c1,d1,e1,f1,g1,...,g7,h7,a8,b8,c8,d8,e8,f8,g8,h8
0,1,0,1,2.0,3.0,4.0,5.0,6.0,4.0,3.0,...,7.0,7.0,8.0,9.0,10.0,11.0,12.0,10.0,9.0,8.0
1,1,1,2,2.0,3.0,4.0,5.0,6.0,4.0,3.0,...,7.0,7.0,8.0,9.0,10.0,11.0,12.0,10.0,9.0,8.0
2,1,2,1,2.0,3.0,4.0,5.0,6.0,4.0,3.0,...,7.0,7.0,8.0,9.0,10.0,11.0,12.0,10.0,9.0,8.0
3,1,3,2,2.0,3.0,4.0,5.0,6.0,4.0,3.0,...,7.0,7.0,8.0,9.0,10.0,11.0,12.0,10.0,9.0,8.0
4,1,4,1,2.0,3.0,4.0,5.0,6.0,4.0,3.0,...,7.0,7.0,8.0,9.0,10.0,11.0,12.0,10.0,9.0,8.0


In [0]:
# (rows, columns) of the raw frame, before the squares are one-hot encoded.
df.shape

(504909, 67)

In [0]:
# The last 64 columns of the frame are the board squares.
squares = [square for square in df.columns[-64:]]
print(squares)

# Piece codes 1 through 12, as strings, used as suffixes for the dummy column names.
pieces = [str(code) for code in range(1, 13)]
print(pieces)

def mapColumns(col_name, columns):
  """
  Return one name per entry of `columns`, each formed as col_name + '_' + entry.
  Entries must be strings; the result is a new list in the same order.
  """
  return [col_name + '_' + suffix for suffix in columns]

['a1', 'b1', 'c1', 'd1', 'e1', 'f1', 'g1', 'h1', 'a2', 'b2', 'c2', 'd2', 'e2', 'f2', 'g2', 'h2', 'a3', 'b3', 'c3', 'd3', 'e3', 'f3', 'g3', 'h3', 'a4', 'b4', 'c4', 'd4', 'e4', 'f4', 'g4', 'h4', 'a5', 'b5', 'c5', 'd5', 'e5', 'f5', 'g5', 'h5', 'a6', 'b6', 'c6', 'd6', 'e6', 'f6', 'g6', 'h6', 'a7', 'b7', 'c7', 'd7', 'e7', 'f7', 'g7', 'h7', 'a8', 'b8', 'c8', 'd8', 'e8', 'f8', 'g8', 'h8']
['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12']


In [0]:
# One-hot encode every board square into 12 piece-indicator columns
# (<square>_1 ... <square>_12) and replace the raw square columns with them.
#
# The square columns are displayed as floats by df.head() (2.0, 3.0, ...), so
# pd.get_dummies(..., prefix=square) would label its output 'a1_2.0', 'a1_3.0', ...
# Re-indexing those labels to 'a1_1' ... 'a1_12' matches nothing and, with
# fill_value=0, silently produces all-zero features. Building each indicator by
# numeric comparison against the piece code avoids that label mismatch; missing
# or non-numeric values compare unequal to every code and stay all-zero.
dummy_columns = {}
dummy_frames = []
for col_name in squares:
    indicator_names = mapColumns(col_name, pieces)
    piece_codes = pd.to_numeric(df[col_name], errors="coerce")

    dummies = pd.DataFrame(
        {
            name: (piece_codes == int(piece)).astype("uint8")
            for name, piece in zip(indicator_names, pieces)
        },
        columns=indicator_names,
        index=df.index,
    )

    dummy_columns[col_name] = dummies.columns.values
    dummy_frames.append(dummies)

# A single concat keeps the remaining columns first, followed by the indicator
# columns in square order, without re-copying the whole frame once per square.
df = pd.concat([df.drop(columns=squares)] + dummy_frames, axis=1)

# Guard against the encoding silently degenerating to all zeros.
assert any(frame.values.any() for frame in dummy_frames), \
    "one-hot encoding produced only zeros; check the piece codes in the square columns"

In [0]:
# Hold out a test set using train_test_split's default proportions. A fixed
# random_state makes the split, and every number reported after it,
# reproducible across kernel restarts.
# NOTE(review): the rows look like individual positions of a game (see the Move
# column), so a row-wise split presumably places positions from the same game in
# both sets; consider splitting by game if a game identifier is available.
df_train, df_test = train_test_split(df, random_state=42)
print(df.shape, df_train.shape, df_test.shape)

(504909, 771) (378681, 771) (126228, 771)


In [0]:
# Regression targets: the Result column of each split as a NumPy array.
y_train, y_test = df_train["Result"].values, df_test["Result"].values

In [0]:
def create_feature_matrices(df_train, df_test, selected_columns, dummy_columns):
    """
    Creates feature matrices for the training and testing sets from the given dataframes.

    Every name in ``selected_columns`` that is a key of ``dummy_columns`` is treated
    as categorical and contributes the dummy columns listed under that key. Every
    other name is treated as numerical: it is cast to float64 and standardized with
    a StandardScaler that is fitted on the training rows only and then applied to
    the testing rows.

    Parameters
    ----------
    df_train, df_test : pandas.DataFrame
        Frames containing the numerical columns and the dummy columns referenced.
    selected_columns : iterable of str
        Names of the original (pre-encoding) columns to use as features.
    dummy_columns : dict
        Maps an original categorical column name to the names of its dummy columns.

    Returns
    -------
    (X_train, X_test) : tuple of numpy.ndarray
        float64 matrices; numerical features come first, then categorical ones.

    Raises
    ------
    ValueError
        If the selection yields no feature columns at all.
    """
    numerical_columns = []
    categorical_columns = []

    # split the selection into numerical names and expanded dummy names
    for col_name in selected_columns:
        if col_name in dummy_columns:
            categorical_columns.extend(dummy_columns[col_name])
        else:
            numerical_columns.append(col_name)

    # without this guard an empty selection fails later on an unbound local
    if not numerical_columns and not categorical_columns:
        raise ValueError("selected_columns must yield at least one feature column")

    train_parts = []
    test_parts = []

    if numerical_columns:
        X_train_numerical = df_train[numerical_columns].astype("float64").values
        X_test_numerical = df_test[numerical_columns].astype("float64").values

        # fit on the training rows only so no test statistics leak into training
        scaler = StandardScaler()
        train_parts.append(scaler.fit_transform(X_train_numerical))
        test_parts.append(scaler.transform(X_test_numerical))

    if categorical_columns:
        train_parts.append(df_train[categorical_columns].astype("float64").values)
        test_parts.append(df_test[categorical_columns].astype("float64").values)

    # a single part is returned as-is to avoid copying a potentially large matrix
    if len(train_parts) == 1:
        return train_parts[0], test_parts[0]

    return np.hstack(train_parts), np.hstack(test_parts)


In [0]:
# Build the model inputs from the one-hot encoded board squares only.
x_train, x_test = create_feature_matrices(
    df_train=df_train,
    df_test=df_test,
    selected_columns=squares,
    dummy_columns=dummy_columns,
)
print(x_train.shape, x_test.shape)

(378681, 768) (126228, 768)


In [0]:
# Training hyper-parameters.
epochs = 10
batch_size = 256

# Two hidden ReLU layers of 512 units, each followed by 20% dropout, feeding a
# single output unit with no activation.
model = Sequential([
    Dense(512, activation='relu', input_shape=(768,)),
    Dropout(0.2),
    Dense(512, activation='relu'),
    Dropout(0.2),
    Dense(1),
])
model.summary()

model.compile(optimizer='adam', loss='mean_squared_error')

# The held-out test split doubles as the validation data during training.
history = model.fit(
    x_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
    validation_data=(x_test, y_test),
)

# With no extra metrics compiled in, evaluate() returns the loss alone.
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 512)               393728    
_________________________________________________________________
dropout (Dropout)            (None, 512)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               262656    
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 513       
Total params: 656,897
Trainable params: 656,897
Non-trainable params: 0
_______________

In [0]:
# Persist the trained model to the mounted Google Drive folder as an .h5 file.
model.save('/content/gdrive/My Drive/Colab Notebooks/chess/20190209_linear.h5')

Unfortunately, this approach didn't produce a very useful model, and I think the next step will involve using convolutional layers. ![Example 1](Example4.png)