In [41]:
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import numpy as np

#returns a pandas dataframe with the data from a specified csv file
def loadData(filename):
    data = pd.read_csv(filename)
    return data

#returns one of 13 chess opening subtypes given a specific opening
def openType(openStr):
    open = openStr[0]
    num = int(openStr[1:])
    if (openStr[0] == 'A'):
        if (num < 40):
            open += '00'
        elif (num < 45):
            open += '40'
        elif (num < 50):
            open += '45'
        elif (num < 80):
            open += '50'
        else:
            open += '80'
    if (openStr[0] == 'B'):
        if (num < 20):
            open += '00'
        else:
            open += '20'
    if (openStr[0] == 'C'):
        if (num < 20):
            open += '00'
        else:
            open += '20'
    if (openStr[0] == 'D'):
        if (num < 70):
            open += '00'
        else:
            open += '70'
    if (openStr[0] == 'E'):
        if (num < 60):
            open += '00'
        else:
            open += '60'
    return open

#returns a dataset and labelset from a pandas dataframe
#the returned dataset includes the first (openingPly) moves preformed
#the returned labelset is the opening subtype defined by the moves preformed in the dataset
def dataExtraction(baseData):
    data = []
    labels = []
    
    for i in range(len(baseData.index)):
        row = baseData.iloc[i, :]
        opening = []
        moveText = row[12].split()
        for j in range(row[15]):
            opening.append(moveText[j])
        data.append(opening)
        labels.append(openType(row[13]))
    
    return data, labels

#returns a numericized version of the input array along with the dictionary used for conversion
def numericize(textData):
    numericDic = {}
    conversion = []
    if type(textData[0]) != list:
        for label in textData:
            if label not in numericDic:
                numericDic[label] = len(numericDic)
            conversion.append(numericDic[label])
    else:
        for text in textData:
            moveList = []
            for move in text:
                if move not in numericDic:
                    numericDic[move] = len(numericDic)
                moveList.append(numericDic[move])
            conversion.append(moveList)
            
    return numericDic, conversion

print("check")

check


In [42]:
from keras.preprocessing import sequence
from sklearn.model_selection import train_test_split

data = loadData("games.csv")
maxMoves = max(data.iloc[:,15])
moves, labels = dataExtraction(data)
moveDic, numMoves = numericize(moves)
labelDic, numLabels = numericize(labels)
numLabels = np.array(numLabels)

numMoves = sequence.pad_sequences(numMoves, maxlen=maxMoves)

#builds subsets for testing and training data at an 80-20 split
X_train, X_test, y_train, y_test = train_test_split(numMoves, numLabels, test_size=0.2, stratify=numLabels)

print("check")

check


In [43]:
from keras import Sequential
from keras.layers import Embedding, LSTM, Dense, Dropout

embedding_size=32
model=Sequential()
model.add(Embedding(len(moveDic), embedding_size, input_length=maxMoves))
model.add(LSTM(100))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

batch_size = 64
num_epochs = 3
model.fit(X_train, y_train, batch_size=batch_size, epochs=num_epochs, validation_split=0.2)

scores = model.evaluate(X_test, y_test, verbose=0)
print('Test accuracy:', scores[1])

Epoch 1/3
Epoch 2/3
Epoch 3/3
Test accuracy: 0.3075772821903229


In [5]:
if "this" in "whatisthis":
    print("true")
print(openType("C14"))

true
C00


In [36]:
for i in range(len(numMoves)):
    print(moves[i])
    print(numMoves[i])

['d4', 'd5', 'c4', 'c6', 'cxd5']
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 2 3 4]
['d4', 'Nc6', 'e4', 'e5']
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 5 6 7]
['e4', 'e5', 'd3']
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 7 8]
['d4', 'd5', 'Nf3']
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 9]
['e4', 'e5', 'Nf3', 'd6', 'd4']
[ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  6
  7  9 10  0]
['e4', 'c5', 'Nf3', 'Qa5']
[ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  6 11  9 12]
['d4', 'd5', 'e4', 'dxe4', 'Nc3', 'Nf6', 'f3', 'exf3', 'Nxf3', 'Nc6']
[ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  1  6 13 14 15
 16 17 18  5]
['e4', 'Nc6', 'd4', 'e5', 'd5']
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 5 0 7 1]
['e4', 'e5', 'Bc4', 'Nc6', 'Nf3', 'Nd4']
[ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  6  7
 19  5  9 20]
['e4', 'd5', 'exd5', 'Qxd5']
[ 0  0  0  0  0  0  0  0  0  0  0