In [1]:
# Using Chess (King-Rook vs. King) Data Set  from:
# https://archive.ics.uci.edu/ml/datasets/Chess+%28King-Rook+vs.+King%29
# to create a model to classify: optimal depth-of-win for White in 0 to 16 moves

In [2]:
# Charging librarys initial librarys
import pandas as pd
import numpy as np
import os

In [3]:
# Importing dataset from UCI machine learning repository passing columns names and delimiter
xadrez = pd.read_csv('http://archive.ics.uci.edu/ml/machine-learning-databases/chess/king-rook-vs-king/krkopt.data',
                    delimiter=",",names=list(['White King file(column)','White King rank(row)',
                                              'White Rook file','White Rook rank','Black King file',
                                              'Black King rank','class']))
xadrez.head()

Unnamed: 0,White King file(column),White King rank(row),White Rook file,White Rook rank,Black King file,Black King rank,class
0,a,1,b,3,c,2,draw
1,a,1,c,1,c,2,draw
2,a,1,c,1,d,1,draw
3,a,1,c,1,d,2,draw
4,a,1,c,2,c,1,draw


In [4]:
# Converting our variables that are not numeric values into categoricals
xadrez["White King file(column)"] = xadrez['White King file(column)'].astype('category')
xadrez["White Rook file"] = xadrez['White Rook file'].astype('category')
xadrez["Black King file"] = xadrez['Black King file'].astype('category')
xadrez["class"] = xadrez['class'].astype('category')

xadrez.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28056 entries, 0 to 28055
Data columns (total 7 columns):
White King file(column)    28056 non-null category
White King rank(row)       28056 non-null int64
White Rook file            28056 non-null category
White Rook rank            28056 non-null int64
Black King file            28056 non-null category
Black King rank            28056 non-null int64
class                      28056 non-null category
dtypes: category(4), int64(3)
memory usage: 768.9 KB


In [5]:
# converting our categorical variables into new binary variables
df_wkf = pd.get_dummies(xadrez['White King file(column)'])
df_wrf = pd.get_dummies(xadrez['White Rook file'])
df_bkf = pd.get_dummies(xadrez['Black King file'])

# Concat new columns with binary values into xadrez dataset
xadrez = pd.concat([xadrez, df_wrf, df_bkf, df_wkf], axis=1)

xadrez.head()

Unnamed: 0,White King file(column),White King rank(row),White Rook file,White Rook rank,Black King file,Black King rank,class,a,b,c,...,c.1,d,e,f,g,h,a.1,b.1,c.2,d.1
0,a,1,b,3,c,2,draw,0,1,0,...,1,0,0,0,0,0,1,0,0,0
1,a,1,c,1,c,2,draw,0,0,1,...,1,0,0,0,0,0,1,0,0,0
2,a,1,c,1,d,1,draw,0,0,1,...,0,1,0,0,0,0,1,0,0,0
3,a,1,c,1,d,2,draw,0,0,1,...,0,1,0,0,0,0,1,0,0,0
4,a,1,c,2,c,1,draw,0,0,1,...,1,0,0,0,0,0,1,0,0,0


In [6]:
# Removing the old columns that are converted into binary
xadrez = xadrez.drop(['White King file(column)','White Rook file','Black King file'],axis=1)

xadrez.head()

Unnamed: 0,White King rank(row),White Rook rank,Black King rank,class,a,b,c,d,e,f,...,c.1,d.1,e.1,f.1,g,h,a.1,b.1,c.2,d.2
0,1,3,2,draw,0,1,0,0,0,0,...,1,0,0,0,0,0,1,0,0,0
1,1,1,2,draw,0,0,1,0,0,0,...,1,0,0,0,0,0,1,0,0,0
2,1,1,1,draw,0,0,1,0,0,0,...,0,1,0,0,0,0,1,0,0,0
3,1,1,2,draw,0,0,1,0,0,0,...,0,1,0,0,0,0,1,0,0,0
4,1,2,1,draw,0,0,1,0,0,0,...,1,0,0,0,0,0,1,0,0,0


In [7]:
# Visualysing catecories of our class
print(xadrez['class'].cat.categories)

Index(['draw', 'eight', 'eleven', 'fifteen', 'five', 'four', 'fourteen',
       'nine', 'one', 'seven', 'six', 'sixteen', 'ten', 'thirteen', 'three',
       'twelve', 'two', 'zero'],
      dtype='object')


In [8]:
# Converting each class into new individual binary classes
df_class = pd.get_dummies(xadrez['class'])

df_class.head()

Unnamed: 0,draw,eight,eleven,fifteen,five,four,fourteen,nine,one,seven,six,sixteen,ten,thirteen,three,twelve,two,zero
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [9]:
# -----------------------------------------------------------------------#
# ------------ Creating a ensemble model using DecisionTreeClassifier ---#
#------------- to train and predict each binary class -------------------#
# ------------ ----------------------------------------------------------#

In [10]:
# Creating two variables to calculate average accuracy
accuracy = 0
m=0

# Creating a loop to train and predict each class
for column in df_class:
    
    x = xadrez.drop('class',axis=1) #separating varibles into x
    y = df_class[column] #separating target(class) into y

    #importing train_test_split function to split dataset into train and test
    from sklearn.model_selection import train_test_split

    #spliting dataset
    np.random.seed(42)
    x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.3) #70% to train and 30% to test

    #importing DecisionTreeClassifier function to train the model
    from sklearn.tree import DecisionTreeClassifier

    #creating the model and storage into tree varible
    tree = DecisionTreeClassifier()

    #training the model
    tree.fit(x_train,y_train)

    #applying our model in test dataset
    y_pred_test = tree.predict(x_test)
    
    #geting the last prediction
    df = pd.DataFrame(y_pred_test,columns =[column])
    df = df.tail(1)
    
    # If the class are the 'draw' then we create a DataFrame to storage
    # the next last prediction of another classes
    if column=='draw':
        pred = pd.DataFrame(0,columns=['test'],index=[df.index[0]])
    
    # Concat the last prediction with pred Dataframe
    pred = pd.concat([pred, df], axis=1)
    
    #--------------------------------------------------------------#
    #--------- Seeing results of each prediction separately -------#
    #--------------------------------------------------------------#

    # Importing 'classification_report' and 'confusion_matrix' functions
    from sklearn.metrics import classification_report, confusion_matrix

    print("confusion matrix of:",column)
    print(confusion_matrix(y_test,y_pred_test)) #printing confusion matrix
    print()
    print()
    print(classification_report(y_test,y_pred_test)) #printing classification report

    # Importing metrics to calculate precision/accuracy
    import sklearn.metrics as metrics

    #calculating and printing precision
    print("Accuracy:", round(metrics.precision_score(y_test,y_pred_test),3))
    print()
    print("------------------------------------------------") #given a space
    print()
    
    # Sum all accuracys of each prediction to calculate average accuracy
    accuracy = accuracy + metrics.precision_score(y_test,y_pred_test)
    m=m+1

# Calculating average accuracy
accuracy = accuracy/m

# Ploting results
print('Prediction of each class:')
print()
print(pred.head())

confusion matrix of: draw
[[7547   50]
 [  26  794]]


              precision    recall  f1-score   support

           0       1.00      0.99      0.99      7597
           1       0.94      0.97      0.95       820

   micro avg       0.99      0.99      0.99      8417
   macro avg       0.97      0.98      0.97      8417
weighted avg       0.99      0.99      0.99      8417

Accuracy: 0.941

------------------------------------------------

confusion matrix of: eight
[[7872  119]
 [  87  339]]


              precision    recall  f1-score   support

           0       0.99      0.99      0.99      7991
           1       0.74      0.80      0.77       426

   micro avg       0.98      0.98      0.98      8417
   macro avg       0.86      0.89      0.88      8417
weighted avg       0.98      0.98      0.98      8417

Accuracy: 0.74

------------------------------------------------

confusion matrix of: eleven
[[7334  199]
 [ 200  684]]


              precision    recall  f1-score  

In [15]:
# Printing average accuracy
print('Average accuracy:',round(accuracy*100,2),'%')

Average accuracy: 78.63 %


In [16]:
# Selecting dataset xadrez with the classes
full_data = xadrez.drop('class',axis=1)

# Selecting a new row of data to realize the prediction
data_to_prediction = full_data[5000:5001] #selecting the row 5000
data_to_prediction.head()

Unnamed: 0,White King rank(row),White Rook rank,Black King rank,a,b,c,d,e,f,g,...,c.1,d.1,e.1,f.1,g.1,h,a.1,b.1,c.2,d.2
5000,3,4,1,0,0,0,0,0,0,1,...,0,0,0,1,0,0,0,0,0,1


In [17]:
# Now we can execute almost the same lines of code in model production
accuracy = 0
m=0

for column in df_class:
    
    x = xadrez.drop('class',axis=1) #separing the varibles into x
    y = df_class[column] #separing the target(class) into y

    from sklearn.model_selection import train_test_split

    # Spliting dataset
    np.random.seed(42)
    x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.3)

    # Importing DecisionTreeClassifier function to train the model
    from sklearn.tree import DecisionTreeClassifier

    # Creating the model and storage into tree varible
    tree = DecisionTreeClassifier()

    # Training the model
    tree.fit(x_train,y_train)
    
    # Recieve new data to predict
    x_test = data_to_prediction

    # Applying our model in test dataset
    y_pred_test = tree.predict(x_test)
    
    # Geting the last prediction
    df = pd.DataFrame(y_pred_test,columns =[column])
    df = df.tail(1)
    
    # If the class are the 'draw' then we create a DataFrame to storage
    # the next last prediction of another classes
    if column=='draw':
        pred = pd.DataFrame(0,columns=['test'],index=[df.index[0]])
    
    # Concat the last prediction with pred Dataframe
    pred = pd.concat([pred, df], axis=1)

# Ploting a DataFrame with each prediction
print('Prediction of each class:')
print()
print(pred.head())

Prediction of each class:

   test  draw  eight  eleven  fifteen  five  four  fourteen  nine  one  seven  \
0     0     0      0       0        0     0     0         0     0    0      1   

   six  sixteen  ten  thirteen  three  twelve  two  zero  
0    0        0    0         0      0       0    0     0  


In [21]:
# Ploting class that was predicted to the last row
for i in pred.columns:
    newdf = pred.loc[lambda pred: pred[i] == 1]
    if(not newdf.empty):
        print('The optimal depth-of-win moves for White is:')
        print()
        print(i)

The optimal depth-of-win moves for White is:

seven


In [22]:
# Verifying if the class predicted was right
xadrez[5000:5001]

Unnamed: 0,White King rank(row),White Rook rank,Black King rank,class,a,b,c,d,e,f,...,c.1,d.1,e.1,f.1,g,h,a.1,b.1,c.2,d.2
5000,3,4,1,seven,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,1
