# Naive Bayes Classifier for Chess Openings

## Model Code:

In [2]:
import pandas as pd
import numpy as np

from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import time

# Integer class label for each ECO code range; labels follow the listed order.
opening_cats = {
    eco_range: label
    for label, eco_range in enumerate((
        'A00-A39', 'A40-A44', 'A45-A49', 'A50-A79', 'A80-A99',
        'B00-B19', 'B20-B99',
        'C00-C19', 'C20-C99',
        'D00-D69', 'D70-D99',
        'E00-E59', 'E60-E99',
    ))
}

def LoadData(num_to_exclude, truncate_ply, move_tokenizer):
    """Read games.csv, label every game, split 80/20 and tokenise both splits.

    Args:
        num_to_exclude: CSV rows 1 .. num_to_exclude - 1 are skipped (row 0 is
            the header), so a value of 1 or less loads the whole file.
        truncate_ply: two-element sequence; element 0 is passed to
            processGames for the training split, element 1 for the test split.
        move_tokenizer: one of 'by ply', 'by turn', 'by turn with number'.

    Returns:
        (games, test, mcw): the processed training split, the processed test
        split, and the per-category token counts of the training split. The
        token counts of the test split are computed but discarded.
    """
    # For each ECO letter: ascending (upper bound, label) pairs plus the label
    # used when the number is above every bound. Any letter other than A-D
    # falls back to the 'E' entry.
    ladders = {
        'A': (((39, 0), (44, 1), (49, 2), (79, 3)), 4),
        'B': (((19, 5),), 6),
        'C': (((19, 7),), 8),
        'D': (((69, 9),), 10),
    }
    e_ladder = (((59, 11),), 12)

    def eco_to_label(eco):
        number = int(eco[1:])
        bounds, overflow_label = ladders.get(eco[0], e_ladder)
        for upper, label in bounds:
            if number <= upper:
                return label
        return overflow_label

    skipped_rows = list(range(1, num_to_exclude))
    games = pd.read_csv('games.csv', header=0, encoding='latin-1', skiprows=skipped_rows)

    labels = [eco_to_label(row['opening_eco']) for _, row in games.iterrows()]
    label_frame = pd.DataFrame({'label': labels})
    games = pd.concat([games, label_frame], axis=1)
    headers = list(games.columns.values)

    # The split works on a plain array, so both halves are rebuilt as
    # DataFrames with the original column names (and a fresh 0..n-1 index).
    train_rows, test_rows, _y_train, _y_test = train_test_split(
        games.to_numpy(), labels, test_size=0.2)
    X_train = pd.DataFrame(data=train_rows, columns=headers)
    X_test = pd.DataFrame(data=test_rows, columns=headers)

    # How the move string is turned into a list of tokens:
    #   by ply:              one token per single move of white or black
    #   by turn:             one token per turn, i.e. a white move plus a black move
    #   by turn with number: as 'by turn', with the turn number prefixed to the
    #                        token (pseudo-dependency on move order)
    move_tokenizer_options = {'by ply': 0, 'by turn': 1, 'by turn with number': 2}

    games, mcw = processGames(
        X_train, truncate_ply[0], move_tokenizer_options[move_tokenizer], opening_cats)
    test, _mcw_test = processGames(
        X_test, truncate_ply[1], move_tokenizer_options[move_tokenizer], opening_cats)
    return games, test, mcw

def _group_moves_by_turn(tokens, limit, with_number):
    """Join the first `limit` plies of `tokens` into one string per turn."""
    turns = []
    for move_count, move in enumerate(tokens):
        if move_count >= limit:
            break
        if move_count % 2 == 0:
            # White's move opens a new turn, optionally prefixed with "<n>.".
            prefix = str(move_count // 2 + 1) + '.' if with_number else ''
            turns.append(prefix + str(move))
        else:
            # Black's move is appended to the turn white just opened.
            turns[move_count // 2] += ' ' + str(move)
    return turns


def processGames(games, truncate_ply, move_tokenizer, opening_cats):
    """Tokenise the opening moves of every game and count tokens per category.

    The result columns are built explicitly instead of being written into the
    rows yielded by DataFrame.iterrows(): pandas does not guarantee that such
    writes reach the frame (they are dropped when the row is a copy), and the
    caller's 'opening_ply' column is no longer modified in place.

    Args:
        games: DataFrame with 'moves' (move string handed to word_tokenize),
            'opening_ply' (integer) and 'label' (integer category) columns.
        truncate_ply: if truthy, cut each game at its own opening_ply (rounded
            up to an even number); otherwise use a fixed cap of 14 plies.
        move_tokenizer: 0 = one token per ply, 1 = one token per turn,
            2 = one token per turn prefixed with the turn number. Any other
            value produces an empty token list for every game.
        opening_cats: mapping whose values are the integer category labels.

    Returns:
        (data, mcw): `data` has the same index as `games` and the columns
        'moves' (list of tokens) and 'opening' (category label); `mcw` holds
        one MostCommonWords result per entry of `opening_cats`, in the
        mapping's iteration order.
    """
    maxPly = 14

    move_lists = []
    for raw_moves, opening_ply in zip(games['moves'], games['opening_ply']):
        # Round an odd opening length up so that it ends on a complete turn.
        even_ply = opening_ply + 1 if opening_ply % 2 != 0 else opening_ply
        limit = even_ply if truncate_ply else maxPly
        tokens = word_tokenize(raw_moves)

        if move_tokenizer == 0:
            # NOTE(review): this mode keeps limit + 1 tokens while the turn
            # modes keep exactly `limit` plies; kept unchanged to preserve
            # existing results - confirm whether the extra ply is intended.
            move_lists.append(tokens[0:limit + 1])
        elif move_tokenizer in (1, 2):
            move_lists.append(_group_moves_by_turn(tokens, limit, move_tokenizer == 2))
        else:
            move_lists.append([])

    data = pd.DataFrame({
        'moves': pd.Series(move_lists, index=games.index, dtype=object),
        'opening': pd.Series(list(games['label']), index=games.index, dtype=object),
    })

    mcw = []
    for key in opening_cats:
        category_rows = data.loc[data['opening'] == opening_cats[key]]
        pooled_tokens = [token for move_list in category_rows['moves'] for token in move_list]
        mcw.append(MostCommonWords(pooled_tokens))
    return data, mcw


def MostCommonWords(data):
    """Count how often each item of `data` occurs, using NLTK's FreqDist.

    Input: an iterable of tokens (the caller in this file passes a list of
    move strings).
    Output: the FreqDist built from `data`, a dictionary-like NLTK object.
    """
    return FreqDist(data)

def Prob_Word_GivenY(word, train_data, numWords, alpha, y):
    """Smoothed estimate used as P(word | category y).

    Args:
        word: token to look up in each row's 'feature_list' dictionary.
        train_data: DataFrame with 'feature_list' (dict token -> count) and
            'opening' (category label) columns.
        numWords: multiplied by `alpha` in the denominator.
        alpha: additive smoothing constant.
        y: category label the rows are compared against.

    Returns:
        (hits + alpha) / (hits + numWords * alpha), where `hits` is the number
        of rows of category `y` whose count for `word` is positive.
    """
    # NOTE(review): the denominator uses the same count as the numerator (rows
    # of class y that contain the word), not the total number of class-y rows;
    # confirm this is the intended estimate.
    hits = 0
    for _, game in train_data.iterrows():
        occurrences = game['feature_list'].get(word)
        if not occurrences:
            # Missing or zero count: this row cannot contribute.
            continue
        if game['opening'] == y and occurrences > 0:
            hits += 1
    return (hits + alpha) / (hits + numWords * alpha)

def Classify2(moves, p_category, train_splits, numWords, alpha, categories):
    """Return the index of the category with the highest naive Bayes score.

    Each category's score starts at its entry in `p_category` and is
    multiplied by Prob_Word_GivenY for every token in `moves`, evaluated on
    that category's slice of the training data (`train_splits[label]`).
    `p_category` itself is left untouched; on ties the lowest index wins.
    """
    scores = list(p_category)
    for token in moves:
        for label in categories.values():
            scores[label] *= Prob_Word_GivenY(
                token, train_splits[label], numWords, alpha, label)
    best_score = max(scores)
    return scores.index(best_score)

def Training2(train_data, train_wc, categories, test_data):
    """Fit the naive Bayes model on `train_data` and evaluate it on `test_data`.

    Prints the training time, the testing time, the accuracy and a confusion
    matrix whose rows are predicted labels and whose columns are actual
    labels. Nothing is returned.

    Args:
        train_data: DataFrame with 'moves' (list of tokens) and 'opening'
            (category label) columns. A 'feature_list' column holding one
            dict (token -> count in that game) per row is added to it.
        train_wc: one dict-like token count per category; its keys define the
            vocabulary.
        categories: mapping whose values are the integer category labels.
        test_data: DataFrame with 'moves' and 'opening' columns.
    """
    training_start_time = time.time()

    dictionary = set()
    for frqdist in train_wc:
        dictionary.update(frqdist.keys())

    alpha = 1
    num_words = [len(frqdist) for frqdist in train_wc]

    # Build the per-game count dictionaries first and assign them as a whole
    # column. Writing into the rows yielded by iterrows() is not guaranteed to
    # reach the DataFrame, which would leave this column empty.
    feature_maps = [
        {word: moves.count(word) for word in dictionary}
        for moves in train_data['moves']
    ]
    train_data['feature_list'] = pd.Series(
        feature_maps, index=train_data.index, dtype=object)

    train_splits = []
    m = []
    for key in categories:
        rows = train_data.loc[train_data['opening'] == categories[key]]
        m.append(len(rows))
        train_splits.append(rows)

    # Smoothed class priors, using the same alpha as the word likelihoods.
    total_games = sum(m)
    p = [(m_cat + alpha) / (total_games + len(categories) * alpha) for m_cat in m]

    print('Training Time (seconds): ', (time.time() - training_start_time))

    test_start_time = time.time()
    correct = 0
    total_words = sum(num_words)
    conf_matrix = pd.DataFrame(np.zeros(shape=(len(categories), len(categories))))
    for i, row in test_data.iterrows():
        prediction = Classify2(row['moves'], p, train_splits, total_words, alpha, categories)
        conf_matrix.iat[prediction, row['opening']] += 1
        if prediction == row['opening']:
            correct += 1
    print('Testing Time (seconds): ', (time.time() - test_start_time))

    # An empty test set has no defined accuracy; report NaN instead of
    # raising ZeroDivisionError.
    accuracy = correct / len(test_data) if len(test_data) else float('nan')
    print('ACCURACY: ', accuracy)
    print(conf_matrix)

## Tests:

### 500 Games, training not truncated by opening_ply, separated by ply

In [15]:
# 19500 -> LoadData skips CSV rows 1-19499 and loads only the rows after them.
# [False, False] -> neither the training nor the test split is cut at
# opening_ply; both use the fixed ply cap. One token per ply.
games, test, mcw = LoadData(19500, [False, False], 'by ply')
Training2(games, mcw, opening_cats, test)

Training Time (seconds):  1.6902530193328857
Testing Time (seconds):  53.03750777244568
ACCURACY:  0.36607142857142855
      0    1    2    3    4     5     6     7     8    9   10   11   12
0   0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0  0.0
1   0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0  0.0
2   0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0  0.0
3   0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0  0.0
4   0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0  0.0
5   0.0  1.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0  0.0
6   2.0  0.0  0.0  2.0  2.0   0.0   5.0   1.0   0.0  0.0  0.0  1.0  0.0
7   0.0  0.0  0.0  0.0  0.0   0.0   1.0   0.0   0.0  0.0  0.0  0.0  0.0
8   5.0  2.0  0.0  0.0  1.0  13.0  14.0  10.0  28.0  3.0  0.0  0.0  2.0
9   2.0  4.0  1.0  0.0  0.0   0.0   0.0   0.0   0.0  8.0  0.0  4.0  0.0
10  0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0  0.0
11  0.0  0.0  0.0

### 500 Games, training not truncated by opening_ply, separated by turn

In [16]:
# 19500 -> LoadData skips CSV rows 1-19499 and loads only the rows after them.
# [False, False] -> neither split is cut at opening_ply (fixed ply cap).
# One token per turn (white move + black move).
games, test, mcw = LoadData(19500, [False, False], 'by turn')
Training2(games, mcw, opening_cats, test)

Training Time (seconds):  5.411482095718384
Testing Time (seconds):  23.001007080078125
ACCURACY:  0.8392857142857143
      0    1    2    3    4    5     6     7     8     9   10   11   12
0   2.0  0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
1   0.0  0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
2   0.0  0.0  1.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
3   0.0  0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
4   0.0  0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
5   1.0  0.0  2.0  0.0  0.0  9.0   0.0   0.0   0.0   0.0  0.0  0.0  1.0
6   1.0  0.0  0.0  0.0  0.0  0.0  19.0   0.0   0.0   0.0  0.0  0.0  0.0
7   0.0  0.0  0.0  0.0  0.0  0.0   0.0  12.0   0.0   0.0  0.0  0.0  0.0
8   0.0  2.0  0.0  0.0  0.0  2.0   0.0   0.0  30.0   0.0  0.0  0.0  0.0
9   3.0  4.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0  16.0  1.0  0.0  0.0
10  0.0  0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
11  0.0  0.0  1.0 

### 500 Games, training not truncated by opening_ply, separated by turn with number

In [17]:
# 19500 -> LoadData skips CSV rows 1-19499 and loads only the rows after them.
# [False, False] -> neither split is cut at opening_ply (fixed ply cap).
# One token per turn, prefixed with the turn number.
games, test, mcw = LoadData(19500, [False, False], 'by turn with number')
Training2(games, mcw, opening_cats, test)

Training Time (seconds):  8.081760883331299
Testing Time (seconds):  22.535916805267334
ACCURACY:  0.8571428571428571
      0    1    2    3    4     5     6     7     8    9   10   11   12
0   3.0  0.0  0.0  0.0  0.0   1.0   0.0   0.0   0.0  1.0  0.0  0.0  0.0
1   0.0  1.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0  0.0
2   0.0  0.0  4.0  0.0  0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0  0.0
3   0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0  0.0
4   0.0  0.0  0.0  0.0  2.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0  0.0
5   0.0  0.0  0.0  0.0  0.0  12.0   0.0   0.0   0.0  0.0  0.0  0.0  0.0
6   0.0  2.0  0.0  0.0  0.0   0.0  19.0   0.0   0.0  1.0  0.0  0.0  0.0
7   1.0  2.0  0.0  0.0  0.0   0.0   0.0  10.0   0.0  0.0  0.0  0.0  0.0
8   1.0  1.0  0.0  0.0  0.0   0.0   0.0   0.0  29.0  0.0  0.0  0.0  1.0
9   0.0  3.0  1.0  0.0  0.0   0.0   0.0   0.0   0.0  7.0  0.0  0.0  0.0
10  0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0  0.0
11  0.0  0.0  0.0 

### 1000 Games, training truncated by opening_ply, separated by ply

In [18]:
# 19000 -> LoadData skips CSV rows 1-18999 and loads only the rows after them.
# [True, False] -> training moves are cut at each game's opening_ply; test
# moves use the fixed ply cap instead. One token per ply.
games, test, mcw = LoadData(19000, [True, False], 'by ply')
Training2(games, mcw, opening_cats, test)

Training Time (seconds):  1.5964381694793701
Testing Time (seconds):  176.69145584106445
ACCURACY:  0.47641509433962265
      0    1    2    3    4     5     6     7     8     9   10   11   12
0   3.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
1   0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
2   0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
3   0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
4   0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
5   0.0  0.0  0.0  0.0  0.0   1.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
6   0.0  1.0  1.0  0.0  0.0   1.0   8.0   4.0   0.0   0.0  0.0  1.0  0.0
7   0.0  0.0  0.0  0.0  0.0   0.0   0.0   2.0   0.0   0.0  0.0  0.0  0.0
8   8.0  0.0  4.0  0.0  2.0  25.0  18.0  11.0  61.0   3.0  0.0  1.0  3.0
9   5.0  8.0  2.0  1.0  1.0   1.0   0.0   0.0   0.0  26.0  1.0  9.0  0.0
10  0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
11  

### 1000 Games, training truncated by opening_ply, separated by turn

In [19]:
# 19000 -> LoadData skips CSV rows 1-18999 and loads only the rows after them.
# [True, False] -> training moves are cut at each game's opening_ply; test
# moves use the fixed ply cap instead. One token per turn.
games, test, mcw = LoadData(19000, [True, False], 'by turn')
Training2(games, mcw, opening_cats, test)

Training Time (seconds):  3.7460148334503174
Testing Time (seconds):  79.0015549659729
ACCURACY:  0.7075471698113207
       0    1    2    3    4     5     6     7     8     9   10   11   12
0    1.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
1    0.0  1.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
2    0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
3    0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
4    0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
5    4.0  2.0  2.0  1.0  0.0  13.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
6    1.0  0.0  0.0  0.0  0.0   1.0  29.0   0.0   0.0   0.0  0.0  0.0  0.0
7    0.0  0.0  0.0  0.0  0.0   0.0   0.0  13.0   0.0   1.0  0.0  0.0  0.0
8   10.0  1.0  1.0  0.0  1.0   6.0   0.0   0.0  60.0   0.0  0.0  0.0  0.0
9   10.0  9.0  3.0  0.0  2.0   1.0   0.0   1.0   0.0  23.0  0.0  2.0  0.0
10   0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0 

### 1000 Games, training truncated by opening_ply, separated by turn with number

In [20]:
# 19000 -> LoadData skips CSV rows 1-18999 and loads only the rows after them.
# [True, False] -> training moves are cut at each game's opening_ply; test
# moves use the fixed ply cap instead. One numbered token per turn.
games, test, mcw = LoadData(19000, [True, False], 'by turn with number')
Training2(games, mcw, opening_cats, test)

Training Time (seconds):  5.49053692817688
Testing Time (seconds):  81.03646898269653
ACCURACY:  0.8301886792452831
      0    1    2    3    4     5     6     7     8     9   10   11   12
0   7.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0   1.0  0.0  0.0  0.0
1   0.0  6.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
2   0.0  0.0  1.0  0.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
3   0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
4   0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
5   0.0  0.0  0.0  0.0  0.0  20.0   0.0   0.0   1.0   0.0  0.0  0.0  0.0
6   1.0  1.0  0.0  0.0  0.0   0.0  23.0   0.0   0.0   0.0  0.0  0.0  0.0
7   1.0  0.0  0.0  0.0  0.0   0.0   0.0  14.0   0.0   2.0  0.0  0.0  0.0
8   4.0  2.0  0.0  0.0  1.0   3.0   0.0   0.0  67.0   0.0  0.0  1.0  0.0
9   2.0  2.0  9.0  0.0  2.0   0.0   0.0   0.0   0.0  31.0  0.0  2.0  1.0
10  0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
11  0.0 

### 1000 Games, training not truncated by opening_ply, separated by turn

In [22]:
# 19000 -> LoadData skips CSV rows 1-18999 and loads only the rows after them.
# [False, False] -> neither split is cut at opening_ply (fixed ply cap).
# One token per turn.
games, test, mcw = LoadData(19000, [False, False], 'by turn')
Training2(games, mcw, opening_cats, test)

Training Time (seconds):  15.372795820236206
Testing Time (seconds):  79.9490258693695
ACCURACY:  0.7405660377358491
      0    1    2    3    4     5     6     7     8     9   10   11   12
0   5.0  1.0  0.0  0.0  1.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  2.0
1   1.0  5.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
2   0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
3   0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
4   0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
5   1.0  1.0  5.0  0.0  0.0  17.0   0.0   0.0   0.0   0.0  0.0  0.0  3.0
6   2.0  0.0  2.0  0.0  0.0   0.0  29.0   0.0   0.0   0.0  0.0  1.0  0.0
7   0.0  0.0  0.0  0.0  0.0   0.0   0.0  14.0   0.0   0.0  0.0  0.0  0.0
8   8.0  1.0  0.0  0.0  0.0   6.0   0.0   0.0  56.0   0.0  0.0  0.0  0.0
9   3.0  4.0  2.0  0.0  1.0   0.0   0.0   0.0   0.0  28.0  2.0  7.0  1.0
10  0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
11  0.0

### 1000 Games, training not truncated by opening_ply, separated by turn with number

In [23]:
# 19000 -> LoadData skips CSV rows 1-18999 and loads only the rows after them.
# [False, False] -> neither split is cut at opening_ply (fixed ply cap).
# One numbered token per turn.
games, test, mcw = LoadData(19000, [False, False], 'by turn with number')
Training2(games, mcw, opening_cats, test)

Training Time (seconds):  24.883290767669678
Testing Time (seconds):  79.53715395927429
ACCURACY:  0.8160377358490566
      0     1     2    3    4     5     6     7     8     9   10   11   12
0   3.0   1.0   0.0  0.0  0.0   0.0   0.0   0.0   0.0   1.0  0.0  0.0  1.0
1   0.0  10.0   0.0  0.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
2   0.0   0.0   2.0  0.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
3   0.0   0.0   0.0  0.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
4   0.0   0.0   0.0  0.0  1.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
5   1.0   0.0   0.0  0.0  0.0  21.0   0.0   0.0   0.0   0.0  0.0  1.0  0.0
6   0.0   0.0   0.0  0.0  0.0   0.0  19.0   0.0   0.0   0.0  0.0  0.0  0.0
7   0.0   0.0   0.0  0.0  0.0   0.0   0.0  13.0   0.0   0.0  0.0  0.0  0.0
8   8.0   0.0   0.0  0.0  2.0   3.0   0.0   0.0  67.0   0.0  0.0  0.0  0.0
9   5.0   1.0  10.0  0.0  1.0   0.0   0.0   0.0   0.0  30.0  1.0  1.0  0.0
10  0.0   0.0   0.0  0.0  0.0   0.0   0.0   0.0   0.0   0

### 2,000 Games, training truncated by opening_ply, separated by turn

In [3]:
# 18000 -> LoadData skips CSV rows 1-17999 and loads only the rows after them.
# [True, False] -> training moves are cut at each game's opening_ply; test
# moves use the fixed ply cap instead. One token per turn.
games, test, mcw = LoadData(18000, [True, False], 'by turn')
Training2(games, mcw, opening_cats, test)

Training Time (seconds):  9.83779501914978
Testing Time (seconds):  303.6922070980072
ACCURACY:  0.720873786407767
       0    1    2    3    4     5     6     7     8     9   10   11   12
0   14.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
1    0.0  1.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
2    0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
3    0.0  0.0  0.0  1.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
4    0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0.0
5    4.0  2.0  0.0  0.0  0.0  27.0   0.0   3.0   0.0   0.0  0.0  0.0  0.0
6    4.0  3.0  1.0  0.0  1.0   1.0  58.0   0.0   0.0   0.0  0.0  0.0  0.0
7    0.0  1.0  0.0  0.0  0.0   0.0   0.0  29.0   0.0   0.0  0.0  0.0  0.0
8   23.0  7.0  2.0  1.0  4.0  13.0   3.0   2.0  97.0   1.0  0.0  0.0  2.0
9   10.0  6.0  8.0  0.0  4.0   2.0   0.0   0.0   0.0  56.0  2.0  5.0  0.0
10   0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0   0.0   0.0  0.0  0.0  0

### 2,000 Games, training truncated by opening_ply, separated by turn with number

In [4]:
# 18000 -> LoadData skips CSV rows 1-17999 and loads only the rows after them.
# [True, False] -> training moves are cut at each game's opening_ply; test
# moves use the fixed ply cap instead. One numbered token per turn.
games, test, mcw = LoadData(18000, [True, False], 'by turn with number')
Training2(games, mcw, opening_cats, test)

Training Time (seconds):  15.425403833389282
Testing Time (seconds):  302.2255790233612
ACCURACY:  0.8155339805825242
       0    1    2    3    4     5     6     7      8     9   10   11   12
0   29.0  2.0  0.0  0.0  0.0   0.0   0.0   0.0    0.0   1.0  0.0  1.0  0.0
1    0.0  7.0  0.0  0.0  0.0   0.0   0.0   0.0    0.0   0.0  0.0  0.0  0.0
2    0.0  0.0  1.0  0.0  0.0   0.0   0.0   0.0    0.0   0.0  0.0  0.0  0.0
3    0.0  0.0  0.0  1.0  0.0   0.0   0.0   0.0    0.0   0.0  0.0  0.0  0.0
4    0.0  0.0  0.0  0.0  1.0   0.0   0.0   0.0    0.0   0.0  0.0  0.0  0.0
5    3.0  0.0  0.0  1.0  0.0  45.0   0.0   2.0    0.0   0.0  0.0  1.0  0.0
6    3.0  0.0  0.0  1.0  0.0   1.0  52.0   1.0    0.0   0.0  0.0  0.0  0.0
7    0.0  1.0  0.0  0.0  0.0   0.0   0.0  24.0    0.0   1.0  0.0  0.0  0.0
8   13.0  4.0  0.0  0.0  0.0   7.0   1.0   1.0  119.0   1.0  0.0  0.0  0.0
9    7.0  5.0  9.0  1.0  1.0   1.0   0.0   0.0    0.0  44.0  2.0  3.0  0.0
10   0.0  0.0  0.0  0.0  0.0   0.0   0.0   0.0    0.0   0

### 5,000 Games, training not truncated by opening_ply, separated by turn

In [5]:
# 15000 -> LoadData skips CSV rows 1-14999 and loads only the rows after them.
# [False, False] -> neither split is cut at opening_ply (fixed ply cap).
# One token per turn.
games, test, mcw = LoadData(15000, [False, False], 'by turn')
Training2(games, mcw, opening_cats, test)

Training Time (seconds):  189.7213430404663
Testing Time (seconds):  1834.0691928863525
ACCURACY:  0.766798418972332
       0     1     2    3    4      5      6     7      8      9   10    11  \
0   71.0   5.0   1.0  1.0  2.0    6.0    0.0   0.0    0.0    1.0  2.0   1.0   
1    0.0   1.0   0.0  0.0  0.0    0.0    0.0   0.0    0.0    0.0  0.0   0.0   
2    0.0   0.0   0.0  0.0  0.0    0.0    0.0   0.0    0.0    1.0  0.0   0.0   
3    0.0   0.0   0.0  0.0  0.0    0.0    0.0   0.0    0.0    0.0  0.0   0.0   
4    0.0   0.0   0.0  0.0  0.0    0.0    0.0   0.0    0.0    0.0  0.0   0.0   
5    5.0   5.0   2.0  3.0  0.0  104.0    0.0   4.0    1.0    0.0  0.0   0.0   
6    6.0   5.0   0.0  1.0  0.0    2.0  126.0   2.0    0.0    1.0  0.0   0.0   
7    0.0   3.0   0.0  0.0  0.0    0.0    0.0  50.0    0.0    0.0  0.0   0.0   
8   29.0   7.0   2.0  5.0  4.0   24.0    4.0   4.0  297.0    7.0  0.0   0.0   
9   20.0  12.0  17.0  3.0  5.0    2.0    0.0   4.0    0.0  119.0  2.0  17.0   
10   0.0   0.0

### 5,000 Games, training not truncated by opening_ply, separated by turn with number

In [6]:
# 15000 -> LoadData skips CSV rows 1-14999 and loads only the rows after them.
# [False, False] -> neither split is cut at opening_ply (fixed ply cap).
# One numbered token per turn.
games, test, mcw = LoadData(15000, [False, False], 'by turn with number')
Training2(games, mcw, opening_cats, test)

Training Time (seconds):  349.56856298446655
Testing Time (seconds):  1826.8452939987183
ACCURACY:  0.8320158102766798
       0    1     2    3    4     5      6     7      8      9   10    11  \
0   70.0  0.0   0.0  0.0  1.0   0.0    0.0   0.0    0.0    3.0  1.0   1.0   
1    0.0  9.0   0.0  0.0  0.0   0.0    0.0   0.0    0.0    0.0  0.0   0.0   
2    0.0  0.0   6.0  0.0  0.0   0.0    0.0   0.0    0.0    0.0  0.0   0.0   
3    0.0  0.0   0.0  0.0  0.0   0.0    0.0   0.0    0.0    0.0  0.0   0.0   
4    0.0  0.0   0.0  0.0  2.0   0.0    0.0   0.0    0.0    0.0  0.0   0.0   
5    7.0  5.0   0.0  0.0  0.0  97.0    1.0   0.0    1.0    0.0  0.0   1.0   
6    3.0  0.0   0.0  0.0  0.0   2.0  132.0   1.0    0.0    0.0  0.0   0.0   
7    0.0  2.0   0.0  0.0  0.0   1.0    1.0  69.0    0.0    0.0  0.0   0.0   
8   35.0  5.0   0.0  0.0  2.0  17.0    3.0   3.0  312.0    0.0  0.0   0.0   
9   14.0  9.0  19.0  6.0  2.0   0.0    0.0   2.0    0.0  122.0  1.0  12.0   
10   0.0  0.0   0.0  0.0  0.0   0.