In [1]:
import torch

from main import play_sample_game_with_model
from neural_network_utils import load_npy_data
from torch_model import Model, train_model


In [2]:
# Load the data (NumPy arrays returned by the project helper)
states, policies, values = load_npy_data()

# Prefer the GPU, but fall back to the CPU so this cell does not crash on a
# machine without CUDA. With a GPU present the result is the same as `.cuda()`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Then convert the data into float32 PyTorch tensors on the selected device
states = torch.from_numpy(states).float().to(device)
policies = torch.from_numpy(policies).float().to(device)
values = torch.from_numpy(values).float().to(device)

# Sanity-check the tensor shapes before batching
print("states.shape:", states.shape)
print("policies.shape:", policies.shape)
print("values.shape:", values.shape)

states.shape: torch.Size([3133, 3, 6, 7])
policies.shape: torch.Size([3133, 7])
values.shape: torch.Size([3133])


In [3]:
# Create a function to group the data into batches
def create_batches(states, policies, values, batch_size=64):
    """Group the data into consecutive, equally sized mini-batches.

    Args:
        states: Indexable array/tensor whose first dimension is the sample axis.
        policies: Policy targets, sliced with the same indices as ``states``.
        values: Value targets, sliced with the same indices as ``states``.
        batch_size: Number of samples per batch.

    Returns:
        A list of ``(batch_states, (batch_policies, batch_values))`` tuples.
        Only full batches are produced: the trailing
        ``states.shape[0] % batch_size`` samples are dropped.
    """
    batches_count = states.shape[0] // batch_size
    batched_data = []

    for batch_index in range(batches_count):
        # Derive the slice bounds from `batch_size`; a hard-coded 64 here would
        # silently produce wrong batches for any non-default batch size.
        start_index = batch_size * batch_index
        end_index = start_index + batch_size

        batch_states = states[start_index:end_index]
        batch_policies = policies[start_index:end_index]
        batch_values = values[start_index:end_index]

        batched_data.append((batch_states, (batch_policies, batch_values)))

    return batched_data


# Group the tensors into mini-batches, using the default batch size
train_data = create_batches(states=states, policies=policies, values=values)

In [4]:
# Instantiate the model on the same device as the training data, so the model
# and its inputs can never end up on different devices.
model = Model().to(states.device)

# Train the model with train_model's default hyper-parameters.
# NOTE(review): the output saved for this cell is a CUDA device-side assert;
# the cause is not visible from this notebook. A later cell calls train_model
# with explicit epochs/learning_rate. Confirm whether the defaults are usable.
train_model(model, train_data)

RuntimeError: reduce failed to synchronize: cudaErrorAssert: device-side assert triggered

In [11]:
# Train the model with explicitly chosen hyper-parameters
training_options = {"epochs": 20, "learning_rate": 0.0001}
train_model(model, train_data, **training_options)

[0, 0] loss: 0.005363644957542419
[1, 0] loss: 0.005313974618911743
[2, 0] loss: 0.0052798867225646974
[3, 0] loss: 0.005256625413894653
[4, 0] loss: 0.0052578204870224
[5, 0] loss: 0.005256733298301697
[6, 0] loss: 0.005253722667694092
[7, 0] loss: 0.005254929065704346
[8, 0] loss: 0.005250177383422852
[9, 0] loss: 0.005252848863601684
[10, 0] loss: 0.005253406167030334
[11, 0] loss: 0.005256550312042236
[12, 0] loss: 0.005255236625671387
[13, 0] loss: 0.005254924297332764
[14, 0] loss: 0.005273787379264832
[15, 0] loss: 0.0052841055393218995
[16, 0] loss: 0.00527816891670227
[17, 0] loss: 0.00526566207408905
[18, 0] loss: 0.0052581608295440676
[19, 0] loss: 0.005250056385993958
Done.


In [14]:
# Play a sample game with the trained model. The recorded output shows, for
# each turn, the network's policy and value tensors followed by the board.
play_sample_game_with_model(model)

NN Policy: tensor([ 0.2509,  0.2600,  0.2348,  0.1534, -0.0413,  0.1108,  0.1588])
NN Value: tensor([-0.9441])
Turn: White
0  1  2  3  4  5  6
.  .  .  .  .  .  .  
.  .  .  .  .  .  .  
.  .  .  .  .  .  .  
.  .  .  .  .  .  .  
.  .  .  .  .  .  .  
.  X  .  .  .  .  .  

NN Policy: tensor([ 0.0101,  0.2731,  0.2716,  0.2458,  0.1001,  0.2288, -0.1874])
NN Value: tensor([0.7874])
Turn: Black
0  1  2  3  4  5  6
.  .  .  .  .  .  .  
.  .  .  .  .  .  .  
.  .  .  .  .  .  .  
.  .  .  .  .  .  .  
.  O  .  .  .  .  .  
.  X  .  .  .  .  .  

NN Policy: tensor([ 0.0138,  0.2469,  0.2222,  0.3938,  0.1455,  0.1855, -0.0469])
NN Value: tensor([-0.9816])
Turn: White
0  1  2  3  4  5  6
.  .  .  .  .  .  .  
.  .  .  .  .  .  .  
.  .  .  .  .  .  .  
.  .  .  .  .  .  .  
.  O  .  .  .  .  .  
.  X  .  X  .  .  .  

NN Policy: tensor([-0.1136,  0.2102,  0.1052,  0.2971,  0.2767,  0.3064, -0.0760])
NN Value: tensor([-0.9961])
Turn: Black
0  1  2  3  4  5  6
.  .  .  .  .  .  .  
.  .  . 