In [9]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np

In [52]:
class ToeDataset(Dataset):
    def __init__(self,filename):
        # list of 9 tuples, each a board state: 0=blank,1=X, -1=O
        #samples = [] 
        # list of good replies, 0 = bad, 1 = good
        #truth   = [] 
        
        self.data = torch.from_numpy(
            np.genfromtxt(filename, delimiter=",",dtype=np.float32)
            ) 
        print(self.data)#.shape())
        print(self.data.size()[0])
    def __len__(self):
        return self.data.size()[0]
    def __getitem__(self,index):
        d = self.data[index]
        sample = {'state' : d[0:9], 'moves': d[9:18] }
        return sample
        
        

In [42]:
fn = 'TicTacToeData.txt'
ds = ToeDataset(fn)

tensor([[ 0.,  0.,  0.,  ...,  1.,  1., nan],
        [ 1.,  0.,  0.,  ...,  0.,  0., nan],
        [ 0.,  1.,  0.,  ...,  1.,  0., nan],
        ...,
        [ 0., -1., -1.,  ...,  0.,  0., nan],
        [ 0., -1., -1.,  ...,  0.,  0., nan],
        [ 0., -1., -1.,  ...,  0.,  0., nan]])
4520


In [43]:
ds[0]

{'state': tensor([0., 0., 0., 0., 0., 0., 0., 0.]),
 'moves': tensor([1., 1., 1., 1., 1., 1., 1., 1.])}

In [44]:
ds[1]

{'state': tensor([1., 0., 0., 0., 0., 0., 0., 0.]),
 'moves': tensor([0., 0., 0., 0., 1., 0., 0., 0.])}

In [72]:
class AI_1(nn.Module):
    def __init__(self):
        super(AI_1, self).__init__()
        
        board_size = 9
        internal1 = 5000 # big enough to memorize 4500ish entries
        self.net = nn.Sequential(
            nn.Linear(board_size, internal1),
            
            nn.ReLU(),
            
            # nn.Linear(internal1, internal1),
            
            # nn.ReLU(),

            nn.Linear(internal1, board_size)                      
            );
    def forward(self, in_data):
        x = self.net(in_data)
        return x

In [47]:
net1 = AI_1()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # Get the cuda device
print(device)
net1.to(device)

cuda:0


AI_1(
  (net): Sequential(
    (0): Linear(in_features=9, out_features=5000, bias=True)
    (1): ReLU()
    (2): Linear(in_features=5000, out_features=5000, bias=True)
    (3): ReLU()
    (4): Linear(in_features=5000, out_features=9, bias=True)
  )
)

In [79]:
def train():
    net = AI_1().to(device)
    fn = 'TicTacToeData.txt'
    dataset = ToeDataset(fn)
    loader = DataLoader(dataset, shuffle=True, batch_size = 300)
    
    loss_func = nn.MSELoss()
    optimizer = torch.optim.Adam(net.parameters(),lr=0.001)
    
    for epoch in range(10):
        dataiter = iter(loader)
        
        epoch_loss = 0
        
        for batch in dataiter:
            optimizer.zero_grad()
            
            out = net(batch['state'].to(device))
            
            loss = loss_func(out, batch['moves'].to(device))
            epoch_loss += loss.item()
            
            loss.backward()            
            
            optimizer.step()
            
        print(epoch_loss)
        
        
    sample = dataset[100]
    print(sample)
    out1 = net(sample['state'].to(device))
    print(out1)
            

In [80]:
train()

tensor([[ 0.,  0.,  0.,  ...,  1.,  1., nan],
        [ 1.,  0.,  0.,  ...,  0.,  0., nan],
        [ 0.,  1.,  0.,  ...,  1.,  0., nan],
        ...,
        [ 0., -1., -1.,  ...,  0.,  0., nan],
        [ 0., -1., -1.,  ...,  0.,  0., nan],
        [ 0., -1., -1.,  ...,  0.,  0., nan]])
4520
4.289491653442383
2.223676398396492
1.7022705748677254
1.4790069311857224
1.3654517456889153
1.3384049534797668
1.3200587257742882
1.2834888771176338
1.2523611038923264
1.2783765569329262
{'state': tensor([ 1.,  0.,  0., -1.,  0.,  0.,  1.,  0.,  0.]), 'moves': tensor([0., 0., 0., 0., 1., 0., 0., 0., 0.])}
tensor([-0.0677,  0.4809,  0.4942,  0.0441,  0.8472,  0.4570, -0.1083,  0.4579,
         0.5247], device='cuda:0', grad_fn=<AddBackward0>)
