In [2]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np

In [3]:
class ToeDataset(Dataset):
    def __init__(self,filename):
        # list of 9 tuples, each a board state: 0=blank,1=X, -1=O
        #samples = [] 
        # list of good replies, 0 = bad, 1 = good
        #truth   = [] 
        
        self.data = torch.from_numpy(
            np.genfromtxt(filename, delimiter=",",dtype=np.float32)
            ) 
        print(self.data)#.shape())
        print(self.data.size()[0])
    def __len__(self):
        return self.data.size()[0]
    def __getitem__(self,index):
        d = self.data[index]
        sample = {'state' : d[0:9], 'moves': d[9:18] }
        return sample
        
        

In [4]:
fn = 'TicTacToeData.txt'
ds = ToeDataset(fn)

tensor([[ 0.,  0.,  0.,  ...,  1.,  1., nan],
        [ 1.,  0.,  0.,  ...,  0.,  0., nan],
        [ 0.,  1.,  0.,  ...,  1.,  0., nan],
        ...,
        [ 0., -1., -1.,  ...,  0.,  0., nan],
        [ 0., -1., -1.,  ...,  0.,  0., nan],
        [ 0., -1., -1.,  ...,  0.,  0., nan]])
4520


In [5]:
ds[0]

{'state': tensor([0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 'moves': tensor([1., 1., 1., 1., 1., 1., 1., 1., 1.])}

In [6]:
ds[1]

{'state': tensor([1., 0., 0., 0., 0., 0., 0., 0., 0.]),
 'moves': tensor([0., 0., 0., 0., 1., 0., 0., 0., 0.])}

In [56]:
class AI_1(nn.Module):
    def __init__(self):
        super(AI_1, self).__init__()
        
        # 2 corners, 350 size internals works
        # 2 corners, 275 size internals fails
        # MSE size 350, 500 epoch works
        
        
        board_size = 9
        internal1  = 350 # big enough to memorize 4500ish entries
        self.net   = nn.Sequential(
            nn.Linear(board_size, internal1),
            
            nn.ReLU(),
            
            nn.Linear(internal1, internal1),
            
            #nn.ReLU(),
            
            #nn.Linear(internal1, internal1),
            
            #nn.ReLU(),

            #nn.Linear(internal1, internal1),
            
            nn.ReLU(),            

            nn.Linear(internal1, board_size)                      
            );
    def forward(self, in_data):
        x = self.net(in_data)
        return x

In [57]:
net1 = AI_1()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # Get the cuda device
print(device)
net1.to(device)

cuda:0


AI_1(
  (net): Sequential(
    (0): Linear(in_features=9, out_features=350, bias=True)
    (1): ReLU()
    (2): Linear(in_features=350, out_features=350, bias=True)
    (3): ReLU()
    (4): Linear(in_features=350, out_features=9, bias=True)
  )
)

In [85]:
def train():
    net = AI_1().to(device)
    fn = 'TicTacToeData.txt'
    dataset = ToeDataset(fn)
    loader = DataLoader(dataset, shuffle=True, batch_size = 300)
    
    loss_func = nn.MSELoss() # 0.11 in 500 epochs
    #loss_func = nn.L1Loss()  # still has 220 errors at 1500 epochs, 
    #optimizer = torch.optim.Adam(net.parameters(),lr=0.001)
    #optimizer = torch.optim.RMSprop(net.parameters(),lr=0.01)
    optimizer = torch.optim.SGD(net.parameters(),lr=0.1)
    
    max_epoch = 500
    for epoch in range(max_epoch):
        dataiter = iter(loader)
        
        epoch_loss = 0
        
        for batch in dataiter:
            optimizer.zero_grad()
            
            out = net(batch['state'].to(device))
            
            loss = loss_func(out, batch['moves'].to(device))
            epoch_loss += loss.item()
            
            loss.backward()            
            
            optimizer.step()
            
        print(f'epoch/max {epoch}/{max_epoch} loss {epoch_loss}')
        
        
    sample = dataset[100]
    print(sample)
    out1 = net(sample['state'].to(device))
    print(out1)
    return net
            

In [86]:
net = train()

tensor([[ 0.,  0.,  0.,  ...,  1.,  1., nan],
        [ 1.,  0.,  0.,  ...,  0.,  0., nan],
        [ 0.,  1.,  0.,  ...,  1.,  0., nan],
        ...,
        [ 0., -1., -1.,  ...,  0.,  0., nan],
        [ 0., -1., -1.,  ...,  0.,  0., nan],
        [ 0., -1., -1.,  ...,  0.,  0., nan]])
4520
epoch/max 0/500 loss 2.9786753058433533
epoch/max 1/500 loss 2.7196363359689713
epoch/max 2/500 loss 2.721539258956909
epoch/max 3/500 loss 2.6490230709314346
epoch/max 4/500 loss 2.6048718839883804
epoch/max 5/500 loss 2.596199482679367
epoch/max 6/500 loss 2.5801521837711334
epoch/max 7/500 loss 2.5245922654867172
epoch/max 8/500 loss 2.4969595223665237
epoch/max 9/500 loss 2.4379574954509735
epoch/max 10/500 loss 2.4372216910123825
epoch/max 11/500 loss 2.4115623831748962
epoch/max 12/500 loss 2.398546740412712
epoch/max 13/500 loss 2.3946722745895386
epoch/max 14/500 loss 2.321820929646492
epoch/max 15/500 loss 2.3041509687900543
epoch/max 16/500 loss 2.2595780789852142
epoch/max 17/500 loss 

epoch/max 193/500 loss 1.0452578850090504
epoch/max 194/500 loss 1.053735576570034
epoch/max 195/500 loss 1.0478699542582035
epoch/max 196/500 loss 1.0664040632545948
epoch/max 197/500 loss 1.0353215858340263
epoch/max 198/500 loss 1.0425026528537273
epoch/max 199/500 loss 1.031612679362297
epoch/max 200/500 loss 1.0491656810045242
epoch/max 201/500 loss 1.0380432605743408
epoch/max 202/500 loss 1.0408958457410336
epoch/max 203/500 loss 1.0355584770441055
epoch/max 204/500 loss 1.0404439009726048
epoch/max 205/500 loss 1.038286980241537
epoch/max 206/500 loss 1.0490788482129574
epoch/max 207/500 loss 1.0313472785055637
epoch/max 208/500 loss 1.0390153229236603
epoch/max 209/500 loss 1.0297736451029778
epoch/max 210/500 loss 1.026357401162386
epoch/max 211/500 loss 1.0330177247524261
epoch/max 212/500 loss 1.0216062441468239
epoch/max 213/500 loss 1.035934917628765
epoch/max 214/500 loss 1.0354390069842339
epoch/max 215/500 loss 1.0412948206067085
epoch/max 216/500 loss 1.02383150532841

epoch/max 389/500 loss 0.9198148138821125
epoch/max 390/500 loss 0.9160556755959988
epoch/max 391/500 loss 0.8936569951474667
epoch/max 392/500 loss 0.9017844311892986
epoch/max 393/500 loss 0.9105147682130337
epoch/max 394/500 loss 0.8984433114528656
epoch/max 395/500 loss 0.9233234636485577
epoch/max 396/500 loss 0.9032520353794098
epoch/max 397/500 loss 0.8913984932005405
epoch/max 398/500 loss 0.9042571373283863
epoch/max 399/500 loss 0.9051567763090134
epoch/max 400/500 loss 0.886210672557354
epoch/max 401/500 loss 0.8883064277470112
epoch/max 402/500 loss 0.890726700425148
epoch/max 403/500 loss 0.8792345449328423
epoch/max 404/500 loss 0.8800850920379162
epoch/max 405/500 loss 0.9144471921026707
epoch/max 406/500 loss 0.909564945846796
epoch/max 407/500 loss 0.8945071697235107
epoch/max 408/500 loss 0.8862802051007748
epoch/max 409/500 loss 0.9006726965308189
epoch/max 410/500 loss 0.8959396928548813
epoch/max 411/500 loss 0.8917993381619453
epoch/max 412/500 loss 0.910484403371

In [11]:
test1 = torch.FloatTensor([1,0,0,0,0,0,0,0,0]).to(device)
net.forward(test1)

tensor([-0.0332,  0.0501, -0.0023,  0.0106,  0.7952,  0.1709,  0.0867,  0.0576,
        -0.0474], device='cuda:0', grad_fn=<AddBackward0>)

In [14]:
ds[123]

{'state': tensor([ 1.,  0.,  0.,  0.,  0.,  0., -1.,  0.,  1.]),
 'moves': tensor([0., 1., 1., 1., 1., 1., 0., 1., 0.])}

In [15]:
def computeOne(index):
    test1 = torch.FloatTensor(ds[index]['state']).to(device)
    return net.forward(test1)
    
def drawOne(index):
    test1 = torch.FloatTensor(ds[index]['state']).to(device)
    print(net.forward(test1) - torch.FloatTensor(ds[index]['moves']).to(device))

In [16]:
drawOne(1234)

tensor([-0.0381, -0.0367, -0.0864,  0.0020,  0.0422,  0.0197,  0.0024,  0.0476,
         0.0312], device='cuda:0', grad_fn=<SubBackward0>)


In [60]:
def cleaner(index):
    # turn vector into 0,1 best guesses
    vec = computeOne(index)
    #vec = torch.FloatTensor([1,0,0,0,0,0,0,0,0]).to(device)
    #vec = net.forward(vec)
    max = torch.max(vec).item()
    min = torch.min(vec).item()
    # cleaned = (vec-min)/(max-min) > 0.5
    cleaned = vec > 0.5
    cc = cleaned.float()
    return cc
    # print(max,min,vec,cleaned,cc)
    
    

In [51]:
cleaner(1234)

tensor([0., 0., 0., 0., 0., 0., 0., 0., 1.], device='cuda:0')

ds[1234]


In [52]:
ds[1234]

{'state': tensor([ 1.,  0., -1.,  0.,  1., -1.,  1.,  0.,  0.]),
 'moves': tensor([0., 0., 0., 0., 0., 0., 0., 0., 1.])}

In [53]:
# check all match truth
# todo - rewrite to do all in one pass on GPU
def checkAll():
    failed = 0
    for i in range(len(ds)):
        ans = cleaner(i)
        truth = ds[i]['moves'].to(device)
        diff = torch.max(abs(ans-truth)).item()
        if diff > 0:
            print(i,ans,truth,diff)
            failed = failed + 1
    print('failed:',failed)
        

In [74]:
checkAll()

76 tensor([1., 0., 0., 0., 0., 0., 0., 1., 0.], device='cuda:0') tensor([1., 0., 0., 0., 0., 0., 1., 1., 0.], device='cuda:0') 1.0
160 tensor([1., 0., 1., 1., 0., 1., 0., 0., 1.], device='cuda:0') tensor([1., 0., 1., 1., 0., 1., 0., 0., 0.], device='cuda:0') 1.0
189 tensor([0., 1., 0., 0., 1., 0., 1., 1., 1.], device='cuda:0') tensor([0., 1., 0., 1., 1., 0., 1., 1., 1.], device='cuda:0') 1.0
190 tensor([0., 1., 0., 1., 1., 1., 0., 0., 1.], device='cuda:0') tensor([0., 1., 0., 1., 1., 1., 0., 1., 1.], device='cuda:0') 1.0
195 tensor([1., 0., 0., 0., 1., 0., 1., 0., 1.], device='cuda:0') tensor([1., 0., 0., 1., 1., 0., 1., 1., 1.], device='cuda:0') 1.0
379 tensor([0., 0., 0., 0., 1., 0., 0., 1., 1.], device='cuda:0') tensor([0., 0., 0., 0., 1., 0., 0., 0., 0.], device='cuda:0') 1.0
383 tensor([0., 0., 0., 0., 1., 0., 1., 1., 1.], device='cuda:0') tensor([0., 0., 0., 0., 0., 0., 1., 0., 1.], device='cuda:0') 1.0
384 tensor([0., 0., 0., 0., 1., 0., 0., 1., 1.], device='cuda:0') tensor([0.,

1961 tensor([0., 0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0') tensor([0., 0., 1., 0., 0., 0., 0., 0., 0.], device='cuda:0') 1.0
1962 tensor([0., 0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0') tensor([0., 0., 1., 0., 0., 0., 0., 0., 0.], device='cuda:0') 1.0
1975 tensor([0., 0., 0., 0., 1., 0., 0., 0., 0.], device='cuda:0') tensor([0., 0., 1., 0., 1., 0., 0., 0., 0.], device='cuda:0') 1.0
1976 tensor([0., 0., 0., 0., 1., 0., 0., 0., 0.], device='cuda:0') tensor([0., 0., 1., 0., 1., 0., 0., 0., 0.], device='cuda:0') 1.0
1977 tensor([0., 0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0') tensor([0., 0., 1., 0., 0., 0., 0., 0., 0.], device='cuda:0') 1.0
1980 tensor([0., 1., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0') tensor([0., 1., 0., 0., 1., 0., 0., 0., 0.], device='cuda:0') 1.0
1990 tensor([0., 0., 0., 0., 0., 0., 0., 0., 1.], device='cuda:0') tensor([0., 0., 1., 0., 0., 0., 0., 0., 1.], device='cuda:0') 1.0
1991 tensor([0., 0., 0., 0., 0., 0., 0., 1., 0.], device='cuda:0') te

2734 tensor([0., 0., 0., 0., 1., 0., 1., 0., 0.], device='cuda:0') tensor([0., 0., 0., 1., 1., 0., 1., 0., 0.], device='cuda:0') 1.0
2828 tensor([0., 0., 1., 0., 0., 0., 0., 0., 0.], device='cuda:0') tensor([0., 0., 1., 0., 0., 0., 1., 0., 0.], device='cuda:0') 1.0
2832 tensor([0., 0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0') tensor([0., 0., 1., 0., 0., 0., 0., 0., 0.], device='cuda:0') 1.0
2867 tensor([1., 0., 0., 0., 0., 0., 0., 1., 1.], device='cuda:0') tensor([1., 0., 0., 0., 0., 0., 0., 0., 1.], device='cuda:0') 1.0
2969 tensor([1., 0., 0., 0., 0., 0., 0., 0., 1.], device='cuda:0') tensor([1., 0., 1., 0., 0., 0., 0., 0., 1.], device='cuda:0') 1.0
2978 tensor([0., 0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0') tensor([0., 0., 1., 0., 0., 0., 0., 0., 0.], device='cuda:0') 1.0
3013 tensor([1., 0., 0., 0., 0., 0., 0., 0., 1.], device='cuda:0') tensor([1., 0., 1., 0., 0., 0., 0., 0., 1.], device='cuda:0') 1.0
3016 tensor([0., 0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0') te

4097 tensor([1., 0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0') tensor([1., 0., 1., 0., 0., 0., 0., 0., 0.], device='cuda:0') 1.0
4118 tensor([0., 0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0') tensor([0., 0., 1., 0., 0., 0., 0., 0., 0.], device='cuda:0') 1.0
4134 tensor([0., 1., 0., 0., 0., 1., 1., 0., 0.], device='cuda:0') tensor([0., 0., 0., 0., 0., 1., 1., 0., 0.], device='cuda:0') 1.0
4192 tensor([1., 0., 0., 0., 0., 1., 0., 0., 1.], device='cuda:0') tensor([1., 0., 0., 0., 0., 0., 0., 0., 1.], device='cuda:0') 1.0
4223 tensor([0., 0., 1., 0., 0., 0., 0., 1., 0.], device='cuda:0') tensor([0., 0., 1., 0., 0., 0., 0., 0., 0.], device='cuda:0') 1.0
4226 tensor([0., 0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0') tensor([0., 0., 1., 0., 0., 0., 0., 0., 0.], device='cuda:0') 1.0
4227 tensor([0., 0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0') tensor([0., 0., 1., 0., 0., 0., 0., 0., 0.], device='cuda:0') 1.0
4237 tensor([0., 0., 0., 0., 0., 0., 0., 0., 1.], device='cuda:0') te

computeOne(0)

In [76]:
computeOne(4433)

tensor([ 0.0117,  0.0130, -0.1109,  0.0049,  0.0718, -0.0186, -0.0238,  0.0297,
         0.0065], device='cuda:0', grad_fn=<AddBackward0>)