In [10]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np

In [11]:
class ToeDataset(Dataset):
    """Tic-tac-toe positions paired with per-square move labels, read from a CSV file.

    Every row of the file must supply at least 18 comma-separated numbers.
    Columns 0-8 are served as the sample's ``'state'`` and columns 9-17 as its
    ``'moves'``. Per the notes that accompanied the original code, a state cell
    is 0 for blank, 1 for X and -1 for O, and a move label is 1 for a good
    reply and 0 for a bad one.
    """

    # Width of the state slice and of the moves slice within a row.
    BOARD_CELLS = 9

    def __init__(self, filename, verbose=True):
        """Load ``filename`` into a float32 tensor stored on ``self.data``.

        Args:
            filename: Path of the comma-delimited data file.
            verbose: When true (the default, which matches the original
                behaviour) print the loaded tensor and its row count.

        Raises:
            ValueError: If the file yields fewer than 18 columns per row.
        """
        raw = np.genfromtxt(filename, delimiter=",", dtype=np.float32)
        # genfromtxt collapses a one-row file into a 1-D array; restore the row
        # axis so that len() counts records rather than columns.
        raw = np.atleast_2d(raw)

        needed = 2 * self.BOARD_CELLS
        if raw.shape[1] < needed:
            raise ValueError(
                "expected at least %d columns per row in %r, found %d"
                % (needed, filename, raw.shape[1])
            )

        # Columns beyond the first 18 are kept untouched (the saved notebook
        # output shows a trailing NaN column - presumably from a trailing
        # delimiter on each line); __getitem__ never reads them.
        self.data = torch.from_numpy(raw)

        if verbose:
            print(self.data)
            print(self.data.size()[0])

    def __len__(self):
        """Number of rows (records) loaded from the file."""
        return self.data.size()[0]

    def __getitem__(self, index):
        """Return ``{'state': ..., 'moves': ...}`` sliced from row ``index``."""
        cells = self.BOARD_CELLS
        d = self.data[index]
        return {'state': d[0:cells], 'moves': d[cells:2 * cells]}
        
        

In [12]:
# Load the data file once at notebook scope; later cells (ds[...], computeOne,
# drawOne, checkAll) index into this global `ds` directly.
fn = 'TicTacToeData.txt'
ds = ToeDataset(fn)

tensor([[ 0.,  0.,  0.,  ...,  1.,  1., nan],
        [ 1.,  0.,  0.,  ...,  0.,  0., nan],
        [ 0.,  1.,  0.,  ...,  1.,  0., nan],
        ...,
        [ 0., -1., -1.,  ...,  0.,  0., nan],
        [ 0., -1., -1.,  ...,  0.,  0., nan],
        [ 0., -1., -1.,  ...,  0.,  0., nan]])
4520


In [13]:
# Inspect the first record; the saved output shows an all-zero state with every move labelled 1.
ds[0]

{'state': tensor([0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 'moves': tensor([1., 1., 1., 1., 1., 1., 1., 1., 1.])}

In [5]:
# Inspect the second record; the saved output shows a 1 in square 0 and a single move labelled 1 (index 4).
ds[1]

{'state': tensor([1., 0., 0., 0., 0., 0., 0., 0., 0.]),
 'moves': tensor([0., 0., 0., 0., 1., 0., 0., 0., 0.])}

In [210]:
class AI_1(nn.Module):
    """Fully connected network mapping a board vector to one score per square.

    Layout: Linear -> ReLU -> Linear -> ReLU -> Linear, with both hidden layers
    the same width. No activation follows the last layer, so the outputs are
    raw, unbounded scores.
    """

    def __init__(self, board_size=9, hidden_size=350):
        """Build the layer stack.

        Args:
            board_size: Length of both the input vector and the output vector.
            hidden_size: Width of the two hidden layers. Experiment notes kept
                from the original code: "2 corners, 350 size internals works;
                2 corners, 275 size internals fails", with 350 described as
                big enough to memorize 4500ish entries.
        """
        super().__init__()

        # The Sequential keeps its original name and layer positions (0..4) so
        # that state_dict keys stay the same as before.
        self.net = nn.Sequential(
            nn.Linear(board_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, board_size),
        )

    def forward(self, in_data):
        """Return the per-square scores for ``in_data`` (last dim = board_size)."""
        return self.net(in_data)

In [201]:
# NOTE(review): the saved output of this cell reports 275-wide layers, whereas
# AI_1 as currently written builds 350-wide ones. Judging by the execution
# counts, the output apparently predates the latest edit of the class - re-run to refresh.
net1 = AI_1()
# Use the first CUDA GPU when PyTorch can see one, otherwise the CPU. `device`
# is a notebook global that train(), computeOne(), drawOne() and checkAll() read.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # Get the cuda device
print(device)
# Module.to() moves the parameters; as the cell's last expression it also displays the model.
net1.to(device)

cuda:0


AI_1(
  (net): Sequential(
    (0): Linear(in_features=9, out_features=275, bias=True)
    (1): ReLU()
    (2): Linear(in_features=275, out_features=275, bias=True)
    (3): ReLU()
    (4): Linear(in_features=275, out_features=9, bias=True)
  )
)

In [202]:
def train(filename='TicTacToeData.txt', epochs=500, batch_size=300, lr=0.001):
    """Train a fresh ``AI_1`` on the data file and return the trained network.

    Uses mean-squared-error loss between the network output and the
    ``'moves'`` labels, optimised with Adam over shuffled mini-batches. After
    every epoch the sum of that epoch's per-batch losses is printed. When
    training ends, one record and the network's output for it are printed as
    a quick sanity check.

    Reads the notebook-level global ``device`` to decide where the model and
    the batches live.

    Args:
        filename: Comma-delimited data file handed to ``ToeDataset``.
        epochs: Number of full passes over the data.
        batch_size: Mini-batch size for the ``DataLoader``.
        lr: Adam learning rate.

    Returns:
        The trained ``AI_1`` instance (on ``device``).
    """
    net = AI_1().to(device)
    dataset = ToeDataset(filename)
    loader = DataLoader(dataset, shuffle=True, batch_size=batch_size)

    loss_func = nn.MSELoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)

    for epoch in range(epochs):
        epoch_loss = 0

        # Iterating the loader directly reshuffles on every epoch.
        for batch in loader:
            optimizer.zero_grad()

            out = net(batch['state'].to(device))
            loss = loss_func(out, batch['moves'].to(device))
            epoch_loss += loss.item()

            loss.backward()
            optimizer.step()

        print(epoch_loss)

    # Sanity-check one record. Record 100 is used when it exists; shorter
    # datasets fall back to their last record instead of raising IndexError.
    probe_index = min(100, len(dataset) - 1)
    sample = dataset[probe_index]
    print(sample)
    # Pure inference: no need to record an autograd graph for this pass.
    with torch.no_grad():
        out1 = net(sample['state'].to(device))
    print(out1)
    return net
            

In [211]:
# Train a fresh model and bind it to the notebook global `net`, which the
# helper cells below (computeOne, drawOne) read.
net = train()

tensor([[ 0.,  0.,  0.,  ...,  1.,  1., nan],
        [ 1.,  0.,  0.,  ...,  0.,  0., nan],
        [ 0.,  1.,  0.,  ...,  1.,  0., nan],
        ...,
        [ 0., -1., -1.,  ...,  0.,  0., nan],
        [ 0., -1., -1.,  ...,  0.,  0., nan],
        [ 0., -1., -1.,  ...,  0.,  0., nan]])
4520
2.6572772562503815
2.0336792543530464
1.5916908457875252
1.3439830765128136
1.2631757333874702
1.227058857679367
1.1710941717028618
1.142081007361412
1.1275925636291504
1.0943470522761345
1.0763061046600342
1.0504592061042786
1.0477298460900784
1.0035236775875092
1.0057264678180218
0.9792597219347954
0.9769995994865894
0.9576794803142548
0.9488920159637928
0.9296834208071232
0.9325046837329865
0.9145033992826939
0.8954276740550995
0.8979905061423779
0.8807524368166924
0.8664508834481239
0.8576922379434109
0.8553134053945541
0.8708939179778099
0.8340794593095779
0.8169158659875393
0.8296901285648346
0.8074702173471451
0.8117688372731209
0.7834704518318176
0.7921396531164646
0.8166643008589745
0.77

0.13180328719317913
0.13316433504223824
0.1374906892888248
0.13544325390830636
0.12743340153247118
0.13217527931556106
0.14093870483338833
0.136001733597368
0.14042984321713448
0.12575004529207945
0.1285915533080697
0.12867113901302218
0.1280670785345137
0.13176402868703008
0.12734649423509836
0.1234326851554215
0.12473432160913944
0.12591570895165205
0.12325229356065392
0.12271815445274115
0.12616942403838038
0.13327718852087855
0.13193385861814022
0.1256871186196804
0.12333728326484561
0.12219238700345159
0.11902703391388059
0.12046280689537525
0.11590688349679112
0.11349154636263847
0.12154862098395824
0.12054250668734312
0.11608629627153277
0.12089894618839025
0.11247481312602758
0.1138797695748508
0.12818618351593614
0.12282279739156365
0.11655153939500451
0.11368130566552281
0.11344160651788116
0.12226659944280982
0.12598552973940969
0.12385676568374038
0.12374743819236755
0.12261227145791054
0.11977852834388614
0.11113854078575969
0.11641282588243484
0.11586029315367341
0.115040

In [206]:
# Hand-built position: a 1 in square 0 and every other square 0 (the same state as ds[1]).
test1 = torch.FloatTensor([1,0,0,0,0,0,0,0,0]).to(device)
# NOTE(review): net(test1) is the usual way to invoke a module; calling
# .forward() directly skips any registered hooks.
net.forward(test1)

tensor([-0.0283,  0.0993, -0.0317,  0.0339,  0.8389, -0.0849,  0.1238,  0.0043,
         0.1151], device='cuda:0', grad_fn=<AddBackward0>)

In [80]:
# Inspect record 123 (state and move labels).
ds[123]

{'state': tensor([ 1.,  0.,  0.,  0.,  0.,  0., -1.,  0.,  1.]),
 'moves': tensor([0., 1., 1., 1., 1., 1., 0., 1., 0.])}

In [107]:
def computeOne(index):
    """Return the network's raw score vector for dataset record ``index``.

    Reads the notebook globals ``ds`` (dataset), ``net`` (trained model) and
    ``device``. The result lives on ``device`` and carries no autograd history.

    Args:
        index: Position of the record in ``ds``.
    """
    # ds already serves tensors, so no re-wrapping is needed before the move.
    state = ds[index]['state'].to(device)
    # Invoke the module itself (not .forward) so hooks run, and skip graph
    # construction since this helper is only used for inference.
    with torch.no_grad():
        return net(state)
    
def drawOne(index):
    """Print the per-square error for record ``index``.

    The printed tensor is the network output minus the record's ``'moves'``
    label, so values near zero mean the prediction matches the label. Reads
    the notebook globals ``ds`` and ``device`` and delegates the forward pass
    to ``computeOne``.

    Args:
        index: Position of the record in ``ds``.
    """
    predicted = computeOne(index)
    expected = ds[index]['moves'].to(device)
    print(predicted - expected)

In [82]:
# Print prediction-minus-label for record 1234.
drawOne(1234)

tensor([-0.0134, -0.0064,  0.0010, -0.0042,  0.0050, -0.0175,  0.0240,  0.0243,
         0.0065], device='cuda:0', grad_fn=<SubBackward0>)


In [203]:
def cleaner(index, threshold=0.5):
    """Turn the network output for record ``index`` into 0/1 best guesses.

    Each score strictly greater than ``threshold`` becomes 1.0 and every other
    score becomes 0.0. An earlier variant min-max normalised the scores before
    thresholding; it is not used.

    Args:
        index: Position of the record in ``ds`` (forwarded to ``computeOne``).
        threshold: Cut-off applied to the raw scores; defaults to 0.5.

    Returns:
        A float tensor of 0.0/1.0 values with the same shape as the network output.
    """
    scores = computeOne(index)
    return (scores > threshold).float()
    
    

In [166]:
# Thresholded (0/1) prediction for record 1234.
cleaner(1234)

tensor([0., 0., 0., 0., 0., 0., 0., 0., 1.], device='cuda:0')

ds[1234]


In [167]:
# Ground truth for record 1234, to compare against the cleaner() output.
ds[1234]

{'state': tensor([ 1.,  0., -1.,  0.,  1., -1.,  1.,  0.,  0.]),
 'moves': tensor([0., 0., 0., 0., 0., 0., 0., 0., 1.])}

In [204]:
# Verify that every thresholded prediction equals its label.
# TODO: push the whole dataset through the network in a single GPU pass.
def checkAll():
    """Compare ``cleaner(i)`` with the ``'moves'`` label of every record in ``ds``.

    For each record whose prediction differs from its label anywhere, print
    the index, the prediction, the label and the largest absolute difference.
    Finish by printing the number of differing records. Reads the notebook
    globals ``ds`` and ``device``.
    """
    mismatches = 0
    for idx in range(len(ds)):
        predicted = cleaner(idx)
        expected = ds[idx]['moves'].to(device)
        # Largest element-wise gap; any value above zero means a wrong square.
        worst = (predicted - expected).abs().max().item()
        if worst > 0:
            print(idx,predicted,expected,worst)
            mismatches += 1
    print('failed:',mismatches)
        

In [212]:
# Run the full comparison; the saved output reports zero mismatches.
checkAll()

failed: 0


computeOne(0)

In [170]:
# Raw (un-thresholded) scores for record 6.
computeOne(6)

tensor([ 0.0168, -0.0028,  0.9695,  0.2486,  0.9427,  0.0703,  0.1640,  0.0597,
         0.9497], device='cuda:0', grad_fn=<AddBackward0>)