In [1]:
from c2048 import Game, push

In [2]:
import torch
from torch import nn
import torch.optim as optim
import numpy as np

In [3]:
class NN2048(nn.Module):
    """Four-branch convolutional value network producing one scalar per board.

    The input goes through two first-level convolutions (kernel 2x1 and
    kernel 1x2).  Each of those feeds two second-level convolutions with the
    same two kernel shapes, giving four branches.  Every branch is flattened,
    mapped to a single number by its own linear layer, and the four numbers
    are added together.

    The linear layers expect ``filter2 * 8`` or ``filter2 * 9`` features, which
    matches the 2x4 / 3x3 / 3x3 / 4x2 feature maps obtained from a 4x4 input.
    """

    def __init__(self, input_size=16, filter1=512, filter2=4096, drop_prob=0.):
        # NOTE: ``drop_prob`` is accepted for interface compatibility but no
        # layer in this module uses it.
        super().__init__()
        tall_kernel = (2, 1)
        wide_kernel = (1, 2)

        # First level: one convolution per kernel orientation.
        self.conv_a = nn.Conv2d(in_channels=input_size, out_channels=filter1,
                                kernel_size=tall_kernel, padding=0)
        self.conv_b = nn.Conv2d(in_channels=input_size, out_channels=filter1,
                                kernel_size=wide_kernel, padding=0)

        # Second level: both orientations applied on top of each first-level map.
        self.conv_aa = nn.Conv2d(in_channels=filter1, out_channels=filter2,
                                 kernel_size=tall_kernel, padding=0)
        self.conv_ab = nn.Conv2d(in_channels=filter1, out_channels=filter2,
                                 kernel_size=wide_kernel, padding=0)
        self.conv_ba = nn.Conv2d(in_channels=filter1, out_channels=filter2,
                                 kernel_size=tall_kernel, padding=0)
        self.conv_bb = nn.Conv2d(in_channels=filter1, out_channels=filter2,
                                 kernel_size=wide_kernel, padding=0)

        self.relu = nn.ReLU()

        # One scalar head per branch.
        self.W_aa = nn.Linear(filter2 * 8, 1)
        self.W_ab = nn.Linear(filter2 * 9, 1)
        self.W_ba = nn.Linear(filter2 * 9, 1)
        self.W_bb = nn.Linear(filter2 * 8, 1)

    def flatten(self, x):
        """Collapse every dimension after the first into a single one."""
        return x.view(x.size(0), -1)

    def forward(self, x):
        """Return an ``(N, 1)`` tensor: the sum of the four branch heads."""
        x = x.float()
        first_a = self.relu(self.conv_a(x))
        first_b = self.relu(self.conv_b(x))

        branches = (
            (self.conv_aa, self.W_aa, first_a),
            (self.conv_ab, self.W_ab, first_a),
            (self.conv_ba, self.W_ba, first_b),
            (self.conv_bb, self.W_bb, first_b),
        )
        heads = [
            head(self.flatten(self.relu(conv(features))))
            for conv, head, features in branches
        ]
        return heads[0] + heads[1] + heads[2] + heads[3]

In [4]:
# Channel index for each tile value: 2**k -> k for k = 1..15, and 0 (empty) -> 0.
table = {1 << exponent: exponent for exponent in range(1, 16)}
table[0] = 0


def make_input(grid):
    """One-hot encode a 4x4 board into a ``(16, 4, 4)`` float array.

    For every cell ``(i, j)`` the plane ``table[grid[i, j]]`` is set to 1 at
    ``(i, j)``; all other entries stay 0.  A cell value that is not a key of
    ``table`` raises ``KeyError``.
    """
    planes = np.zeros(shape=(16, 4, 4))
    for row, col in np.ndindex(4, 4):
        channel = table[grid[row, col]]
        planes[channel, row, col] = 1
    return planes

In [5]:
def add_two(mat):
    """Write a 2 into one randomly chosen zero cell of ``mat`` and return ``mat``.

    The array is modified in place; the returned object is the same array.
    The cell is picked with ``np.random.randint`` over all positions where
    ``mat == 0``, so a board without any zero cell makes that call raise
    ``ValueError``.
    """
    empty_cells = np.argwhere(mat == 0)
    choice = np.random.randint(0, len(empty_cells))
    target = tuple(empty_cells[choice])
    mat[target] = 2
    return mat

In [13]:
def Vchange(grid, v):
    """Build an 8-sample batch from one encoded board and one target value.

    ``grid`` is indexed as ``(channel, row, col)``.  The batch holds, in order:
    the board itself, its flip along axis 1, its flip along axis 2, both flips
    combined, and then the same four variants of the board with axes 1 and 2
    swapped.  Every sample is paired with the same target ``v``.

    Returns:
        ``(xtrain, ytrain)`` where ``xtrain`` stacks the eight variants along a
        new leading axis and ``ytrain`` is a length-8 array filled with ``v``.
    """
    variants = []
    for base in (grid, grid.swapaxes(1, 2)):
        mirrored = base[:, :, ::-1]
        variants.extend((
            base,
            base[:, ::-1, :],
            mirrored,
            mirrored[:, ::-1, :],
        ))
    xtrain = np.array(variants)
    ytrain = np.array([v for _ in range(8)])
    return xtrain, ytrain

def gen_sample_and_learn(model, optimizer, loss_fn):
    """Play one game of 2048 with ``model`` and train it after every step.

    On each step a 2 is added to the current board, every move 0..3 is tried
    on a copy of it, and the move maximising ``2 * s + model(board)`` is
    chosen, where ``s`` is the value returned by ``push`` and ``board`` is the
    encoded result of the move.  The encoded board chosen on the previous step
    is then regressed towards that best value (towards 0 once no move is
    available), using the eight variants from ``Vchange`` as one batch.

    Args:
        model: network mapping a batch of ``make_input`` encodings to one value
            per board.  All tensors are created on the device of its first
            parameter.
        optimizer: optimizer stepping ``model``'s parameters.
        loss_fn: loss taking ``(pred, target)``; its result is halved before
            the backward pass.

    Returns:
        ``(game_len, max_tile, game_score)``: number of moves played, the
        largest value on the final board, and the sum of ``2 * s`` over the
        chosen moves.
    """
    # Follow the model's device instead of assuming CUDA is available.
    device = next(model.parameters()).device

    model.eval()
    game_len = 0
    game_score = 0
    # ``np.int`` was removed in NumPy 1.24; it was an alias of the builtin int.
    last_grid1 = np.zeros((4, 4), dtype=int)
    last_grid1 = add_two(last_grid1)
    last_grid2 = make_input(last_grid1)
    while True:
        # add_two mutates in place, so grid_array and last_grid1 are one array.
        grid_array = add_two(last_grid1)
        board_list = []
        for m in range(4):
            g = grid_array.copy()
            # NOTE(review): push is assumed to apply move m to g in place and
            # to return a negative value when the move is not possible --
            # confirm against c2048.
            s = push(g, m)
            if s >= 0:
                board_list.append((g, m, s))
        if board_list:
            boards = np.array([make_input(g) for g, m, s in board_list])
            # Evaluation only: skip building an autograd graph for this pass.
            with torch.no_grad():
                p = model(torch.from_numpy(boards).to(device)).flatten().tolist()
            game_len += 1
            best_v = None
            for i, (g, m, s) in enumerate(board_list):
                v = 2 * s + p[i]
                if best_v is None or v > best_v:
                    best_v = v
                    best_score = 2 * s
                    best_grid1 = g
                    best_grid2 = boards[i]

            game_score += best_score
        else:
            # No move available: the target for the previous board is 0.
            best_v = 0
            best_grid1 = None
            best_grid2 = None

        if last_grid2 is not None:
            x, y = Vchange(last_grid2, best_v)
            x = torch.from_numpy(x).to(device)
            y = torch.from_numpy(y).unsqueeze(dim=1).to(device).float()
            model.train()
            optimizer.zero_grad()
            pred = model(x)
            loss = loss_fn(pred, y) / 2
            last_loss = loss.item()
            loss.backward()
#             nn.utils.clip_grad_norm_(model.parameters(), 5.0)
            optimizer.step()
            model.eval()
            if game_len % 50 == 0:
                print (game_len, last_loss)

        if not board_list:
            break
        last_grid2 = best_grid2
        last_grid1 = best_grid1

    return game_len, grid_array.max(), game_score

In [None]:
# Optimizer hyperparameters.
lr = 1e-3
weight_decay = 1e-5

model = NN2048().cuda()
optimizer = optim.Adam(
    model.parameters(),
    lr=lr,
    betas=(0.5, 0.999),
    weight_decay=weight_decay,
)
loss_fn = nn.MSELoss()

# Play up to 200 games, training during each one; stop as soon as a game's
# largest tile (second element of the result) reaches 4096.
for j in range(200):
    result = gen_sample_and_learn(model, optimizer, loss_fn)
    print(j, result)
    if result is None:
        continue
    if result[1] >= 4096:
        break

50 10.63271713256836
100 147.51849365234375
0 (119, 64, 876)
50 19.392465591430664
100 136.64404296875
150 242.16519165039062
200 929.3433227539062
1 (223, 128, 2220)
50 7.077906608581543
100 677.0673217773438
150 1582.306396484375
200 622.7932739257812
250 190.98226928710938
2 (259, 256, 2844)
50 2001.0159912109375
100 83.59587097167969
150 140.96478271484375
200 47010.04296875
250 861.298095703125
300 240.00808715820312
350 712.378173828125
3 (370, 256, 4504)
50 122.31902313232422
100 60.43816375732422
150 886.8023071289062
200 11.481607437133789
250 58.055267333984375
300 108.9146728515625
350 4710.59765625
400 13.410293579101562
450 22.82672882080078
4 (495, 512, 6780)
50 58.41758346557617
100 18875.2265625
150 13766.1376953125
200 541.7061157226562
250 1442.818359375
300 385.10687255859375
350 310.71697998046875
350 1790940.375
5 (350, 256, 3916)
50 593.8567504882812
100 436.1355285644531
150 73.76445770263672
200 255.13473510742188
250 6730.208984375
300 294.72662353515625
6 (327

250 807.4866943359375
300 825.410888671875
350 2688.61669921875
400 22222.33984375
450 6246.14208984375
500 1773.177978515625
550 166.8505859375
600 715.978759765625
650 57958.73046875
700 3250.93896484375
750 1839.3636474609375
800 331.8851013183594
850 16399.099609375
900 790.037841796875
950 2925.544189453125
1000 544.0822143554688
1050 133.39569091796875
34 (1060, 1024, 16688)
50 1993.66796875
100 1023.835693359375
150 340.61822509765625
200 135.5201416015625
250 1169.0089111328125
300 2977.04345703125
350 7723.66796875
400 387.5413513183594
450 26989.75
500 18423.49609375
550 499.91162109375
600 4783.5556640625
650 1814.79931640625
700 11837.6201171875
750 485.53326416015625
800 3895.3857421875
850 1235.314208984375
900 454291.625
950 174.19415283203125
35 (953, 1024, 14584)
50 63.619266510009766
100 40.79623794555664
150 890.0287475585938
200 150.07925415039062
250 9564.416015625
300 6112.626953125
350 1254.8314208984375
400 4106.0615234375
450 90.4075927734375
500 5485.838867187