In [15]:
from c2048log import Game, push

In [16]:
import torch
from torch import nn
import torch.optim as optim
import numpy as np

In [17]:
class NN2048(nn.Module):
    """Two-stage convolutional network that maps a board encoding to one scalar.

    The input is expected to be shaped ``(N, input_size, 4, 4)``: the sizes of
    the final linear layers (``filter2 * 8`` and ``filter2 * 9``) only match
    the flattened feature maps when the spatial extent is 4x4.

    Args:
        input_size: number of input channels.
        filter1: number of output channels of the first-stage convolutions.
        filter2: number of output channels of the second-stage convolutions.
        drop_prob: accepted for interface compatibility; not used by the
            network.
    """

    def __init__(self, input_size=16, filter1=512, filter2=4096, drop_prob=0.):
        super().__init__()
        tall = (2, 1)   # kernel spanning two rows
        wide = (1, 2)   # kernel spanning two columns

        # Stage 1: 4x4 -> 3x4 (tall) and 4x4 -> 4x3 (wide).
        self.conv_a = nn.Conv2d(input_size, filter1, kernel_size=tall, padding=0)
        self.conv_b = nn.Conv2d(input_size, filter1, kernel_size=wide, padding=0)

        # Stage 2: each stage-1 map is convolved again with both kernel shapes.
        self.conv_aa = nn.Conv2d(filter1, filter2, kernel_size=tall, padding=0)
        self.conv_ab = nn.Conv2d(filter1, filter2, kernel_size=wide, padding=0)
        self.conv_ba = nn.Conv2d(filter1, filter2, kernel_size=tall, padding=0)
        self.conv_bb = nn.Conv2d(filter1, filter2, kernel_size=wide, padding=0)

        self.relu = nn.ReLU()

        # One linear read-out per stage-2 map; 2x4 and 4x2 maps have 8
        # positions, the two 3x3 maps have 9.
        self.W_aa = nn.Linear(filter2 * 8, 1)
        self.W_ab = nn.Linear(filter2 * 9, 1)
        self.W_ba = nn.Linear(filter2 * 9, 1)
        self.W_bb = nn.Linear(filter2 * 8, 1)

    def flatten(self, x):
        """Collapse every dimension after the first into a single one."""
        return x.view(x.size(0), -1)

    def forward(self, x):
        """Return a ``(N, 1)`` tensor: the sum of the four branch read-outs."""
        x = x.float()
        a = self.relu(self.conv_a(x))
        b = self.relu(self.conv_b(x))

        branches = (
            (a, self.conv_aa, self.W_aa),
            (a, self.conv_ab, self.W_ab),
            (b, self.conv_ba, self.W_ba),
            (b, self.conv_bb, self.W_bb),
        )
        out = None
        for features, conv, readout in branches:
            term = readout(self.flatten(self.relu(conv(features))))
            out = term if out is None else out + term
        return out

In [18]:
# Lookup from a tile value to its channel index: 2**k -> k for k = 1..15,
# and the empty cell (0) -> channel 0.
table = {1 << exponent: exponent for exponent in range(1, 16)}
table[0] = 0


def make_input(grid):
    """One-hot encode a 4x4 grid of tile values into a (16, 4, 4) float array.

    For every cell ``(i, j)`` the plane selected by ``table[grid[i, j]]`` gets
    a 1 at ``(i, j)``; every other entry stays 0. A cell value that is not a
    key of ``table`` raises ``KeyError``.
    """
    planes = np.zeros(shape=(16, 4, 4))
    for row, col in np.ndindex(4, 4):
        channel = table[grid[row, col]]
        planes[channel, row, col] = 1
    return planes

In [19]:
def add_two(mat):
    """Write a 2 into one uniformly chosen zero cell of ``mat`` and return it.

    ``mat`` is modified in place and the same object is returned. When ``mat``
    has no zero cell, ``np.random.randint`` raises ``ValueError``.
    """
    empty_cells = np.argwhere(mat == 0)
    chosen = np.random.randint(0, len(empty_cells))
    position = tuple(empty_cells[chosen])
    mat[position] = 2
    return mat

In [22]:
def Vchange(grid, v):
    """Build the 8 flipped/transposed variants of ``grid``, each labelled ``v``.

    ``grid`` is indexed as ``(channel, row, col)``. The variants are, in order:
    the grid itself, rows reversed, columns reversed, both reversed, and then
    the same four applied to the grid with its last two axes swapped.

    Returns:
        ``(xtrain, ytrain)`` where ``xtrain`` stacks the 8 variants along a new
        leading axis and ``ytrain`` is a length-8 array filled with ``v``.
    """
    variants = []
    for base in (grid, grid.swapaxes(1, 2)):
        cols_reversed = base[:, :, ::-1]
        variants.extend([
            base,
            base[:, ::-1, :],
            cols_reversed,
            cols_reversed[:, ::-1, :],
        ])
    xtrain = np.stack(variants)
    ytrain = np.array([v] * len(variants))
    return xtrain, ytrain

def gen_sample_and_learn(model, optimizer, loss_fn, is_train = True):
    """Play one game greedily with ``model``, optionally training after every move.

    Each turn a new 2 tile is added, every move 0..3 is tried on a copy of the
    board with ``push``, and the move maximising ``2 * s + model(board)`` is
    taken (``s`` being the value returned by ``push``). When ``is_train`` is
    true, the model is then fitted for one optimiser step on the 8 symmetric
    variants of the *previous* encoded board, with that maximum as the target
    (0 once no move is possible).

    Args:
        model: network mapping a ``(N, 16, 4, 4)`` batch to ``(N, 1)`` values.
            Inputs are moved to the device of its first parameter.
        optimizer: optimiser stepping ``model``; only used when ``is_train``.
        loss_fn: callable ``loss_fn(pred, target)``; only used when ``is_train``.
        is_train: when false the game is only played, no weights are updated.

    Returns:
        Tuple ``(game_len, max_tile, game_score)``: the number of moves played,
        the largest value on the final board, and the sum of ``2 * s`` over the
        chosen moves.
    """
    model.eval()
    # Follow the model's own device rather than hard-coding CUDA, so the same
    # code runs on CPU-only machines as well.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    game_len = 0
    game_score = 0
    # `np.int` was removed in NumPy 1.24; the builtin `int` is its exact replacement.
    last_grid1 = np.zeros((4, 4), dtype=int)
    last_grid1 = add_two(last_grid1)
    # Encoded board *before* the next random tile is added; this is the state
    # the training step below is fitted on.
    last_grid2 = make_input(last_grid1)
    while True:
        # add_two mutates in place, so grid_array and last_grid1 are the same array.
        grid_array = add_two(last_grid1)

        # Collect every move that `push` accepts.
        # NOTE(review): presumably `push` applies move `m` to `g` in place and
        # returns a negative value when the move is not possible - confirm
        # against c2048log.
        board_list = []
        for m in range(4):
            g = grid_array.copy()
            s = push(g, m)
            if s >= 0:
                board_list.append((g, m, s))

        if board_list:
            boards = np.array([make_input(g) for g, m, s in board_list])
            # Pure inference: no autograd graph is needed to rank the moves.
            with torch.no_grad():
                p = model(torch.from_numpy(boards).to(device)).flatten().tolist()
            game_len += 1
            best_v = None
            for i, (g, m, s) in enumerate(board_list):
                v = 2 * s + p[i]
                if best_v is None or v > best_v:
                    best_v = v
                    best_score = 2 * s
                    best_grid1 = g
                    best_grid2 = boards[i]

            game_score += best_score
        else:
            # No move possible: the target for the last board is 0.
            best_v = 0
            best_grid1 = None
            best_grid2 = None

        if is_train:
            x, y = Vchange(last_grid2, best_v)
            x = torch.from_numpy(x).to(device)
            y = torch.from_numpy(y).unsqueeze(dim=1).to(device).float()
            model.train()
            optimizer.zero_grad()
            pred = model(x)
            loss = loss_fn(pred, y)
            loss.backward()
            optimizer.step()
            model.eval()

        if not board_list:
            break
        last_grid2 = best_grid2
        last_grid1 = best_grid1

    return game_len, grid_array.max(), game_score

In [None]:
# Optimiser hyper-parameters.
lr = 1e-3
weight_decay = 1e-5

model = NN2048().cuda()
loss_fn = nn.MSELoss()
optimizer = optim.Adam(
    model.parameters(),
    lr=lr,
    betas=(0.5, 0.999),
    weight_decay=weight_decay,
)

# Play up to 200 training games; stop early once the largest value on a
# final board (second element of the result) reaches 4096.
for j in range(200):
    result = gen_sample_and_learn(model, optimizer, loss_fn)
    print(j, result)
    if result is None:
        continue
    if result[1] >= 4096:
        break

0 (211, 256, 361.0)
1 (195, 128, 323.0)
2 (246, 128, 419.0)
3 (279, 256, 495.0)
4 (285, 256, 503.0)
5 (386, 256, 693.0)
6 (927, 1024, 1777.0)
7 (293, 256, 515.0)
8 (458, 256, 842.0)
9 (330, 256, 587.0)
10 (559, 512, 1049.0)
11 (395, 256, 710.0)
12 (609, 512, 1139.0)
13 (917, 1024, 1760.0)
14 (572, 512, 1061.0)
15 (325, 256, 576.0)
16 (818, 1024, 1557.0)
17 (1374, 2048, 2677.0)
18 (453, 512, 835.0)
19 (502, 512, 928.0)
20 (289, 256, 510.0)
21 (463, 512, 852.0)
22 (1140, 1024, 2199.0)
23 (534, 512, 994.0)
24 (731, 1024, 1391.0)
25 (1029, 1024, 1981.0)
26 (739, 1024, 1406.0)
27 (813, 1024, 1547.0)
28 (639, 512, 1201.0)
29 (421, 256, 766.0)
30 (887, 1024, 1697.0)
31 (935, 1024, 1789.0)
32 (542, 512, 1010.0)
33 (803, 1024, 1535.0)
34 (936, 1024, 1792.0)
35 (1008, 1024, 1941.0)
36 (721, 512, 1361.0)
37 (932, 1024, 1779.0)
38 (1042, 1024, 2005.0)
39 (1243, 1024, 2402.0)
40 (1297, 2048, 2520.0)


In [19]:
# Default number of evaluation games played by `test`.
num_epochs = 50

def test(model, epochs=None):
    """Play evaluation games with ``model`` (no training) and print each result.

    Args:
        model: network passed to ``gen_sample_and_learn`` with ``is_train=False``.
        epochs: number of games to play; defaults to the module-level
            ``num_epochs`` when omitted.

    Each printed line is ``epoch`` (1-based) followed by the tuple returned by
    ``gen_sample_and_learn``.
    """
    if epochs is None:
        epochs = num_epochs
    # A bounded range replaces the former `while epoch != num_epochs`, which
    # never terminated for a negative or non-integer game count.
    for epoch in range(1, epochs + 1):
        res = gen_sample_and_learn(model, None, None, False)
        print (epoch, res)

test(model)

1 (1556, 2048, 27764)
2 (1550, 2048, 27744)
3 (1744, 2048, 31704)
4 (1047, 1024, 16540)
5 (1065, 1024, 16752)
6 (1516, 2048, 27200)
7 (676, 512, 9704)
8 (1903, 2048, 34644)
9 (1494, 2048, 26776)
10 (1092, 1024, 17164)
11 (1447, 2048, 26116)
12 (1922, 2048, 34860)
13 (838, 1024, 13048)
14 (1050, 1024, 16556)
15 (1563, 2048, 27836)
16 (1056, 1024, 16600)
17 (1442, 2048, 26076)
18 (1024, 1024, 16332)
19 (543, 512, 7372)
20 (1511, 2048, 27196)
21 (641, 512, 8492)
22 (1045, 1024, 16588)
23 (1107, 1024, 17292)
24 (1055, 1024, 16604)
25 (830, 1024, 12812)
26 (1016, 1024, 16096)
27 (1050, 1024, 16556)
28 (1595, 2048, 28332)
29 (1005, 1024, 16000)
30 (1186, 1024, 18444)
31 (1011, 1024, 16156)
32 (815, 1024, 12684)
33 (1039, 1024, 16460)
34 (1052, 1024, 16580)
35 (1180, 1024, 18376)
36 (2032, 2048, 36504)
37 (1553, 2048, 27736)
38 (935, 1024, 14428)
39 (546, 512, 7388)
40 (1019, 1024, 16236)
41 (1320, 2048, 23816)
42 (613, 512, 8216)
43 (1396, 2048, 24760)
44 (1007, 1024, 16108)
45 (832, 1024, 1