In [1]:
from c2048log import Game, push

In [2]:
import torch
from torch import nn
import torch.optim as optim
import numpy as np

In [3]:
class NN2048(nn.Module):
    """Convolutional value network mapping an encoded board to a single scalar.

    Two stacked stages of small convolutions (kernel shapes 2x1, 1x2, 4x1,
    1x4 and 2x2, all without padding) feed seven independent linear heads
    whose scalar outputs are added together.  The ``in_features`` of the
    heads are fixed for a 4x4 spatial input, so ``forward`` expects ``x`` of
    shape ``(N, input_size, 4, 4)``.

    Args:
        input_size: number of input channels.
        filter1: output channels of every first-stage convolution.
        filter2: output channels of every second-stage convolution.
        drop_prob: accepted for interface compatibility; not used by the network.
    """

    def __init__(self, input_size=16, filter1=32, filter2=64, drop_prob=0.):
        super().__init__()

        def conv(channels_in, channels_out, kernel):
            # All convolutions in this network are unpadded.
            return nn.Conv2d(in_channels=channels_in, out_channels=channels_out,
                             kernel_size=kernel, padding=0)

        # Stage 1: applied directly to the input planes.
        self.conv_a = conv(input_size, filter1, (2, 1))
        self.conv_a4 = conv(input_size, filter1, (4, 1))
        self.conv_b = conv(input_size, filter1, (1, 2))
        self.conv_b4 = conv(input_size, filter1, (1, 4))
        self.conv_c = conv(input_size, filter1, (2, 2))

        # Stage 2: applied to the stage-1 feature maps.
        self.conv_aa = conv(filter1, filter2, (2, 1))
        self.conv_ab = conv(filter1, filter2, (1, 2))
        self.conv_ba = conv(filter1, filter2, (2, 1))
        self.conv_bb = conv(filter1, filter2, (1, 2))

        self.conv_ab4 = conv(filter1, filter2, (1, 4))
        self.conv_ba4 = conv(filter1, filter2, (4, 1))
        self.conv_c2 = conv(filter1, filter2, (2, 2))
        self.pool = nn.MaxPool2d(2)

        self.relu = nn.ReLU()

        # One scalar head per stage-2 branch.  The multiplier is the number of
        # spatial positions left in that branch for a 4x4 input.
        self.W_aa = nn.Linear(filter2 * 8, 1)
        self.W_ab = nn.Linear(filter2 * 9, 1)
        self.W_ba = nn.Linear(filter2 * 9, 1)
        self.W_bb = nn.Linear(filter2 * 8, 1)
        self.W_ab4 = nn.Linear(filter2 * 1, 1)
        self.W_ba4 = nn.Linear(filter2 * 1, 1)
        self.W_c = nn.Linear(filter2 * 1, 1)

    def flatten(self, x):
        """Collapse every dimension after the first into one: ``(N, ...) -> (N, -1)``."""
        batch = x.size()[0]
        return x.view(batch, -1)

    def forward(self, x):
        """Return one value per sample, shape ``(N, 1)``; ``x`` is cast to float first."""
        x = x.float()
        act = self.relu

        # Stage 1 feature maps.
        pair_rows = act(self.conv_a(x))
        pair_cols = act(self.conv_b(x))
        square = act(self.conv_c(x))
        full_rows = act(self.conv_a4(x))
        full_cols = act(self.conv_b4(x))

        # Stage 2 features, each paired with the linear head that scores it.
        branches = [
            (self.W_aa, self.flatten(act(self.conv_aa(pair_rows)))),
            (self.W_ab, self.flatten(act(self.conv_ab(pair_rows)))),
            (self.W_ba, self.flatten(act(self.conv_ba(pair_cols)))),
            (self.W_bb, self.flatten(act(self.conv_bb(pair_cols)))),
            (self.W_ab4, self.flatten(act(self.conv_ab4(full_rows)))),
            (self.W_ba4, self.flatten(act(self.conv_ba4(full_cols)))),
            (self.W_c, self.flatten(self.pool(act(self.conv_c2(square))))),
        ]

        # Accumulate left to right so the summation order is fixed.
        (first_head, first_feat), rest = branches[0], branches[1:]
        out = first_head(first_feat)
        for head, feat in rest:
            out = out + head(feat)
        return out

In [4]:
# Tile value -> channel index: tile 2**k maps to k (k = 1..15), an empty cell (0) maps to 0.
table = dict((1 << exponent, exponent) for exponent in range(1, 16))
table[0] = 0


def make_input(grid):
    """One-hot encode a 4x4 board into a ``(16, 4, 4)`` float array.

    For every cell ``(row, col)`` the plane ``table[grid[row, col]]`` is set
    to 1 at that position; all other entries stay 0.  A cell value missing
    from ``table`` raises ``KeyError``.
    """
    encoded = np.zeros(shape=(16, 4, 4))
    for row, col in np.ndindex(4, 4):
        plane = table[grid[row, col]]
        encoded[plane, row, col] = 1
    return encoded

In [5]:
def add_two(mat):
    """Write a 2 into one uniformly chosen zero cell of ``mat`` and return ``mat``.

    The array is modified in place; the return value is the same object.
    If ``mat`` has no zero cell, ``np.random.randint`` raises ``ValueError``.
    """
    # Row-major flat positions of every empty cell.
    empty_cells = np.flatnonzero(mat == 0)
    choice = np.random.randint(0, len(empty_cells))
    mat.flat[empty_cells[choice]] = 2
    return mat

In [6]:
def Vchange(grid, v):
    """Expand one ``(grid, v)`` sample into its eight mirrored/transposed variants.

    Axis 0 of ``grid`` is left untouched; axes 1 and 2 are flipped and/or
    swapped.  The variants only stack into one regular array when axes 1 and
    2 have the same length.

    Returns:
        ``(xtrain, ytrain)``: the eight variants stacked along a new leading
        axis (original, axis-1 flip, axis-2 flip, both flips, then the same
        four for the transposed grid), and ``v`` repeated eight times.
    """
    variants = []
    for base in (grid, grid.swapaxes(1, 2)):
        mirrored = base[:, :, ::-1]
        variants += [base, base[:, ::-1, :], mirrored, mirrored[:, ::-1, :]]
    xtrain = np.array(variants)
    ytrain = np.array([v] * len(variants))
    return xtrain, ytrain

def gen_sample_and_learn(model, optimizer, loss_fn, is_train = True):
    """Play one game greedily with ``model`` and optionally train it after every move.

    Each turn a 2-tile is added to the board, every direction 0..3 is tried
    with ``push`` on a copy of the board, and the candidate maximising
    ``2 * s + model(encoded_board)`` is chosen, where ``s`` is the value
    returned by ``push`` (candidates with ``s < 0`` are discarded — presumably
    that marks an impossible move; confirm against ``c2048log``).

    When ``is_train`` is true, the encoded board chosen on the previous turn
    is regressed towards this turn's best value (0 once no move is left),
    using one optimizer step on its eight ``Vchange`` variants.

    Args:
        model: network returning one value per encoded board.
        optimizer: optimizer for ``model``; only used when ``is_train`` is true.
        loss_fn: loss taking ``(prediction, target)``; only used when
            ``is_train`` is true.
        is_train: whether to perform the per-move update.

    Returns:
        ``(game_len, max_tile, game_score)``: number of moves played, the
        largest value on the final board, and the sum of ``2 * s`` over the
        chosen moves.
    """
    # Follow the model's device instead of assuming CUDA, so the same code
    # runs on CPU-only machines and on any GPU index.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    game_len = 0
    game_score = 0
    # `np.int` was only an alias of the builtin and was removed in NumPy 1.24.
    last_grid1 = np.zeros((4, 4), dtype=int)
    last_grid1 = add_two(last_grid1)
    last_grid2 = make_input(last_grid1)
    while True:
        # add_two works in place: grid_array and last_grid1 are the same array.
        grid_array = add_two(last_grid1)
        board_list = []
        for m in range(4):
            g = grid_array.copy()
            s = push(g, m)
            if s >= 0:
                board_list.append((g, m, s))
        if board_list:
            boards = np.array([make_input(g) for g, _, _ in board_list])
            # Scoring candidates needs no gradients; skip building the graph.
            with torch.no_grad():
                p = model(torch.from_numpy(boards).to(device)).flatten()
            game_len += 1
            best_v = None
            for i, (g, _, s) in enumerate(board_list):
                v = 2 * s + p[i].item()
                if best_v is None or v > best_v:
                    best_v = v
                    best_score = 2 * s
                    best_grid1 = g
                    best_grid2 = boards[i]

            game_score += best_score
        else:
            # No move possible: the previous position is trained towards 0.
            best_v = 0
            best_grid1 = None
            best_grid2 = None

        if is_train:
            x, y = Vchange(last_grid2, best_v)
            x = torch.from_numpy(x).to(device)
            y = torch.from_numpy(y).unsqueeze(dim=1).to(device).float()
            model.train()
            optimizer.zero_grad()
            pred = model(x)
            loss = loss_fn(pred, y)
            loss.backward()
            optimizer.step()
            model.eval()

        if not board_list:
            break
        last_grid2 = best_grid2
        last_grid1 = best_grid1

    return game_len, grid_array.max(), game_score

In [7]:
# Optimiser hyperparameters.
lr = 1e-3
weight_decay = 1e-6

model = NN2048().cuda()
optimizer = optim.Adam(
    model.parameters(),
    lr=lr,
    weight_decay=weight_decay,
    betas=(0.5, 0.999),
)
loss_fn = nn.MSELoss()

# Play up to 200 training games, printing (game_len, max_tile, game_score)
# after each one, and stop as soon as a game reaches a tile of 4096 or more.
for j in range(200):
    result = gen_sample_and_learn(model, optimizer, loss_fn)
    print(j, result)
    if result is None:
        continue
    if result[1] >= 4096:
        break

RuntimeError: size mismatch, m1: [256 x 1], m2: [64 x 1] at /opt/conda/conda-bld/pytorch_1565272271120/work/aten/src/THC/generic/THCTensorMathBlas.cu:273

In [24]:
num_epochs = 50

def test(model, epochs=None):
    """Play evaluation games with ``model`` (no training) and print each result.

    Args:
        model: network handed to ``gen_sample_and_learn`` with ``is_train=False``.
        epochs: number of games to play.  Defaults to the module-level
            ``num_epochs``, read at call time.  A value <= 0 plays no games.

    Prints ``epoch, (game_len, max_tile, game_score)`` for every game,
    with ``epoch`` counted from 1.
    """
    if epochs is None:
        epochs = num_epochs
    epoch = 0
    # `<` rather than `!=`: the loop must also end when the requested count is
    # negative or not a whole number, where `!=` would never become false.
    while epoch < epochs:
        epoch += 1
        res = gen_sample_and_learn(model, None, None, False)
        print (epoch, res)

test(model)

1 (666, 512, 1243.0)
2 (2066, 2048, 4041.0)
3 (813, 1024, 1553.0)
4 (1391, 1024, 2691.0)
5 (1171, 1024, 2256.0)
6 (685, 512, 1288.0)
7 (1621, 2048, 3153.0)
8 (1874, 2048, 3663.0)
9 (921, 1024, 1764.0)
10 (2177, 2048, 4266.0)
11 (591, 512, 1105.0)
12 (1284, 1024, 2479.0)
13 (1268, 1024, 2450.0)
14 (847, 1024, 1616.0)
15 (304, 256, 536.0)
16 (996, 1024, 1911.0)
17 (1573, 2048, 3067.0)
18 (1627, 2048, 3160.0)
19 (1011, 1024, 1945.0)
20 (1142, 1024, 2197.0)
21 (1199, 1024, 2313.0)
22 (976, 1024, 1875.0)
23 (1158, 1024, 2233.0)
24 (639, 512, 1201.0)
25 (1025, 1024, 1973.0)
26 (1686, 2048, 3283.0)
27 (561, 512, 1045.0)
28 (1732, 2048, 3378.0)
29 (1291, 1024, 2496.0)
30 (544, 512, 1012.0)
31 (1056, 1024, 2027.0)
32 (693, 512, 1303.0)
33 (855, 1024, 1622.0)
34 (588, 512, 1095.0)
35 (1227, 1024, 2363.0)
36 (1351, 1024, 2616.0)
37 (1088, 1024, 2090.0)
38 (294, 256, 517.0)
39 (1551, 2048, 3019.0)
40 (735, 512, 1391.0)
41 (808, 512, 1531.0)
42 (792, 1024, 1508.0)
43 (1158, 1024, 2233.0)
44 (1569, 