In [1]:
import torch
from torch import nn
import torch.optim as optim
import numpy as np
import random

In [2]:
class NN2048(nn.Module):
    """Scalar-output convolutional network for a one-hot encoded 4x4 board.

    The input is scanned by two first-layer convolutions, one over vertically
    adjacent cell pairs (kernel 2x1) and one over horizontally adjacent pairs
    (kernel 1x2). Each first-layer map is scanned again with both kernel
    shapes, giving four second-layer branches. Every branch is flattened,
    reduced to a single number by its own linear layer, and the four numbers
    are added together.

    Args:
        input_size: Number of input channels.
        filter1: Output channels of each first-layer convolution.
        filter2: Output channels of each second-layer convolution.
        drop_prob: Accepted for signature compatibility; not used by the class.

    The linear layer widths (``filter2 * 8`` and ``filter2 * 9``) are only
    correct for a 4x4 spatial input: without padding the branches end up
    with 2x4, 3x3, 3x3 and 4x2 positions respectively.
    """

    def __init__(self, input_size=16, filter1=512, filter2=4096, drop_prob=0.):
        super().__init__()
        vertical_pair = (2, 1)
        horizontal_pair = (1, 2)

        # First layer: one convolution per pair orientation.
        self.conv_a = nn.Conv2d(input_size, filter1, kernel_size=vertical_pair, padding=0)
        self.conv_b = nn.Conv2d(input_size, filter1, kernel_size=horizontal_pair, padding=0)

        # Second layer: both orientations applied on top of each first-layer map.
        self.conv_aa = nn.Conv2d(filter1, filter2, kernel_size=vertical_pair, padding=0)
        self.conv_ab = nn.Conv2d(filter1, filter2, kernel_size=horizontal_pair, padding=0)
        self.conv_ba = nn.Conv2d(filter1, filter2, kernel_size=vertical_pair, padding=0)
        self.conv_bb = nn.Conv2d(filter1, filter2, kernel_size=horizontal_pair, padding=0)

        self.relu = nn.ReLU()

        # One linear read-out per branch; 8 or 9 is the number of spatial
        # positions left in that branch for a 4x4 input.
        self.W_aa = nn.Linear(filter2 * 8, 1)
        self.W_ab = nn.Linear(filter2 * 9, 1)
        self.W_ba = nn.Linear(filter2 * 9, 1)
        self.W_bb = nn.Linear(filter2 * 8, 1)

    def flatten(self, x):
        """Collapse every axis after the first into one, keeping the batch axis."""
        batch = x.size(0)
        return x.view(batch, -1)

    def forward(self, x):
        """Return a ``(batch, 1)`` tensor: the sum of the four branch read-outs."""
        x = x.float()
        vert = self.relu(self.conv_a(x))
        horiz = self.relu(self.conv_b(x))

        head_aa = self.W_aa(self.flatten(self.relu(self.conv_aa(vert))))
        head_ab = self.W_ab(self.flatten(self.relu(self.conv_ab(vert))))
        head_ba = self.W_ba(self.flatten(self.relu(self.conv_ba(horiz))))
        head_bb = self.W_bb(self.flatten(self.relu(self.conv_bb(horiz))))

        return head_aa + head_ab + head_ba + head_bb

In [3]:
def make_input(grid):
    """One-hot encode a 4x4 board into a ``(16, 4, 4)`` float array.

    Args:
        grid: 2-D NumPy integer array; only cells ``[0..3, 0..3]`` are read.
            Each cell value is used as a channel index, so it must be a
            valid index into an axis of length 16.

    Returns:
        Array ``r`` of zeros with ``r[grid[i, j], i, j] == 1`` for every cell.
    """
    one_hot = np.zeros(shape=(16, 4, 4))
    for row, col in np.ndindex(4, 4):
        one_hot[grid[row, col], row, col] = 1
    return one_hot

def add_two(mat):
    """Write a 1 into one uniformly chosen zero cell of ``mat``, in place.

    Args:
        mat: NumPy array; cells equal to 0 are treated as empty.

    Returns:
        The same ``mat`` object, after modification.

    Raises:
        ValueError: From ``np.random.randint`` when ``mat`` has no zero cell.
    """
    empty_cells = np.argwhere(mat == 0)
    pick = np.random.randint(0, len(empty_cells))
    target = tuple(empty_cells[pick])
    mat[target] = 1
    return mat

In [4]:
# Score credited for one cell holding value k (k = 0..16). Entry k is
# (k - 1) * 2**k for k >= 1, i.e. the merge points needed to build a 2**k tile
# out of 2-tiles; entry 0 (empty cell) is 0.
singleScore = [0] + [(k - 1) * 2 ** k for k in range(1, 17)]
# Lookup table loaded from 'move.npy' (path is relative to the working
# directory). `move` indexes it with four cell values and takes the whole
# last axis as the result for that line.
# NOTE(review): the file's contents are not visible here — presumably a
# precomputed slide/merge table of shape (16, 16, 16, 16, 4); confirm.
moveDict=np.load('move.npy')

def move(list):
    """Look up the ``moveDict`` entry for one line of four cells.

    Args:
        list: Indexable with at least four cell values (elements 0..3 are
            used). The name shadows the builtin but is kept because it is
            part of the function's signature.

    Returns:
        ``moveDict[c0, c1, c2, c3, :]`` for the four cell values.
    """
    first, second, third, fourth = list[0], list[1], list[2], list[3]
    return moveDict[first, second, third, fourth, :]

@np.vectorize
def lookup(x):
    """Return ``singleScore[x]``; vectorized so it applies element-wise to arrays."""
    return singleScore[x]

def getScore(matrix):
    """Return the sum of ``lookup`` applied to every cell of ``matrix``."""
    per_cell = lookup(matrix)
    return np.sum(per_cell)

def getMove(grid):
    """List every direction whose move actually changes ``grid``.

    Args:
        grid: Board array accepted by ``moveGrid``.

    Returns:
        List of ``(new_grid, direction, score)`` tuples, one per direction
        0..3 for which ``moveGrid`` yields a board different from ``grid``;
        ``score`` is ``getScore(new_grid)``. Empty when no direction changes
        the board.
    """
    candidates = []
    for direction in range(4):
        moved = moveGrid(grid, direction)
        if isSame(grid, moved):
            # Direction leaves the board untouched, so it is not a legal move.
            continue
        candidates.append((moved, direction, getScore(moved)))
    return candidates
        
def moveGrid(grid,i):
    """Apply one move to the board and return the resulting board.

    Directions: 0 = up, 1 = left, 2 = down, 3 = right. Every direction is
    reduced to the per-line lookup ``move``: up/down work on the transposed
    board, and down/right reverse each line before and after the lookup.

    Args:
        grid: 2-D NumPy array; rows 0..3 are processed.
        i: Direction code.

    Returns:
        A new integer array with the moved board, or ``None`` when ``i`` is
        not one of 0, 1, 2, 3.
    """
    if i not in (0, 1, 2, 3):
        return None

    along_columns = i in (0, 2)   # up / down
    reversed_line = i in (2, 3)   # down / right

    work = np.transpose(grid) if along_columns else grid

    moved_lines = []
    for row in range(4):
        line = work[row, :]
        if reversed_line:
            moved_lines.append(np.flip(move(np.flip(line))))
        else:
            moved_lines.append(move(line))

    stacked = np.stack(moved_lines, axis = 0).astype(int)
    return stacked.T if along_columns else stacked

def isSame(grid1,grid2):
    """Return whether ``grid1 == grid2`` holds element-wise for every cell."""
    cellwise_equal = grid1 == grid2
    return np.all(cellwise_equal)

In [5]:
def Vchange(grid, v):
    """Expand one encoded board into its 8 mirrored/transposed variants.

    Args:
        grid: 3-D array whose last two axes are the (square) board axes.
        v: Target value shared by all variants.

    Returns:
        ``(xtrain, ytrain)``: ``xtrain`` stacks, in order, the board, its
        flip along axis 1, its flip along axis 2, both flips, and then the
        same four for the board with axes 1 and 2 swapped; ``ytrain`` is
        ``v`` repeated 8 times.
    """
    variants = []
    for base in (grid, grid.swapaxes(1,2)):
        flipped_last = base[:,:,::-1]
        variants.append(base)
        variants.append(base[:,::-1,:])
        variants.append(flipped_last)
        variants.append(flipped_last[:,::-1,:])
    xtrain = np.array(variants)
    ytrain = np.array([v for _ in range(8)])
    return xtrain, ytrain

def gen_sample_and_learn(model, optimizer, loss_fn, is_train = False, explorationProb=0.1):
    """Play one full game guided by ``model`` and optionally train it online.

    Each turn a new tile is added with ``add_two``, every board-changing move
    is generated with ``getMove``, and each resulting board is valued as
    ``(its score - current score) + model(board)``. When ``is_train`` is true,
    the board chosen on the previous turn (expanded by ``Vchange``) is
    regressed towards the best such value, or towards 0 once no move is left.

    Args:
        model: ``nn.Module`` mapping a batch of ``make_input`` encodings to
            one value per board. Inputs are moved to the model's device.
        optimizer: Optimizer for ``model``; only used when ``is_train``.
        loss_fn: Callable ``loss_fn(pred, target)``; only used when ``is_train``.
        is_train: Whether to take a gradient step every turn.
        explorationProb: Probability of playing a uniformly random legal move
            instead of the best-valued one; only applies when ``is_train``.

    Returns:
        Tuple ``(game_len, max_tile, game_score, last_loss)``: number of turns
        with at least one legal move, ``2 ** (largest cell value)`` on the
        final board, ``getScore`` of the last chosen board, and the loss of
        the last training step (0 when not training).
    """
    # Follow the model's own device instead of hard-coding CUDA, so a CPU model
    # also works; a CUDA model behaves exactly as before.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    game_len = 0
    game_score = 0
    # Builtin int: the np.int alias was removed in NumPy 1.24.
    last_grid1 = np.zeros((4,4),dtype=int)
    last_grid1 = add_two(last_grid1)
    last_grid2 = make_input(last_grid1)
    last_loss = 0

    while True:
        # add_two writes the new tile into last_grid1 itself and returns it.
        grid_array = add_two(last_grid1)
        board_list = getMove(grid_array)
        if board_list:
            boards = np.array([make_input(g) for g,m,s in board_list])
            # Move selection is pure inference; skip building an autograd graph.
            with torch.no_grad():
                p = model(torch.from_numpy(boards).to(device)).flatten()
            game_len += 1
            best_v = None
            for i, (g,m,s) in enumerate(board_list):
                # Score gained by this move plus the predicted value of its board.
                v = (s - game_score) + p[i].item()
                if best_v is None or v > best_v:
                    best_v = v
                    best_score = s
                    best_grid1 = g
                    best_grid2 = boards[i]
        else:
            # No legal move left: the previous board's target value is 0.
            best_v = 0
            best_grid1 = None
            best_grid2 = None

        if is_train:
            # Regress the previously chosen board (all 8 variants) towards best_v.
            x, y = Vchange(last_grid2, best_v)
            x = torch.from_numpy(x).to(device)
            y = torch.from_numpy(y).unsqueeze(dim=1).to(device).float()
            model.train()
            optimizer.zero_grad()
            pred = model(x)
            loss = loss_fn(pred, y) / 2
            last_loss = loss.item()
            loss.backward()
            optimizer.step()
            model.eval()

        if not board_list:
            break

        # Epsilon-greedy exploration (training only): occasionally play a
        # uniformly random legal move instead of the best-valued one.
        if is_train and random.random() < explorationProb:
            idx = random.randint(0, len(board_list) - 1)
            game_score = board_list[idx][2]
            last_grid1 = board_list[idx][0]
            last_grid2 = boards[idx]
        else:
            game_score = best_score
            last_grid1 = best_grid1
            last_grid2 = best_grid2

    return game_len, 2**grid_array.max(), game_score, last_loss

In [8]:
# Number of self-play games `train` runs. `train` reads this global when it is
# called, and a later cell rebinds the same name for evaluation, so re-run
# this cell before calling `train` again.
num_epochs = 100
# Learning rate passed to the Adam optimizer in `train`.
lr = 1e-3
# Weight decay passed to the Adam optimizer in `train`.
weight_decay = 1e-5

def train(model):
    """Train ``model`` by self-play for ``num_epochs`` games, printing one line per game.

    Uses the module-level ``lr``, ``weight_decay`` and ``num_epochs`` as they
    are at call time. Each game is played by ``gen_sample_and_learn`` in
    training mode with exploration probability 0 (always the best-valued move).

    Args:
        model: Network to optimise in place.

    Prints:
        ``epoch <n> <game_len> <max_tile> <game_score> <last_loss>`` per game.
    """
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay, betas=(0.5, 0.999))
    loss = nn.MSELoss()
    # Bounded loop: the former `while epoch != num_epochs` never terminated
    # for a negative or fractional num_epochs.
    for epoch in range(1, int(num_epochs) + 1):
        game_len, max_score, game_score, last_loss = gen_sample_and_learn(model, optimizer, loss, True, 0)
        print ('epoch', epoch, game_len, max_score, game_score, last_loss)
    
# Build the network with its default layer sizes, move it to the GPU
# (requires a CUDA device), and train it; `train` prints one line per game.
model = NN2048().cuda()
train(model)

epoch 1 142 128 1264 74606952.0
epoch 2 109 64 868 10919294.0
epoch 3 233 256 2560 6857856.0
epoch 4 282 256 3232 4414761.0
epoch 5 251 256 2912 3364850.0
epoch 6 281 256 3144 2398961.75
epoch 7 386 512 5144 5611510.0
epoch 8 721 1024 11224 14043992.0
epoch 9 460 512 6016 2479032.0
epoch 10 484 512 6264 4012723.5
epoch 11 637 512 8824 6213203.0
epoch 12 587 512 7868 2556449.5
epoch 13 620 512 8264 2825159.75
epoch 14 603 512 8124 6206273.0
epoch 15 384 256 4376 1381938.875
epoch 16 325 256 4012 3051944.75
epoch 17 824 1024 12744 24837792.0
epoch 18 1009 1024 15304 9607723.0
epoch 19 473 512 6280 11234986.0
epoch 20 620 512 9020 13031000.0
epoch 21 641 512 8768 5207760.0
epoch 22 424 256 5212 10112334.0
epoch 23 812 1024 12668 20906440.0
epoch 24 477 512 6216 11283237.0
epoch 25 409 256 5000 7158752.0
epoch 26 492 512 6524 6097144.0
epoch 27 902 1024 14096 48867784.0
epoch 28 547 512 7404 13896891.0
epoch 29 903 1024 13672 11451199.0
epoch 30 365 256 4228 9984680.0
epoch 31 997 1024 156

In [9]:
# Number of evaluation games played by `test`. This rebinds the same global
# that `train` reads, so calling `train` after this cell would run 50 games
# unless the training configuration cell is re-run first.
num_epochs = 50

def test(model):
    """Play ``num_epochs`` evaluation games with ``model``, printing one line per game.

    Games are played by ``gen_sample_and_learn`` with training disabled, so no
    optimizer or loss is needed and the reported loss stays at its initial 0.
    Reads the module-level ``num_epochs`` as it is at call time.

    Args:
        model: Network used to choose moves.

    Prints:
        ``epoch <n> <game_len> <max_tile> <game_score> <last_loss>`` per game.
    """
    # Bounded loop: the former `while epoch != num_epochs` never terminated
    # for a negative or fractional num_epochs.
    for epoch in range(1, int(num_epochs) + 1):
        game_len, max_score, game_score, last_loss = gen_sample_and_learn(model, None, None, False)
        print ('epoch', epoch, game_len, max_score, game_score, last_loss)

# Evaluate the `model` object left in the kernel by the training cell.
test(model)

epoch 1 1321 2048 23880 0
epoch 2 1298 2048 23640 0
epoch 3 1544 2048 27648 0
epoch 4 1051 1024 16572 0
epoch 5 1009 1024 16024 0
epoch 6 1451 2048 26220 0
epoch 7 420 512 5604 0
epoch 8 1056 1024 16684 0
epoch 9 1819 2048 32948 0
epoch 10 1040 1024 16464 0
epoch 11 1078 1024 16864 0
epoch 12 1086 1024 16972 0
epoch 13 1922 2048 34984 0
epoch 14 1015 1024 16216 0
epoch 15 1433 2048 25980 0
epoch 16 982 1024 15736 0
epoch 17 1558 2048 27800 0
epoch 18 1248 1024 19928 0
epoch 19 784 1024 12364 0
epoch 20 575 512 7696 0
epoch 21 1919 2048 35016 0
epoch 22 1075 1024 17192 0
epoch 23 1048 1024 16520 0
epoch 24 795 1024 12460 0
epoch 25 743 1024 11772 0
epoch 26 1614 2048 28492 0
epoch 27 915 1024 14684 0
epoch 28 1059 1024 16628 0
epoch 29 1026 1024 16344 0
epoch 30 528 512 7232 0
epoch 31 1035 1024 16428 0
epoch 32 1032 1024 16412 0
epoch 33 1433 2048 25992 0
epoch 34 2035 2048 36472 0
epoch 35 1039 1024 16460 0
epoch 36 795 1024 12460 0
epoch 37 1348 2048 24120 0
epoch 38 1020 1024 16300 