In [160]:
import random

import numpy as np
import torch
import torch.optim as optim
from torch import nn

In [161]:
class NN2048(nn.Module):
    """Scalar-output network built from two layers of two-cell convolutions.

    The first layer runs a (2,1) kernel and a (1,2) kernel over the input. The
    second layer runs both kernel shapes again over each first-layer result,
    giving four branches. Every branch is flattened, reduced to one number by
    its own linear head, and the four numbers are added together.

    The linear heads expect 8 or 9 spatial positions per channel, which is
    what the convolutions produce for a 4x4 spatial input.

    Args:
        input_size: number of channels of the input tensor.
        filter1: number of output channels of each first-layer convolution.
        filter2: number of output channels of each second-layer convolution.
        drop_prob: accepted for compatibility; this class does not use it.
    """

    def __init__(self, input_size=16, filter1=512, filter2=4096, drop_prob=0.):
        super().__init__()
        tall, wide = (2, 1), (1, 2)

        # First layer: one convolution per kernel orientation.
        self.conv_a = nn.Conv2d(in_channels=input_size, out_channels=filter1, kernel_size=tall, padding=0)
        self.conv_b = nn.Conv2d(in_channels=input_size, out_channels=filter1, kernel_size=wide, padding=0)

        # Second layer: both orientations applied to each first-layer output.
        self.conv_aa = nn.Conv2d(in_channels=filter1, out_channels=filter2, kernel_size=tall, padding=0)
        self.conv_ab = nn.Conv2d(in_channels=filter1, out_channels=filter2, kernel_size=wide, padding=0)
        self.conv_ba = nn.Conv2d(in_channels=filter1, out_channels=filter2, kernel_size=tall, padding=0)
        self.conv_bb = nn.Conv2d(in_channels=filter1, out_channels=filter2, kernel_size=wide, padding=0)

        self.relu = nn.ReLU()

        # On a 4x4 input, tall+tall and wide+wide leave 2x4 / 4x2 = 8 positions,
        # while the mixed branches leave 3x3 = 9 positions.
        same_orientation_features = filter2 * 8
        mixed_orientation_features = filter2 * 9
        self.W_aa = nn.Linear(same_orientation_features, 1)
        self.W_ab = nn.Linear(mixed_orientation_features, 1)
        self.W_ba = nn.Linear(mixed_orientation_features, 1)
        self.W_bb = nn.Linear(same_orientation_features, 1)

    def flatten(self, x):
        """Collapse every dimension after the first into a single one."""
        batch = x.size(0)
        return x.view(batch, -1)

    def forward(self, x):
        """Return one value per batch element; the input is cast to float32 first."""
        x = x.float()
        first_tall = self.relu(self.conv_a(x))
        first_wide = self.relu(self.conv_b(x))

        head_aa = self.W_aa(self.flatten(self.relu(self.conv_aa(first_tall))))
        head_ab = self.W_ab(self.flatten(self.relu(self.conv_ab(first_tall))))
        head_ba = self.W_ba(self.flatten(self.relu(self.conv_ba(first_wide))))
        head_bb = self.W_bb(self.flatten(self.relu(self.conv_bb(first_wide))))

        return head_aa + head_ab + head_ba + head_bb
        

In [162]:
# Score table indexed by the integer stored in a board cell. Index 0 (the value
# add_two and game_state treat as an empty cell) scores 0; for k >= 1 the entry
# equals (k - 1) * 2**k. The table has 17 entries, so cell values 0..16 are covered.
singleScore=[0,0,4,16,48,128,320,768,1792,4096,9216,20480,45056,98304,212992,458752,983040]
# Precomputed array loaded from disk; move() indexes it with four cell values and
# takes the whole last axis.
# NOTE(review): presumably maps a 4-cell line to that line after sliding/merging --
# confirm against whatever generated move.npy.
moveDict=np.load('move.npy')

def add_two(mat):
    """Write a 1 into one uniformly chosen zero cell of `mat` and return `mat`.

    The array is modified in place; the returned object is the same array.
    A ValueError from np.random.randint propagates when `mat` has no zero cell.
    """
    empty_cells = np.argwhere(mat == 0)
    pick = np.random.randint(0, len(empty_cells))
    target = tuple(empty_cells[pick])
    mat[target] = 1
    return mat

def game_state(mat):
    """Return 'not over' while the board still offers a move, otherwise 'lose'.

    A board offers a move when it contains a zero cell, or when two
    horizontally or vertically adjacent cells hold the same value.
    """
    if np.any(mat == 0):
        return 'not over'

    left, right = mat[:, :-1], mat[:, 1:]
    if np.any(left == right):
        return 'not over'

    upper, lower = mat[:-1, :], mat[1:, :]
    if np.any(upper == lower):
        return 'not over'

    return 'lose'

def move(list):
    """Look up the entry of `moveDict` addressed by the first four items of `list`.

    The four items are used as indices into the first four axes of `moveDict`
    and the complete last axis is returned. moveGrid uses the un-flipped lookup
    for its "left"/"up" moves.
    """
    first, second, third, fourth = list[0], list[1], list[2], list[3]
    return moveDict[first, second, third, fourth, :]

@np.vectorize
def lookup(x):
    """Return the `singleScore` entry for cell value `x`; vectorized over arrays."""
    return singleScore[x]

def getScore(matrix):
    """Return the sum of the `lookup` score of every cell in `matrix`."""
    cell_scores = lookup(matrix)
    return np.sum(cell_scores)

In [163]:
def getMove(grid):
    """List every direction whose move actually changes `grid`.

    Returns:
        A list of (direction, resulting_grid, score_of_resulting_grid) tuples,
        in increasing direction order (0..3). Directions that leave the grid
        unchanged are omitted, so an empty list means no move is possible.
    """
    candidates = []
    for direction in range(4):
        shifted = moveGrid(grid, direction)
        if isSame(grid, shifted):
            # This direction does nothing, so it is not a legal move.
            continue
        candidates.append((direction, shifted, getScore(shifted)))
    return candidates
        
def moveGrid(grid,i):
    """Return a new integer grid obtained by moving `grid` in direction `i`.

    Directions: 0 = up, 1 = left, 2 = down, 3 = right. Any other value of `i`
    yields None. Each of the four lines is pushed through `move`; for down and
    right the line is reversed before the lookup and reversed back afterwards,
    and for up and down the grid is transposed so that columns become rows.
    """
    if i not in (0, 1, 2, 3):
        return None

    along_columns = i in (0, 2)   # up / down operate on columns
    toward_end = i in (2, 3)      # down / right push toward the last index

    lines = np.transpose(grid) if along_columns else grid

    moved_lines = []
    for row in range(4):
        line = lines[row, :]
        if toward_end:
            moved_lines.append(np.flip(move(np.flip(line))))
        else:
            moved_lines.append(move(line))

    result = np.stack(moved_lines, axis = 0).astype(int)
    # Undo the transpose so the result is oriented like the input.
    return result.T if along_columns else result

def isSame(grid1,grid2):
    """Return a truthy value exactly when every element of the two grids matches."""
    cell_matches = grid1 == grid2
    return np.all(cell_matches)

In [164]:
def make_input(grid):
    """One-hot encode the top-left 4x4 cells of `grid` into a (16, 4, 4) float array.

    For each position (i, j), plane number grid[i, j] gets a 1 at (i, j); every
    other entry stays 0.
    """
    planes = np.zeros(shape=(16, 4, 4))
    for (i, j) in np.ndindex(4, 4):
        planes[grid[i, j], i, j] = 1
    return planes

def Vchange(grid, v):
    """Build an 8-sample training batch from one encoded board and one target.

    The batch holds `grid` and its copy with axes 1 and 2 swapped, each in four
    variants: as is, axis 1 reversed, axis 2 reversed, and both reversed.

    Args:
        grid: array with at least three axes (axis 0 is left untouched).
        v: target value repeated for all eight samples.

    Returns:
        (x, y): x is a tensor stacking the eight variants along a new first
        axis; y is a float32 tensor of shape (8,) filled with `v`.
    """
    variants = []
    for base in (grid, grid.swapaxes(1, 2)):
        axis2_reversed = base[:, :, ::-1]
        variants.append(base)
        variants.append(base[:, ::-1, :])
        variants.append(axis2_reversed)
        variants.append(axis2_reversed[:, ::-1, :])

    # np.array copies the reversed views into one contiguous block, which
    # torch.from_numpy requires (it rejects negative strides).
    xtrain = np.array(variants)
    ytrain = np.array([v] * 8)

    inputs = torch.from_numpy(xtrain)
    targets = torch.from_numpy(ytrain).float()
    return inputs, targets

In [None]:
def gen_sample_and_learn(model, optimizer, loss_fn, is_train = False, explorationProb=0.2):
    """Play one game with `model` as the move evaluator, optionally training as it goes.

    Each turn a tile is added to the board, every legal move is scored as
    (score gained by the move) + (model prediction for the resulting board),
    and the highest-scoring move is selected. When `is_train` is true the model
    is updated after every turn towards that best value for the current board
    (0 for a board with no legal move), using the 8 variants from `Vchange`.

    Args:
        model: callable network; receives a (N, 16, 4, 4) tensor and returns one
            value per sample.
        optimizer: optimizer stepping `model`'s parameters; only used when
            `is_train` is true.
        loss_fn: loss taking (prediction, target); only used when `is_train`
            is true.
        is_train: whether to update the model and to explore.
        explorationProb: when training, probability of playing a uniformly
            random legal move instead of the best one.

    Returns:
        (game_len, last_loss, last_grid, pre_score): number of moves played,
        loss of the final update (0 if none happened), the final board, and
        the board score after the last move played.
    """
    model.eval()
    game_len = 0
    last_loss = 0
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
    last_grid = np.zeros((4, 4), dtype=int)
    last_grid = add_two(last_grid)
    pre_score = 0

    while True:
        # Together with the add_two call above, the first turn starts with two tiles.
        last_grid = add_two(last_grid)
        board_list = getMove(last_grid)

        if len(board_list) == 0:
            assert(game_state(last_grid)=='lose')
            best_v = 0
            best_grid = None
        else:
            game_len += 1
            best_v = None
            # Pure evaluation: no gradients are needed, so skip building graphs.
            with torch.no_grad():
                for (_, newGrid, score) in board_list:
                    grid = torch.from_numpy(make_input(newGrid))
                    pred = model(grid.unsqueeze(0)).item()
                    v = score - pre_score + pred
                    if best_v is None or v > best_v:
                        best_v = v
                        best_score = score
                        best_grid = newGrid

        if is_train:
            x, y = Vchange(make_input(last_grid), best_v)
            model.train()
            optimizer.zero_grad()
            pred = model(x)
            # Align the (8,) target with the (8, 1) prediction. Without this the
            # loss broadcasts to an (8, 8) comparison and PyTorch warns; for a
            # mean-reduced loss the value is unchanged because all targets are equal.
            loss = loss_fn(pred, y.view_as(pred)) / 2
            loss.backward()
            last_loss = loss.item()
            nn.utils.clip_grad_norm_(model.parameters(), 5.0)
            optimizer.step()
            model.eval()

        if best_grid is None:
            # No legal move: the game is over (after the terminal update, if training).
            break

        if game_len % 20 == 0:
            print ('game length',game_len, last_loss, best_score, best_v)

        # Epsilon-greedy: occasionally play a random legal move while training.
        if is_train and random.random() < explorationProb:
            (_, last_grid, pre_score) = random.choice(board_list)
        else:
            last_grid = best_grid
            pre_score = best_score

    return game_len, last_loss, last_grid, pre_score

In [None]:
# Hyperparameters read as globals by train() and test().
num_epochs = 200  # number of games played per train()/test() call
lr = 1e-3  # learning rate passed to Adam in train()
weight_decay = 1e-5  # weight decay passed to Adam in train()


def train(model):
    """Train `model` by playing `num_epochs` self-play games, one game per epoch.

    Uses Adam (module-level `lr` and `weight_decay`, betas (0.5, 0.999)) with a
    mean-squared-error loss, and prints a one-line summary after every game.
    """
    criterion = nn.MSELoss()
    adam = optim.Adam(
        model.parameters(),
        lr=lr,
        weight_decay=weight_decay,
        betas=(0.5, 0.999),
    )

    games_done = 0
    while games_done != num_epochs:
        games_done += 1
        game_len, last_loss, last_grid, pre_score = gen_sample_and_learn(model, adam, criterion, True)
        print ('epoch', games_done, game_len, last_loss, last_grid, pre_score)
        
def test(model):
    """Play `num_epochs` evaluation games with `model` and print each result.

    No training happens: gen_sample_and_learn is called with is_train=False, in
    which case it never touches the optimizer or the loss function, so None is
    passed for both.
    """
    # The original read `epoch`, `optimizer` and `loss` without ever defining
    # them in this function, so it failed on the first line.
    for epoch in range(1, num_epochs + 1):
        game_len, last_loss, last_grid, pre_score = gen_sample_and_learn(model, None, None, False)
        print ('epoch', epoch, game_len, last_loss, last_grid, pre_score)
    
# Build the network with its default layer sizes and start training right away.
model = NN2048() 
train(model)
            

game length 20 340.8243713378906 64 4034.32861328125
game length 40 1967.937255859375 232 10495.3662109375
game length 60 2424.242919921875 332 27111.935546875
game length 80 874.003662109375 592 69763.0703125
game length 100 5132.95068359375 744 90641.5703125
game length 120 12935.3134765625 948 88363.5078125
epoch 1 127 4030662912.0 [[2 4 6 1]
 [4 1 5 2]
 [6 5 3 1]
 [1 2 1 2]] 1024
game length 20 36737668.0 88 116018.875
game length 40 366890784.0 228 355988.375
game length 60 9284000.0 444 1643972.25
game length 80 13080116224.0 700 3998949.5
game length 100 1734656000.0 848 6263317.0
game length 120 3040931072.0 1000 10290032.0
game length 140 16695052288.0 1208 15214809.0
game length 160 39892365312.0 1484 17883878.0
epoch 2 168 184322816475136.0 [[1 2 6 4]
 [2 3 4 2]
 [3 5 3 7]
 [2 3 5 1]] 1520
game length 20 767477350400.0 136 25173150.0
game length 40 2005612160.0 212 22661820.0
game length 60 2345241088.0 476 28173902.0
game length 80 393437478912.0 820 29329692.0
game length 