In [1]:
import torch
from torch import nn
import torch.optim as optim
import numpy as np
import random

In [2]:
class NN2048(nn.Module):
    """Convolutional value network for an encoded 2048 board.

    The input is a batch of ``(input_size, H, W)`` planes; the sizes of the
    final ``nn.Linear`` heads correspond to a 4x4 spatial input. The network
    is a sum of eleven scalar heads, each fed by a different stack of small
    convolutions (1xk / kx1 / 2x2 kernels) over the board.

    Args:
        input_size: number of input channels (planes per board).
        filter1: channel count of the first convolution stage.
        filter2: channel count of the second convolution stage.
        drop_prob: accepted for interface compatibility; not used by any layer.
    """

    def __init__(self, input_size=16, filter1=128, filter2=1024, drop_prob=0.):
        super().__init__()

        def conv(n_in, n_out, kernel):
            # Every convolution in this model is unpadded.
            return nn.Conv2d(in_channels=n_in, out_channels=n_out, kernel_size=kernel, padding=0)

        # Layers are registered in a fixed order: it determines both the
        # state_dict key order and the order in which random initial weights
        # are drawn.

        # Six independent 1x1 stems: conv_1 .. conv_6.
        for idx in range(1, 7):
            setattr(self, 'conv_%d' % idx, conv(input_size, input_size, (1, 1)))

        # First stage: input_size -> filter1 channels.
        first_stage = (
            ('a', (2, 1)), ('a3', (3, 1)), ('a4', (4, 1)),
            ('b', (1, 2)), ('b3', (1, 3)), ('b4', (1, 4)),
            ('c', (2, 2)),
        )
        for tag, kernel in first_stage:
            setattr(self, 'conv_' + tag, conv(input_size, filter1, kernel))

        # Second stage: filter1 -> filter2 channels.
        second_stage = (
            ('aa', (2, 1)), ('ab', (1, 2)), ('ba', (2, 1)), ('bb', (1, 2)),
            ('ab3', (1, 3)), ('ba3', (3, 1)), ('ab4', (1, 4)), ('ba4', (4, 1)),
            ('c2', (2, 2)),
        )
        for tag, kernel in second_stage:
            setattr(self, 'conv_' + tag, conv(filter1, filter2, kernel))

        self.pool = nn.MaxPool2d(2)
        self.relu = nn.ReLU()

        # Scalar heads; the multiplier is the number of spatial positions left
        # in the corresponding second-stage feature map.
        head_cells = (
            ('aa', 8), ('ab', 9), ('ba', 9), ('bb', 8),
            ('ab3', 4), ('ba3', 4), ('ab4', 1), ('ba4', 1),
            ('c', 1),
        )
        for tag, cells in head_cells:
            setattr(self, 'W_' + tag, nn.Linear(filter2 * cells, 1))
        self.W_5 = nn.Linear(input_size * 4, 1)
        self.W_6 = nn.Linear(input_size * 16, 1)

    def flatten(self, x):
        """Collapse every dimension after the batch dimension into one."""
        return x.view(x.size(0), -1)

    def forward(self, x):
        """Return one value per board, shape ``(N, 1)``."""
        x = x.float()
        act = self.relu

        # 1x1 stems.
        stem1 = act(self.conv_1(x))
        stem2 = act(self.conv_2(x))
        stem3 = act(self.conv_3(x))
        stem4 = act(self.conv_4(x))
        feat5 = self.flatten(act(self.conv_5(self.pool(x))))
        feat6 = self.flatten(act(self.conv_6(x)))

        # First stage.
        col2 = act(self.conv_a(stem1))
        row2 = act(self.conv_b(stem1))
        sq2 = act(self.conv_c(stem2))
        col3 = act(self.conv_a3(stem3))
        row3 = act(self.conv_b3(stem3))
        col4 = act(self.conv_a4(stem4))
        row4 = act(self.conv_b4(stem4))

        # Second stage, flattened for the linear heads.
        f_aa = self.flatten(act(self.conv_aa(col2)))
        f_ab = self.flatten(act(self.conv_ab(col2)))
        f_ba = self.flatten(act(self.conv_ba(row2)))
        f_bb = self.flatten(act(self.conv_bb(row2)))
        f_ab3 = self.flatten(act(self.conv_ab3(col3)))
        f_ba3 = self.flatten(act(self.conv_ba3(row3)))
        f_ab4 = self.flatten(act(self.conv_ab4(col4)))
        f_ba4 = self.flatten(act(self.conv_ba4(row4)))
        f_c = self.flatten(self.pool(act(self.conv_c2(sq2))))

        # Heads are accumulated left to right in this exact order so the
        # floating-point sum is reproducible.
        heads = (
            self.W_aa(f_aa), self.W_ab(f_ab), self.W_ba(f_ba), self.W_bb(f_bb),
            self.W_ab4(f_ab4), self.W_ba4(f_ba4), self.W_c(f_c),
            self.W_ab3(f_ab3), self.W_ba3(f_ba3), self.W_5(feat5), self.W_6(feat6),
        )
        out = heads[0]
        for term in heads[1:]:
            out = out + term
        return out

In [3]:
def make_input(grid):
    """One-hot encode a 4x4 grid of tile codes into a ``(16, 4, 4)`` float array.

    Plane ``k`` holds a 1 at ``(i, j)`` exactly when ``grid[i, j] == k``;
    every other entry is 0.
    """
    planes = np.zeros(shape=(16, 4, 4))
    for i, j in np.ndindex(4, 4):
        planes[grid[i, j], i, j] = 1
    return planes

def add_two(mat):
    """Write a 1 into a uniformly chosen empty (zero) cell of ``mat``.

    The array is modified in place and also returned. With tiles stored as
    exponents, code 1 is the "2" tile.
    """
    empty_cells = np.argwhere(mat == 0)
    pick = np.random.randint(0, len(empty_cells))
    target = tuple(empty_cells[pick])
    mat[target] = 1
    return mat

In [4]:
# Score credited for a single cell, indexed by the cell's code k (getScore sums
# this over every cell of a grid). Each entry equals (k - 1) * 2**k for k >= 1,
# and index 0 (an empty cell) scores 0.
singleScore=[0,0,4,16,48,128,320,768,1792,4096,9216,20480,45056,98304,212992,458752,983040]
# Precomputed move table read from 'move.npy' in the working directory.
# move() indexes it with the four cell codes of one line and returns the
# trailing axis as the resulting line.
# NOTE(review): presumably each entry is the line after sliding/merging towards
# index 0 - confirm against how move.npy was generated.
moveDict=np.load('move.npy')

def move(list):
    """Look up the result of moving one line of four cells in ``moveDict``.

    ``list`` is any indexable holding the four cell codes; the return value
    is the trailing axis of the table entry for those codes.
    """
    first, second, third, fourth = list[0], list[1], list[2], list[3]
    return moveDict[first, second, third, fourth, :]

@np.vectorize
def lookup(x):
    """Return the ``singleScore`` entry for cell code ``x``.

    Wrapped with ``np.vectorize`` so it can be applied element-wise to a
    whole grid.
    """
    return singleScore[x]

def getScore(matrix):
    """Total score of a grid: the sum of ``lookup`` over all of its cells."""
    per_cell = lookup(matrix)
    return np.sum(per_cell)

def getMove(grid):
    """List every move that actually changes ``grid``.

    Returns a list of ``(new_grid, direction, score)`` tuples, one per
    direction 0..3 whose result differs from ``grid``; ``score`` is
    ``getScore(new_grid)``. The list is empty when no move is possible.
    """
    candidates = []
    for direction in range(4):
        moved = moveGrid(grid, direction)
        if isSame(grid, moved):
            # A move that leaves the board untouched is not legal.
            continue
        candidates.append((moved, direction, getScore(moved)))
    return candidates
        
def moveGrid(grid,i):
    """Apply move ``i`` to ``grid`` and return the resulting integer grid.

    Direction codes: 0 = up, 1 = left, 2 = down, 3 = right. Any other value
    returns ``None``. Each line is resolved with ``move``; vertical moves
    work on the transposed grid and right/down moves on reversed lines.
    """
    if i not in (0, 1, 2, 3):
        return None

    vertical = i in (0, 2)    # up / down operate on columns
    reverse = i in (2, 3)     # down / right push towards the far end

    if vertical:
        grid = np.transpose(grid)

    lines = []
    for row in range(4):
        line = grid[row, :]
        if reverse:
            line = np.flip(move(np.flip(line)))
        else:
            line = move(line)
        lines.append(line)

    new = np.stack(lines, axis = 0).astype(int)
    return new.T if vertical else new

def isSame(grid1,grid2):
    """Return True when both grids have the same shape and identical cells.

    ``np.array_equal`` is used instead of ``np.all(grid1 == grid2)`` because
    the latter broadcasts: grids of different shapes could compare equal, or
    warn/raise depending on the NumPy version.
    """
    return np.array_equal(grid1, grid2)

In [5]:
def Vchange(grid, v):
    """Expand one encoded board into its 8 mirrored/transposed variants.

    ``grid`` is a 3-D array whose last two axes are the board. The result
    stacks, in order: the board, its flip along axis 1, its flip along
    axis 2, both flips, and then the same four variants of the board with
    axes 1 and 2 swapped.

    Returns:
        ``(xtrain, ytrain)`` where ``xtrain`` has a new leading axis of
        length 8 and ``ytrain`` is ``v`` repeated 8 times.
    """
    variants = []
    for base in (grid, grid.swapaxes(1, 2)):
        variants.extend([
            base,
            base[:, ::-1, :],
            base[:, :, ::-1],
            base[:, ::-1, ::-1],
        ])
    xtrain = np.array(variants)
    ytrain = np.array([v] * 8)
    return xtrain, ytrain

def gen_sample_and_learn(model, optimizer, loss_fn, is_train = False, explorationProb=0.1):
    """Play one game of 2048 with ``model`` as value function, optionally learning online.

    Every turn a tile is spawned, each legal move is valued as
    ``(score gain) + model(resulting board)`` and the best one is selected.
    When ``is_train`` is true, the previously chosen board (together with its
    7 mirrored/transposed variants from ``Vchange``) is regressed towards that
    best value with one optimizer step per turn; once no move is left the
    target is 0.

    Args:
        model: network mapping a batch of ``(16, 4, 4)`` encoded boards to one
            value per board. Tensors are moved to the device its parameters
            live on.
        optimizer: optimizer over ``model``'s parameters; only used when
            ``is_train`` is true.
        loss_fn: callable ``loss_fn(pred, target)``; only used when
            ``is_train`` is true.
        is_train: perform the per-turn update and allow exploration.
        explorationProb: probability of playing a uniformly random legal move
            instead of the best one (training only).

    Returns:
        ``(game_len, max_tile, game_score, last_loss)``: number of moves
        played, value of the largest tile (``2 ** max code``), score of the
        last chosen board, and the loss of the last update (0 if no update
        was made).
    """
    # Follow the model's device instead of assuming CUDA is available.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    game_len = 0
    game_score = 0
    # np.int was removed in NumPy 1.24; the builtin int is the same dtype.
    last_grid1 = np.zeros((4, 4), dtype=int)
    last_grid1 = add_two(last_grid1)
    last_grid2 = make_input(last_grid1)
    last_loss = 0

    while True:
        # add_two mutates last_grid1 in place: grid_array is the board after
        # the random spawn, last_grid2 still encodes the board before it.
        grid_array = add_two(last_grid1)
        board_list = getMove(grid_array)
        if board_list:
            boards = np.array([make_input(g) for g, m, s in board_list])
            # Pure evaluation: no autograd graph is needed here.
            with torch.no_grad():
                p = model(torch.from_numpy(boards).to(device)).flatten().tolist()
            game_len += 1
            best_v = None
            for i, (g, m, s) in enumerate(board_list):
                # Immediate score gain plus the estimated future value.
                v = (s - game_score) + p[i]
                if best_v is None or v > best_v:
                    best_v = v
                    best_score = s
                    best_grid1 = g
                    best_grid2 = boards[i]
        else:
            # Game over: nothing more can be gained from the previous board.
            best_v = 0
            best_grid1 = None
            best_grid2 = None

        if is_train:
            x, y = Vchange(last_grid2, best_v)
            x = torch.from_numpy(x).to(device)
            y = torch.from_numpy(y).unsqueeze(dim=1).to(device).float()
            model.train()
            optimizer.zero_grad()
            pred = model(x)
            loss = loss_fn(pred, y) / 4
            last_loss = loss.item()
            loss.backward()
            optimizer.step()
            model.eval()

        if not board_list:
            break

        # epsilon-greedy: occasionally play a random legal move while training
        if is_train and random.random() < explorationProb:
            idx = random.randint(0, len(board_list) - 1)
            game_score = board_list[idx][2]
            last_grid1 = board_list[idx][0]
            last_grid2 = boards[idx]
        else:
            game_score = best_score
            last_grid1 = best_grid1
            last_grid2 = best_grid2

    return game_len, 2**grid_array.max(), game_score, last_loss

In [6]:
# Adam hyper-parameters.
lr = 1e-3
weight_decay = 1e-6
beta1 = 0.8

# Use the GPU when one is present, otherwise fall back to the CPU so this
# cell does not crash on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = NN2048().to(device)
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay, betas=(beta1, 0.999))
loss=nn.MSELoss()

In [None]:
num_epochs = 500

def train(model, optimizer, loss, epochs=None):
    """Play ``epochs`` training games, printing one summary line per game.

    Args:
        model: network to train.
        optimizer: optimizer over ``model``'s parameters.
        loss: loss callable handed to ``gen_sample_and_learn``.
        epochs: number of games to play; defaults to the module-level
            ``num_epochs`` at call time.

    Each printed line is: ``epoch <n> <moves> <max tile> <score> <last loss>``.
    """
    if epochs is None:
        epochs = num_epochs
    # A bounded range (rather than counting until `epoch != num_epochs` fails)
    # guarantees termination for zero or negative epoch counts.
    for epoch in range(1, epochs + 1):
        # Exploration probability 0: always play the best-valued move.
        game_len, max_score, game_score, last_loss = gen_sample_and_learn(model, optimizer, loss, True, 0)
        print ('epoch', epoch, game_len, max_score, game_score, last_loss)

train(model, optimizer, loss)

epoch 1 103 64 740 9871229.0
epoch 2 118 64 928 3312308.5
epoch 3 157 128 1452 2230751.75
epoch 4 172 128 1576 1470255.625
epoch 5 207 128 1932 1086375.375
epoch 6 163 128 1492 826240.375
epoch 7 209 128 2000 760496.125
epoch 8 425 512 5628 3388011.25
epoch 9 216 256 2408 552132.5
epoch 10 253 256 2776 652378.0625
epoch 11 328 256 3824 1228282.25
epoch 12 434 256 5276 2206250.5
epoch 13 319 256 3708 691608.5625
epoch 14 360 256 4180 1004677.75
epoch 15 240 256 2684 606612.5
epoch 16 491 512 6732 1481501.5
epoch 17 368 256 4184 905483.0
epoch 18 465 512 6464 1924369.0
epoch 19 505 512 6976 2630374.0
epoch 20 603 512 8112 2274636.5


In [9]:
num_epochs = 50

def test(model, epochs=None):
    """Play ``epochs`` evaluation games (no learning), printing one line per game.

    Args:
        model: network used to value moves.
        epochs: number of games to play; defaults to the module-level
            ``num_epochs`` at call time.

    Each printed line is: ``epoch <n> <moves> <max tile> <score> <last loss>``;
    the loss column stays 0 because no update is performed.
    """
    if epochs is None:
        epochs = num_epochs
    # A bounded range (rather than counting until `epoch != num_epochs` fails)
    # guarantees termination for zero or negative epoch counts.
    for epoch in range(1, epochs + 1):
        game_len, max_score, game_score, last_loss = gen_sample_and_learn(model, None, None, False)
        print ('epoch', epoch, game_len, max_score, game_score, last_loss)

test(model)

epoch 1 1321 2048 23880 0
epoch 2 1298 2048 23640 0
epoch 3 1544 2048 27648 0
epoch 4 1051 1024 16572 0
epoch 5 1009 1024 16024 0
epoch 6 1451 2048 26220 0
epoch 7 420 512 5604 0
epoch 8 1056 1024 16684 0
epoch 9 1819 2048 32948 0
epoch 10 1040 1024 16464 0
epoch 11 1078 1024 16864 0
epoch 12 1086 1024 16972 0
epoch 13 1922 2048 34984 0
epoch 14 1015 1024 16216 0
epoch 15 1433 2048 25980 0
epoch 16 982 1024 15736 0
epoch 17 1558 2048 27800 0
epoch 18 1248 1024 19928 0
epoch 19 784 1024 12364 0
epoch 20 575 512 7696 0
epoch 21 1919 2048 35016 0
epoch 22 1075 1024 17192 0
epoch 23 1048 1024 16520 0
epoch 24 795 1024 12460 0
epoch 25 743 1024 11772 0
epoch 26 1614 2048 28492 0
epoch 27 915 1024 14684 0
epoch 28 1059 1024 16628 0
epoch 29 1026 1024 16344 0
epoch 30 528 512 7232 0
epoch 31 1035 1024 16428 0
epoch 32 1032 1024 16412 0
epoch 33 1433 2048 25992 0
epoch 34 2035 2048 36472 0
epoch 35 1039 1024 16460 0
epoch 36 795 1024 12460 0
epoch 37 1348 2048 24120 0
epoch 38 1020 1024 16300 

In [None]:
import os
# Checkpoints are written to <experiment_dir>/<filename>, relative to the
# working directory.
experiment_dir = "model"
filename = "model3.pth.tar"
# Stored as the 'epoch' field of the checkpoint written below.
num_epochs = 200

def save_model(state, filename='model.pth.tar'):
    """Serialize ``state`` with ``torch.save`` into ``experiment_dir``.

    Args:
        state: any object ``torch.save`` can serialize (here a dict holding
            the model and optimizer state).
        filename: file name (or relative path) inside ``experiment_dir``.

    The target directory is created if it does not exist yet, so the first
    save in a fresh checkout does not fail.
    """
    filename = os.path.join(experiment_dir, filename)
    os.makedirs(os.path.dirname(filename) or '.', exist_ok=True)
    torch.save(state, filename)

# Write a checkpoint holding the epoch field, the model weights and the
# optimizer state.
# NOTE: model.cpu() moves the live model to the CPU (nn.Module.cpu() works in
# place), so the model must be moved back before it is used on the GPU again.
save_model({
    # The current value of the module-level num_epochs, not a count of the
    # games that were actually played.
    'epoch': num_epochs,
    'state_dict': model.cpu().state_dict(),
    'optimizer': optimizer.state_dict(),
}, filename)