In [1]:
import torch
from torch import nn
import torch.optim as optim
import numpy as np
import random

In [2]:
class NN2048(nn.Module):
    """Scalar value network made of two stacked layers of tiny convolutions.

    The first layer applies three kernels (2x1, 1x2 and 2x2) to the input.
    The second layer applies 2x1 and 1x2 kernels to each of the first two
    feature maps and a 2x2 kernel to the third, giving five branches. Each
    branch is flattened, passed through its own linear head producing one
    number, and the five numbers are added together.

    The linear heads expect 8, 9, 9, 8 and 4 spatial positions respectively,
    which is what a 4x4 spatial input yields with unpadded convolutions.

    Args:
        input_size: number of input channels.
        filter1: output channels of every first-layer convolution.
        filter2: output channels of every second-layer convolution.
        drop_prob: accepted for interface compatibility; not used by this class.
    """

    def __init__(self, input_size=16, filter1=256, filter2=2048, drop_prob=0.):
        super().__init__()
        tall, wide, square = (2, 1), (1, 2), (2, 2)

        # First layer: one convolution per kernel shape, all fed by the input.
        self.conv_a = nn.Conv2d(input_size, filter1, kernel_size=tall, padding=0)
        self.conv_b = nn.Conv2d(input_size, filter1, kernel_size=wide, padding=0)
        self.conv_c = nn.Conv2d(input_size, filter1, kernel_size=square, padding=0)

        # Second layer: the attribute suffix names the kernel path (e.g. "ab" = a then 1x2).
        self.conv_aa = nn.Conv2d(filter1, filter2, kernel_size=tall, padding=0)
        self.conv_ab = nn.Conv2d(filter1, filter2, kernel_size=wide, padding=0)
        self.conv_ba = nn.Conv2d(filter1, filter2, kernel_size=tall, padding=0)
        self.conv_bb = nn.Conv2d(filter1, filter2, kernel_size=wide, padding=0)
        self.conv_cc = nn.Conv2d(filter1, filter2, kernel_size=square, padding=0)

        self.relu = nn.ReLU()

        # One linear head per branch; in_features = channels * remaining positions.
        self.W_aa = nn.Linear(8 * filter2, 1)
        self.W_ab = nn.Linear(9 * filter2, 1)
        self.W_ba = nn.Linear(9 * filter2, 1)
        self.W_bb = nn.Linear(8 * filter2, 1)
        self.W_cc = nn.Linear(4 * filter2, 1)

    def flatten(self, x):
        """Collapse every dimension after the first into one."""
        batch = x.size(0)
        return x.view(batch, -1)

    def forward(self, x):
        """Return one value per sample, shaped (N, 1); the input is cast to float32."""
        x = x.float()
        feat_a = self.relu(self.conv_a(x))
        feat_b = self.relu(self.conv_b(x))
        feat_c = self.relu(self.conv_c(x))

        # (second-layer conv, linear head, first-layer feature map it reads)
        branches = (
            (self.conv_aa, self.W_aa, feat_a),
            (self.conv_ab, self.W_ab, feat_a),
            (self.conv_ba, self.W_ba, feat_b),
            (self.conv_bb, self.W_bb, feat_b),
            (self.conv_cc, self.W_cc, feat_c),
        )
        out = None
        for conv, head, feat in branches:
            term = head(self.flatten(self.relu(conv(feat))))
            out = term if out is None else out + term
        return out

In [3]:
def make_input(grid):
    """One-hot encode the top-left 4x4 cells of `grid` into a (16, 4, 4) float array.

    For every position (i, j) the output holds a 1 in channel grid[i, j] and
    zeros in the remaining channels, so cell values are used as channel indices.
    """
    encoded = np.zeros(shape=(16, 4, 4))
    for row, col in np.ndindex(4, 4):
        channel = grid[row, col]
        encoded[channel, row, col] = 1
    return encoded

def add_two(mat):
    """Write the value 1 into one uniformly chosen zero cell of `mat`, in place.

    The same array object is returned for convenience. If `mat` contains no
    zero cell, np.random.randint is asked for an empty range and raises
    ValueError.
    """
    empty_cells = np.argwhere(mat == 0)
    choice = np.random.randint(0, len(empty_cells))
    target = tuple(empty_cells[choice])
    mat[target] = 1
    return mat

In [4]:
# Score table indexed by a cell's stored value k. For k >= 1 the entries equal
# (k - 1) * 2**k; index 0 (used for empty cells by add_two/make_input) scores 0.
singleScore=[0,0,4,16,48,128,320,768,1792,4096,9216,20480,45056,98304,212992,458752,983040]
# Precomputed table loaded from disk; move() indexes it with the four cell values
# of a row and takes the whole trailing axis.
# NOTE(review): presumably that trailing axis is the row after sliding/merging
# toward index 0 — confirm against whatever generated move.npy.
moveDict=np.load('move.npy')

def move(list):
    """Look up the moveDict entry for a row given as four cell values.

    Only the first four elements of `list` are read; they are used as the
    leading indices into moveDict and the full trailing axis is returned.
    """
    first, second, third, fourth = list[0], list[1], list[2], list[3]
    return moveDict[first, second, third, fourth, :]

def lookup(x):
    """Return the singleScore entry for one stored cell value `x`."""
    score = singleScore[x]
    return score

# Rebind the name to an element-wise version so it can be applied to whole boards.
lookup = np.vectorize(lookup)

def getScore(matrix):
    """Total score of a board: the sum of the per-cell score table entries."""
    per_cell = lookup(matrix)
    return per_cell.sum()

def getMove(grid):
    """List every move that actually changes `grid`.

    Each of the four directions (0..3, see moveGrid) is tried. Directions that
    leave the board unchanged are dropped. The result is a list of
    (new_grid, direction, score_of_new_grid) tuples in direction order; it is
    empty when no direction changes the board.
    """
    candidates = []
    for direction in range(4):
        moved = moveGrid(grid, direction)
        if isSame(grid, moved):
            continue
        candidates.append((moved, direction, getScore(moved)))
    return candidates
        
def moveGrid(grid,i):
    """Return a new integer board obtained by sliding `grid` in direction `i`.

    Directions: 0 = up, 1 = left, 2 = down, 3 = right. Any other value
    returns None. Vertical moves work on the transposed board and transpose
    the result back; down/right reverse each row before and after the
    move() lookup.
    """
    if i == 0 or i == 2:
        vertical = True
    elif i == 1 or i == 3:
        vertical = False
    else:
        return None
    reverse_rows = (i == 2 or i == 3)

    source = np.transpose(grid) if vertical else grid
    moved_rows = []
    for row in range(4):
        line = source[row, :]
        if reverse_rows:
            moved_rows.append(np.flip(move(np.flip(line))))
        else:
            moved_rows.append(move(line))

    result = np.stack(moved_rows, axis=0).astype(int)
    return result.T if vertical else result

def isSame(grid1,grid2):
    """True when every cell of `grid1` equals the corresponding cell of `grid2`."""
    cellwise_equal = grid1 == grid2
    return np.all(cellwise_equal)

In [5]:
def Vchange(grid, v):
    """Expand one (grid, value) pair into eight symmetric training samples.

    `grid` is indexed with three axes; axes 1 and 2 are flipped/swapped while
    axis 0 is left alone. The variants are, in order: the grid itself, flipped
    along axis 1, flipped along axis 2, flipped along both, and then the same
    four applied to the grid with axes 1 and 2 swapped.

    Returns:
        (xtrain, ytrain): the eight variants stacked on a new leading axis and
        an array holding `v` repeated eight times.
    """
    variants = []
    for base in (grid, grid.swapaxes(1, 2)):
        flipped_last = base[:, :, ::-1]
        variants.extend([
            base,
            base[:, ::-1, :],
            flipped_last,
            flipped_last[:, ::-1, :],
        ])
    xtrain = np.array(variants)
    ytrain = np.array([v] * len(variants))
    return xtrain, ytrain

def gen_sample_and_learn(model, optimizer, loss_fn, is_train = False, explorationProb=0.1):
    """Play one full game with `model` picking moves and optionally train it online.

    Every turn a new tile is spawned, each board-changing move is scored as
    (score gained by the move) + (model's value of the resulting board), and
    the best one is selected. When `is_train` is true, the board reached after
    the previous move (`last_grid2`) is regressed toward that best value using
    its eight symmetric variants from Vchange; a terminal position uses a
    target of 0.

    Args:
        model: value network; inputs are moved to the device of its parameters.
        optimizer: optimizer stepped after each turn; only used when `is_train`.
        loss_fn: criterion applied to (prediction, target); only used when `is_train`.
        is_train: enable the per-turn update and exploration.
        explorationProb: when training, probability of playing a uniformly
            random board-changing move instead of the best-valued one.

    Returns:
        (game_len, max_tile, game_score, last_loss): number of moves evaluated,
        2 ** (largest stored cell value) on the final board, score of the last
        board played, and the loss of the last update (0 if none was made).
    """
    # Follow the model's device instead of assuming CUDA is available.
    device = next(model.parameters()).device
    model.eval()
    game_len = 0
    game_score = 0
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
    last_grid1 = np.zeros((4, 4), dtype=int)
    last_grid1 = add_two(last_grid1)
    last_grid2 = make_input(last_grid1)
    last_loss = 0

    while True:
        # add_two mutates last_grid1 in place; last_grid2 was encoded earlier and
        # therefore still describes the board before this tile was spawned.
        grid_array = add_two(last_grid1)
        board_list = getMove(grid_array)
        if board_list:
            boards = np.array([make_input(g) for g, m, s in board_list])
            # Values are only used as targets / for move selection, so no
            # autograd graph is needed; tolist() transfers them in one go.
            with torch.no_grad():
                p = model(torch.from_numpy(boards).to(device)).flatten().tolist()
            game_len += 1
            best_v = None
            for i, (g, m, s) in enumerate(board_list):
                v = (s - game_score) + p[i]
                if best_v is None or v > best_v:
                    best_v = v
                    best_score = s
                    best_grid1 = g
                    best_grid2 = boards[i]

        else:
            # No legal move: the game is over and the target value is 0.
            best_v = 0
            best_grid1 = None
            best_grid2 = None

        if is_train:
            x, y = Vchange(last_grid2, best_v)
            x = torch.from_numpy(x).to(device)
            y = torch.from_numpy(y).unsqueeze(dim=1).to(device).float()
            model.train()
            optimizer.zero_grad()
            pred = model(x)
            loss = loss_fn(pred, y) / 2
            last_loss = loss.item()
            loss.backward()
            optimizer.step()
            model.eval()

        if not board_list:
            break

        # Epsilon-greedy exploration (training only); otherwise play the best move.
        if is_train and random.random() < explorationProb:
            idx = random.randint(0, len(board_list) - 1)
            game_score = board_list[idx][2]
            last_grid1 = board_list[idx][0]
            last_grid2 = boards[idx]
        else:
            game_score = best_score
            last_grid1 = best_grid1
            last_grid2 = best_grid2

    return game_len, 2**grid_array.max(), game_score, last_loss

In [6]:
# Adam hyper-parameters.
lr = 1e-4
weight_decay = 0
beta1 = 0.9

# Build the value network on the GPU (this line requires a CUDA device) and the
# Adam optimizer over all of its parameters.
model = NN2048().cuda()
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay, betas=(beta1, 0.999))
# Mean-squared-error criterion; train() forwards it to gen_sample_and_learn as loss_fn.
loss=nn.MSELoss()

In [7]:
import os
# Directory (relative to the working directory) that receives checkpoints.
experiment_dir = "model"

def save_model(state, filename='model.pth.tar'):
    """Serialize `state` with torch.save to experiment_dir/filename.

    The destination directory is created when missing, so a checkpoint taken
    after a long training run is not lost to a missing folder.

    Args:
        state: any object torch.save can serialize (here a checkpoint dict).
        filename: file name, joined onto `experiment_dir`.
    """
    path = os.path.join(experiment_dir, filename)
    target_dir = os.path.dirname(path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    torch.save(state, path)

In [None]:
def load_model(model, optimizer, checkpoint_path, map_location="cuda:0"):
    """Restore model and optimizer state from a checkpoint written by train().

    Args:
        model: module whose parameters are overwritten from the 'state_dict' entry.
        optimizer: optimizer whose state is overwritten from the 'optimizer' entry.
        checkpoint_path: path of the checkpoint file.
        map_location: device onto which stored tensors are mapped; defaults to
            "cuda:0" as before, pass "cpu" to load on a machine without a GPU.

    Returns:
        (model, optimizer, epoch): the two objects passed in, updated in place,
        and the value stored under the checkpoint's 'epoch' key.
    """
    # torch.load unpickles the file: only load checkpoints from a trusted source.
    ckpt_dict = torch.load(checkpoint_path, map_location=map_location)

    model.load_state_dict(ckpt_dict['state_dict'])
    optimizer.load_state_dict(ckpt_dict['optimizer'])
    epoch = ckpt_dict['epoch']
    return model, optimizer, epoch

# model, optimizer, epoch = load_model(model, optimizer, "model/model7_500.pth.tar")

In [None]:
num_epochs = 5000

def train(model, optimizer, loss, num_epochs=num_epochs):
    """Train `model` by self-play for `num_epochs` games, checkpointing every 500.

    Each game is played by gen_sample_and_learn with training enabled and an
    exploration probability of 0, and one summary line is printed per game.

    Args:
        model: value network to train.
        optimizer: optimizer stepped inside gen_sample_and_learn.
        loss: criterion forwarded to gen_sample_and_learn as loss_fn.
        num_epochs: number of games. The default is captured when this function
            is defined, so a later cell rebinding the global `num_epochs` does
            not silently change how long training runs.
    """
    # range() also terminates for zero or negative counts, unlike `while epoch != n`.
    for epoch in range(1, num_epochs + 1):
        game_len, max_score, game_score, last_loss = gen_sample_and_learn(model, optimizer, loss, True, 0)
        print ('Game # ', epoch, 'Game length ', game_len, 'Max score ', max_score, 'Game score ', game_score, flush=True)
        if epoch % 500 == 0:
            filename = "model8_"+str(epoch)+".pth.tar"
            # Remember where the model lives so it can be put back after the
            # temporary move to CPU used to store CPU tensors in the checkpoint.
            device = next(model.parameters()).device
            save_model({
                'epoch': epoch,
                'state_dict': model.cpu().state_dict(),
                'optimizer': optimizer.state_dict(),
            }, filename)
            model.to(device)


train(model, optimizer, loss)

Game #  1 Game length  98 Max score  32 Game score  632
Game #  2 Game length  161 Max score  128 Game score  1484
Game #  3 Game length  219 Max score  256 Game score  2464
Game #  4 Game length  122 Max score  64 Game score  984
Game #  5 Game length  227 Max score  128 Game score  2248
Game #  6 Game length  454 Max score  512 Game score  6060
Game #  7 Game length  410 Max score  512 Game score  5512
Game #  8 Game length  184 Max score  128 Game score  1676
Game #  9 Game length  269 Max score  256 Game score  2944
Game #  10 Game length  281 Max score  256 Game score  3200
Game #  11 Game length  270 Max score  256 Game score  2976
Game #  12 Game length  347 Max score  256 Game score  3924
Game #  13 Game length  279 Max score  256 Game score  3180
Game #  14 Game length  177 Max score  128 Game score  1616
Game #  15 Game length  288 Max score  256 Game score  3276
Game #  16 Game length  347 Max score  256 Game score  4236
Game #  17 Game length  205 Max score  256 Game score 

Game #  136 Game length  429 Max score  512 Game score  5696
Game #  137 Game length  376 Max score  256 Game score  4572
Game #  138 Game length  765 Max score  1024 Game score  12144
Game #  139 Game length  813 Max score  1024 Game score  12784
Game #  140 Game length  656 Max score  512 Game score  9040
Game #  141 Game length  590 Max score  512 Game score  8032
Game #  142 Game length  750 Max score  1024 Game score  11848
Game #  143 Game length  1029 Max score  1024 Game score  16396
Game #  144 Game length  784 Max score  1024 Game score  12364
Game #  145 Game length  650 Max score  512 Game score  8828
Game #  146 Game length  310 Max score  256 Game score  3512
Game #  147 Game length  622 Max score  512 Game score  8524
Game #  148 Game length  542 Max score  512 Game score  7368
Game #  149 Game length  608 Max score  512 Game score  8400
Game #  150 Game length  896 Max score  1024 Game score  14016
Game #  151 Game length  307 Max score  256 Game score  3436
Game #  152

Game #  269 Game length  811 Max score  1024 Game score  12652
Game #  270 Game length  511 Max score  512 Game score  6924
Game #  271 Game length  821 Max score  1024 Game score  12728
Game #  272 Game length  550 Max score  512 Game score  7448
Game #  273 Game length  597 Max score  512 Game score  8080
Game #  274 Game length  1558 Max score  2048 Game score  27772
Game #  275 Game length  756 Max score  1024 Game score  12060
Game #  276 Game length  831 Max score  1024 Game score  12940
Game #  277 Game length  553 Max score  512 Game score  7532
Game #  278 Game length  780 Max score  1024 Game score  12332
Game #  279 Game length  543 Max score  512 Game score  7372
Game #  280 Game length  1047 Max score  1024 Game score  16540
Game #  281 Game length  1026 Max score  1024 Game score  16344
Game #  282 Game length  865 Max score  1024 Game score  13556
Game #  283 Game length  891 Max score  1024 Game score  14436
Game #  284 Game length  542 Max score  512 Game score  7368
G

Game #  401 Game length  1013 Max score  1024 Game score  16180
Game #  402 Game length  951 Max score  1024 Game score  15068
Game #  403 Game length  600 Max score  512 Game score  8336
Game #  404 Game length  914 Max score  1024 Game score  14652
Game #  405 Game length  762 Max score  1024 Game score  12104
Game #  406 Game length  795 Max score  1024 Game score  12460
Game #  407 Game length  535 Max score  512 Game score  7292
Game #  408 Game length  1043 Max score  1024 Game score  16492
Game #  409 Game length  1123 Max score  1024 Game score  18328
Game #  410 Game length  531 Max score  512 Game score  7260
Game #  411 Game length  528 Max score  512 Game score  7244
Game #  412 Game length  803 Max score  1024 Game score  12512
Game #  413 Game length  924 Max score  1024 Game score  14780
Game #  414 Game length  1748 Max score  2048 Game score  32108
Game #  415 Game length  528 Max score  512 Game score  7244
Game #  416 Game length  767 Max score  1024 Game score  1216

Game #  532 Game length  1049 Max score  1024 Game score  16552
Game #  533 Game length  1239 Max score  1024 Game score  20848
Game #  534 Game length  790 Max score  1024 Game score  12408
Game #  535 Game length  1060 Max score  1024 Game score  16660
Game #  536 Game length  1054 Max score  1024 Game score  16588
Game #  537 Game length  528 Max score  512 Game score  7244
Game #  538 Game length  567 Max score  512 Game score  7644
Game #  539 Game length  543 Max score  512 Game score  7372
Game #  540 Game length  1041 Max score  1024 Game score  16472
Game #  541 Game length  594 Max score  512 Game score  8028
Game #  542 Game length  1053 Max score  1024 Game score  16640
Game #  543 Game length  1539 Max score  2048 Game score  27556
Game #  544 Game length  571 Max score  512 Game score  7676
Game #  545 Game length  945 Max score  1024 Game score  15120
Game #  546 Game length  860 Max score  1024 Game score  13996
Game #  547 Game length  394 Max score  256 Game score  48

Game #  663 Game length  991 Max score  1024 Game score  15820
Game #  664 Game length  979 Max score  1024 Game score  15708
Game #  665 Game length  977 Max score  1024 Game score  15696
Game #  666 Game length  1287 Max score  1024 Game score  21500
Game #  667 Game length  548 Max score  512 Game score  7408
Game #  668 Game length  1076 Max score  1024 Game score  16848
Game #  669 Game length  1907 Max score  2048 Game score  34844
Game #  670 Game length  1076 Max score  1024 Game score  17692
Game #  671 Game length  432 Max score  512 Game score  5760
Game #  672 Game length  1687 Max score  2048 Game score  31100
Game #  673 Game length  788 Max score  1024 Game score  12396
Game #  674 Game length  930 Max score  1024 Game score  14824
Game #  675 Game length  772 Max score  1024 Game score  12192
Game #  676 Game length  529 Max score  512 Game score  7248
Game #  677 Game length  1041 Max score  1024 Game score  16472
Game #  678 Game length  898 Max score  1024 Game score

Game #  793 Game length  994 Max score  1024 Game score  15836
Game #  794 Game length  1034 Max score  1024 Game score  16424
Game #  795 Game length  1043 Max score  1024 Game score  16492
Game #  796 Game length  1545 Max score  2048 Game score  27680
Game #  797 Game length  1039 Max score  1024 Game score  16460
Game #  798 Game length  1539 Max score  2048 Game score  27612
Game #  799 Game length  928 Max score  1024 Game score  14800
Game #  800 Game length  2148 Max score  2048 Game score  38224
Game #  801 Game length  212 Max score  256 Game score  2392
Game #  802 Game length  660 Max score  1024 Game score  10588
Game #  803 Game length  799 Max score  1024 Game score  12492
Game #  804 Game length  1014 Max score  1024 Game score  16172
Game #  805 Game length  997 Max score  1024 Game score  15864
Game #  806 Game length  1511 Max score  2048 Game score  27308
Game #  807 Game length  1460 Max score  1024 Game score  24396
Game #  808 Game length  1747 Max score  2048 Ga

Game #  923 Game length  1804 Max score  2048 Game score  32828
Game #  924 Game length  1553 Max score  2048 Game score  27736
Game #  925 Game length  919 Max score  1024 Game score  14732
Game #  926 Game length  1046 Max score  1024 Game score  16508
Game #  927 Game length  537 Max score  512 Game score  7340
Game #  928 Game length  1043 Max score  1024 Game score  16484
Game #  929 Game length  1039 Max score  1024 Game score  16460
Game #  930 Game length  1041 Max score  1024 Game score  16472
Game #  931 Game length  1135 Max score  1024 Game score  17932
Game #  932 Game length  1992 Max score  2048 Game score  36124
Game #  933 Game length  1009 Max score  1024 Game score  16144
Game #  934 Game length  529 Max score  512 Game score  7248
Game #  935 Game length  1298 Max score  2048 Game score  23640
Game #  936 Game length  928 Max score  1024 Game score  14800
Game #  937 Game length  818 Max score  1024 Game score  12824
Game #  938 Game length  1021 Max score  1024 Gam

Game #  1052 Game length  2030 Max score  2048 Game score  36616
Game #  1053 Game length  796 Max score  1024 Game score  12476
Game #  1054 Game length  958 Max score  1024 Game score  15296
Game #  1055 Game length  1010 Max score  1024 Game score  16152
Game #  1056 Game length  1807 Max score  2048 Game score  32844
Game #  1057 Game length  1567 Max score  1024 Game score  25816
Game #  1058 Game length  1049 Max score  1024 Game score  16552
Game #  1059 Game length  1552 Max score  2048 Game score  27728
Game #  1060 Game length  1564 Max score  2048 Game score  27900
Game #  1061 Game length  1098 Max score  1024 Game score  17208
Game #  1062 Game length  554 Max score  512 Game score  7528
Game #  1063 Game length  889 Max score  1024 Game score  14400
Game #  1064 Game length  2026 Max score  2048 Game score  36472
Game #  1065 Game length  1999 Max score  2048 Game score  36172
Game #  1066 Game length  922 Max score  1024 Game score  14760
Game #  1067 Game length  988 Ma

Game #  1179 Game length  1470 Max score  1024 Game score  22996
Game #  1180 Game length  2070 Max score  2048 Game score  37020
Game #  1181 Game length  1436 Max score  2048 Game score  26044
Game #  1182 Game length  544 Max score  512 Game score  7404
Game #  1183 Game length  2035 Max score  2048 Game score  36652
Game #  1184 Game length  1798 Max score  2048 Game score  32784
Game #  1185 Game length  2127 Max score  2048 Game score  37968
Game #  1186 Game length  546 Max score  512 Game score  7400
Game #  1187 Game length  1038 Max score  1024 Game score  16456
Game #  1188 Game length  782 Max score  1024 Game score  12344
Game #  1189 Game length  854 Max score  1024 Game score  13428
Game #  1190 Game length  1872 Max score  2048 Game score  33876
Game #  1191 Game length  803 Max score  1024 Game score  12524
Game #  1192 Game length  1407 Max score  2048 Game score  25724
Game #  1193 Game length  1001 Max score  1024 Game score  15720
Game #  1194 Game length  1685 Max

Game #  1306 Game length  828 Max score  1024 Game score  13136
Game #  1307 Game length  801 Max score  1024 Game score  12504
Game #  1308 Game length  1992 Max score  2048 Game score  36124
Game #  1309 Game length  1527 Max score  2048 Game score  27356
Game #  1310 Game length  1032 Max score  1024 Game score  16384
Game #  1311 Game length  1991 Max score  2048 Game score  36092
Game #  1312 Game length  1043 Max score  1024 Game score  16520
Game #  1313 Game length  1047 Max score  1024 Game score  16540
Game #  1314 Game length  1019 Max score  1024 Game score  16248
Game #  1315 Game length  1548 Max score  2048 Game score  27708
Game #  1316 Game length  1624 Max score  2048 Game score  28844
Game #  1317 Game length  977 Max score  1024 Game score  15696
Game #  1318 Game length  533 Max score  512 Game score  7296
Game #  1319 Game length  1024 Max score  1024 Game score  16272
Game #  1320 Game length  1582 Max score  2048 Game score  28044
Game #  1321 Game length  607 M

Game #  1434 Game length  1544 Max score  2048 Game score  27676
Game #  1435 Game length  1041 Max score  1024 Game score  16472
Game #  1436 Game length  935 Max score  1024 Game score  14908
Game #  1437 Game length  1566 Max score  2048 Game score  27852
Game #  1438 Game length  972 Max score  1024 Game score  15420
Game #  1439 Game length  1799 Max score  2048 Game score  32764
Game #  1440 Game length  1536 Max score  2048 Game score  27536
Game #  1441 Game length  519 Max score  512 Game score  7148
Game #  1442 Game length  1051 Max score  1024 Game score  16572
Game #  1443 Game length  2029 Max score  2048 Game score  36488
Game #  1444 Game length  1313 Max score  2048 Game score  23740
Game #  1445 Game length  943 Max score  1024 Game score  14988
Game #  1446 Game length  1255 Max score  1024 Game score  19316
Game #  1447 Game length  1048 Max score  1024 Game score  16544
Game #  1448 Game length  2163 Max score  2048 Game score  38956
Game #  1449 Game length  1020 

Game #  1562 Game length  1543 Max score  2048 Game score  27644
Game #  1563 Game length  1035 Max score  1024 Game score  16428
Game #  1564 Game length  339 Max score  256 Game score  3932
Game #  1565 Game length  1161 Max score  2048 Game score  21756
Game #  1566 Game length  540 Max score  512 Game score  7356
Game #  1567 Game length  1110 Max score  1024 Game score  17308
Game #  1568 Game length  547 Max score  512 Game score  7464
Game #  1569 Game length  1094 Max score  1024 Game score  17148
Game #  1570 Game length  1823 Max score  2048 Game score  32980
Game #  1571 Game length  1039 Max score  1024 Game score  16460
Game #  1572 Game length  3090 Max score  4096 Game score  61576
Game #  1573 Game length  2104 Max score  2048 Game score  37368
Game #  1574 Game length  1077 Max score  1024 Game score  17036
Game #  1575 Game length  1877 Max score  2048 Game score  33564
Game #  1576 Game length  1039 Max score  1024 Game score  16460
Game #  1577 Game length  1571 Max

In [9]:
num_epochs = 100

def test(model, num_epochs=num_epochs):
    """Play `num_epochs` evaluation games with `model` and print one line per game.

    Games are played by gen_sample_and_learn with training disabled (no
    optimizer, no loss, greedy move selection).

    Args:
        model: value network to evaluate.
        num_epochs: number of games; the default is captured when this function
            is defined.
    """
    # Nothing is back-propagated during evaluation, so skip autograd bookkeeping.
    with torch.no_grad():
        # range() also terminates for zero or negative counts.
        for epoch in range(1, num_epochs + 1):
            game_len, max_score, game_score, last_loss = gen_sample_and_learn(model, None, None, False)
            print ('Game # ', epoch, 'Game length ', game_len, 'Max score ', max_score, 'Game score ', game_score, flush=True)

test(model)

epoch 1 2026 2048 36412 0
epoch 2 2040 2048 36680 0
epoch 3 1060 1024 16660 0
epoch 4 801 1024 12488 0
epoch 5 727 1024 11628 0
epoch 6 1042 1024 16476 0
epoch 7 796 1024 12476 0
epoch 8 925 1024 14280 0
epoch 9 1552 2048 27728 0
epoch 10 1801 2048 32800 0
epoch 11 1547 2048 27692 0
epoch 12 1054 1024 16588 0
epoch 13 1038 1024 16456 0
epoch 14 1031 1024 16408 0
epoch 15 1864 2048 33568 0
epoch 16 1809 2048 32856 0
epoch 17 1822 2048 32972 0
epoch 18 1553 2048 27736 0
epoch 19 1823 2048 32980 0
epoch 20 569 512 7628 0
epoch 21 1617 2048 29264 0
epoch 22 2048 2048 36760 0
epoch 23 1042 1024 16476 0
epoch 24 1044 1024 16488 0
epoch 25 1553 2048 27760 0
epoch 26 1004 1024 15984 0
epoch 27 1972 2048 35884 0
epoch 28 1041 1024 16472 0
epoch 29 2080 2048 37264 0
epoch 30 1559 2048 27804 0
epoch 31 1488 2048 26956 0
epoch 32 1813 2048 32888 0
epoch 33 1914 2048 34892 0
epoch 34 1544 2048 27648 0
epoch 35 1058 1024 16620 0
epoch 36 975 1024 15676 0
epoch 37 1054 1024 16588 0
epoch 38 980 1024 