In [1]:
import torch
from torch import nn
import torch.optim as optim
import numpy as np
import random

In [2]:
class NN2048(nn.Module):
    """Two-layer convolutional network producing one scalar per input board.

    The input is passed through three first-layer convolutions (2x1, 1x2 and
    2x2 kernels).  Five second-layer convolutions follow: 2x1 and 1x2 on each
    of the first two branches and 2x2 on the third.  Each second-layer map is
    flattened and fed to its own single-output linear layer, and the five
    outputs are summed.

    The linear layers expect 8, 9, 9, 8 and 4 spatial positions per filter,
    which is what the unpadded convolutions above produce for a 4x4 input.

    Args:
        input_size: number of input channels.
        filter1: output channels of every first-layer convolution.
        filter2: output channels of every second-layer convolution.
        drop_prob: accepted for signature compatibility; not used here.
    """

    def __init__(self, input_size=16, filter1=256, filter2=2048, drop_prob=0.):
        super().__init__()
        tall, wide, square = (2, 1), (1, 2), (2, 2)

        # First layer: one convolution per kernel shape.
        self.conv_a = nn.Conv2d(input_size, filter1, kernel_size=tall, padding=0)
        self.conv_b = nn.Conv2d(input_size, filter1, kernel_size=wide, padding=0)
        self.conv_c = nn.Conv2d(input_size, filter1, kernel_size=square, padding=0)

        # Second layer: the suffix names the first-layer branch, then the kernel.
        self.conv_aa = nn.Conv2d(filter1, filter2, kernel_size=tall, padding=0)
        self.conv_ab = nn.Conv2d(filter1, filter2, kernel_size=wide, padding=0)
        self.conv_ba = nn.Conv2d(filter1, filter2, kernel_size=tall, padding=0)
        self.conv_bb = nn.Conv2d(filter1, filter2, kernel_size=wide, padding=0)
        self.conv_cc = nn.Conv2d(filter1, filter2, kernel_size=square, padding=0)

        self.relu = nn.ReLU()

        # One scalar head per second-layer branch.
        self.W_aa = nn.Linear(8 * filter2, 1)
        self.W_ab = nn.Linear(9 * filter2, 1)
        self.W_ba = nn.Linear(9 * filter2, 1)
        self.W_bb = nn.Linear(8 * filter2, 1)
        self.W_cc = nn.Linear(4 * filter2, 1)

    def flatten(self, x):
        """Collapse every dimension after the first into one."""
        return x.view(x.size(0), -1)

    def forward(self, x):
        """Return a (batch, 1) tensor: the sum of the five branch heads."""
        x = x.float()
        hidden_a = self.relu(self.conv_a(x))
        hidden_b = self.relu(self.conv_b(x))
        hidden_c = self.relu(self.conv_c(x))

        # Accumulate in the fixed order aa, ab, ba, bb, cc.
        total = self.W_aa(self.flatten(self.relu(self.conv_aa(hidden_a))))
        total = total + self.W_ab(self.flatten(self.relu(self.conv_ab(hidden_a))))
        total = total + self.W_ba(self.flatten(self.relu(self.conv_ba(hidden_b))))
        total = total + self.W_bb(self.flatten(self.relu(self.conv_bb(hidden_b))))
        total = total + self.W_cc(self.flatten(self.relu(self.conv_cc(hidden_c))))
        return total

In [3]:
def make_input(grid):
    """One-hot encode a 4x4 grid into a float array of shape (16, 4, 4).

    For every cell (i, j), channel ``grid[i, j]`` is set to 1 at position
    (i, j); every other entry stays 0.
    """
    encoded = np.zeros(shape=(16, 4, 4))
    for row, col in np.ndindex(4, 4):
        encoded[grid[row, col], row, col] = 1
    return encoded

def add_two(mat):
    """Write a 1 into one randomly chosen zero cell of `mat`, in place.

    The cell is drawn uniformly from the zero cells with `np.random.randint`.
    Returns the same array object that was passed in.  The value 1 presumably
    encodes the "2" tile as an exponent — confirm against the board encoding.
    """
    empty_cells = np.argwhere(mat == 0)
    pick = np.random.randint(0, len(empty_cells))
    target = tuple(empty_cells[pick])
    mat[target] = 1
    return mat

In [4]:
# Score credited to a cell holding value k: 0 for k = 0 and (k - 1) * 2**k for
# k >= 1, tabulated for k = 0..16.
singleScore=[0,0,4,16,48,128,320,768,1792,4096,9216,20480,45056,98304,212992,458752,983040]
# Table loaded from `move.npy`; `move` indexes it with four cell values and
# returns the whole trailing axis.  Presumably the precomputed result of
# sliding/merging one row — TODO confirm against whatever generates the file.
moveDict=np.load('move.npy')

def move(list):
    """Look up the `moveDict` entry for the first four values of `list`.

    The four values index the first four axes of `moveDict`; the full
    remaining axis is returned.
    """
    first, second, third, fourth = list[0], list[1], list[2], list[3]
    return moveDict[first, second, third, fourth, :]

def lookup(x):
    """Return the `singleScore` entry for a single cell value `x`."""
    score = singleScore[x]
    return score

# Rebind the name to an element-wise version so it can be applied to arrays.
lookup = np.vectorize(lookup)

def getScore(matrix):
    """Total score of `matrix`: the sum of `lookup` applied to every cell."""
    per_cell = lookup(matrix)
    return np.sum(per_cell)

def getMove(grid):
    """List every direction whose move actually changes `grid`.

    Directions 0..3 are tried in order with `moveGrid`.  Each direction that
    yields a different board contributes a tuple
    ``(new_grid, direction, getScore(new_grid))``.  An empty list means no
    direction changes the board.
    """
    candidates = []
    for direction in range(4):
        moved = moveGrid(grid, direction)
        if isSame(grid, moved):
            continue
        candidates.append((moved, direction, getScore(moved)))
    return candidates
        
def moveGrid(grid,i):
    """Apply move `i` to a 4x4 `grid` and return the resulting int array.

    Directions: 0 = up, 1 = left, 2 = down, 3 = right.  Any other value
    returns None.

    Every row is passed through `move` as-is for left.  Right reverses each
    row before and after the lookup.  Up and down do the same on the
    transposed grid and transpose the result back.
    """
    if i == 0 or i == 2:
        vertical = True
    elif i == 1 or i == 3:
        vertical = False
    else:
        return None
    reverse = (i == 2 or i == 3)

    # Vertical moves are row moves on the transposed board.
    lines = np.transpose(grid) if vertical else grid

    shifted = []
    for row in range(4):
        line = lines[row, :]
        if reverse:
            shifted.append(np.flip(move(np.flip(line))))
        else:
            shifted.append(move(line))

    new = np.stack(shifted, axis = 0).astype(int)
    return new.T if vertical else new

def isSame(grid1,grid2):
    """True when every element of `grid1` compares equal to `grid2`."""
    cellwise_equal = grid1 == grid2
    return np.all(cellwise_equal)

In [5]:
def Vchange(grid, v):
    """Build the eight flip/transpose variants of `grid`, all labelled `v`.

    `grid` is a 3-D array whose first axis is left untouched.  The variants
    are, in order: the grid itself, axis 1 reversed, axis 2 reversed, both
    reversed, and then the same four for the grid with axes 1 and 2 swapped.

    Returns:
        (xtrain, ytrain): the stacked variants and an array of eight `v`.
    """
    variants = []
    for base in (grid, grid.swapaxes(1, 2)):
        flipped_last = base[:, :, ::-1]
        variants += [
            base,
            base[:, ::-1, :],
            flipped_last,
            flipped_last[:, ::-1, :],
        ]
    xtrain = np.array(variants)
    ytrain = np.array([v] * 8)
    return xtrain, ytrain

def gen_sample_and_learn(model, optimizer, loss_fn, is_train = False, explorationProb=0.1):
    """Play one game using `model` to rank moves, optionally training as it goes.

    Each turn a new tile is added with `add_two`, the legal moves are listed
    with `getMove`, and every resulting board is scored as
    ``(board score - current game score) + model prediction``.  The highest
    scoring move is the greedy choice.

    When `is_train` is true, one optimizer step is taken per turn.  The board
    chosen on the previous turn (expanded to eight variants by `Vchange`) is
    regressed onto the best value found this turn, or onto 0 when no move is
    possible.  With probability `explorationProb` a uniformly random legal
    move is played instead of the greedy one.

    Tensors are created on the device of the model's first parameter, so the
    function works for a model on any device (CUDA is used as a fallback when
    the model has no parameters).

    Args:
        model: value network; called on a batch of `make_input` encodings.
        optimizer: optimizer stepped during training (unused if not training).
        loss_fn: criterion applied to (prediction, target); the result is
            halved before backpropagation (unused if not training).
        is_train: whether to update the model while playing.
        explorationProb: probability of a random move while training.

    Returns:
        (game_len, max_tile, game_score, last_loss): number of moves played,
        ``2 ** (largest cell value on the final board)``, the score of the
        last board moved to, and the loss of the final training step (0 when
        not training).
    """
    # Follow the model's device instead of assuming the default CUDA device.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cuda")

    model.eval()
    game_len = 0
    game_score = 0
    # `np.int` was removed in NumPy 1.24; the builtin `int` is the same dtype.
    last_grid1 = np.zeros((4, 4), dtype=int)
    last_grid1 = add_two(last_grid1)
    last_grid2 = make_input(last_grid1)
    last_loss = 0

    while True:
        # add_two mutates and returns the board chosen on the previous turn.
        grid_array = add_two(last_grid1)
        board_list = getMove(grid_array)
        if board_list:
            boards = np.array([make_input(g) for g, m, s in board_list])
            # Pure inference: no autograd graph is needed for move ranking.
            with torch.no_grad():
                p = model(torch.from_numpy(boards).to(device)).flatten().tolist()
            game_len += 1
            best_v = None
            for i, (g, m, s) in enumerate(board_list):
                # Score gained by this move plus the predicted value of the result.
                v = (s - game_score) + p[i]
                if best_v is None or v > best_v:
                    best_v = v
                    best_score = s
                    best_grid1 = g
                    best_grid2 = boards[i]

        else:
            # No legal move: the previous board is worth nothing further.
            best_v = 0
            best_grid1 = None
            best_grid2 = None

        if is_train:
            x, y = Vchange(last_grid2, best_v)
            x = torch.from_numpy(x).to(device)
            y = torch.from_numpy(y).unsqueeze(dim=1).to(device).float()
            model.train()
            optimizer.zero_grad()
            pred = model(x)
            loss = loss_fn(pred, y) / 2
            last_loss = loss.item()
            loss.backward()
            optimizer.step()
            model.eval()

        if not board_list:
            break

        # Epsilon-greedy: occasionally play a uniformly random legal move.
        if is_train and random.random() < explorationProb:
            idx = random.randint(0, len(board_list) - 1)
            game_score = board_list[idx][2]
            last_grid1 = board_list[idx][0]
            last_grid2 = boards[idx]
        else:
            game_score = best_score
            last_grid1 = best_grid1
            last_grid2 = best_grid2

    return game_len, 2**grid_array.max(), game_score, last_loss

In [6]:
# Optimizer hyperparameters.
lr = 1e-4
weight_decay = 0
beta1 = 0.9

# Build the network with its default sizes and move it to the GPU (needs CUDA).
model = NN2048().cuda()
# Adam over all model parameters; the second beta is fixed at 0.999.
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay, betas=(beta1, 0.999))
# Mean-squared-error criterion, later passed to `train` as the loss function.
loss=nn.MSELoss()

In [7]:
import os
# Directory (relative to the working directory) that receives checkpoints.
experiment_dir = "model"

def save_model(state, filename='model.pth.tar'):
    """Serialize `state` with `torch.save` to ``experiment_dir/filename``.

    The target directory is created first if it does not exist, because
    `torch.save` does not create missing parent directories.

    Args:
        state: object to serialize (the training loop passes a dict).
        filename: file name inside `experiment_dir`.
    """
    path = os.path.join(experiment_dir, filename)
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    torch.save(state, path)

In [13]:
def load_model(model, optimizer, checkpoint_path):
    """Restore `model` and `optimizer` in place from a checkpoint file.

    The checkpoint must be a dict with the keys ``'state_dict'``,
    ``'optimizer'`` and ``'epoch'``.  Tensors are mapped onto the device of
    the model's first parameter, so a checkpoint can be loaded into a model
    on any device (``"cuda:0"`` is used when the model has no parameters).

    Args:
        model: module whose `load_state_dict` receives ``'state_dict'``.
        optimizer: optimizer whose `load_state_dict` receives ``'optimizer'``.
        checkpoint_path: path of the file to read with `torch.load`.

    Returns:
        (model, optimizer, epoch): the two objects passed in, and the epoch
        stored in the checkpoint.
    """
    first_param = next(model.parameters(), None)
    map_location = first_param.device if first_param is not None else "cuda:0"
    ckpt_dict = torch.load(checkpoint_path, map_location=map_location)

    model.load_state_dict(ckpt_dict['state_dict'])
    optimizer.load_state_dict(ckpt_dict['optimizer'])
    epoch = ckpt_dict['epoch']
    return model, optimizer, epoch

# Resume from a saved checkpoint: loads weights and optimizer state into the
# existing objects and binds `epoch` to the value stored in the file.
model, optimizer, epoch = load_model(model, optimizer, "model/model8_1500.pth.tar")

In [None]:
# Epoch number at which `train` stops (read when the loop condition runs).
num_epochs = 5000

def train(model, optimizer, loss, epoch = 0):
    """Play training games until `epoch` reaches the global `num_epochs`.

    Each epoch is one game played by `gen_sample_and_learn` in training mode
    with exploration probability 0.  A checkpoint is written with `save_model`
    whenever the mean "max score" of the last ten games exceeds the best mean
    seen so far (starting from 2048).  Checkpoints are named
    ``model8_<epoch // 100>.pth.tar``, so later ones in the same block of 100
    epochs overwrite earlier ones.

    Args:
        model: network to train.
        optimizer: optimizer stepped inside `gen_sample_and_learn`.
        loss: criterion passed to `gen_sample_and_learn`.
        epoch: epoch to resume from; nothing runs if it is already
            `num_epochs` or more.
    """
    ls = [1024] * 10
    running_mean = 2048
    # `<` rather than `!=`: a resumed epoch already past `num_epochs` must
    # end the loop instead of counting upward forever.
    while epoch < num_epochs:
        epoch += 1
        game_len, max_score, game_score, last_loss = gen_sample_and_learn(model, optimizer, loss, True, 0)
        print ('Game # ', epoch, 'Game length ', game_len, 'Max score ', max_score, 'Game score ', game_score, flush=True)
        ls.pop(0)
        ls.append(max_score)
        window_mean = sum(ls) / 10
        if window_mean > running_mean:
            running_mean = window_mean
            filename = "model8_"+str(epoch // 100)+".pth.tar"
            # Remember where the model lives so it is put back there even if
            # saving raises.
            first_param = next(model.parameters(), None)
            home_device = first_param.device if first_param is not None else torch.device("cuda")
            try:
                save_model({
                    'epoch': epoch,
                    'state_dict': model.cpu().state_dict(),
                    'optimizer': optimizer.state_dict(),
                }, filename)
            finally:
                model.to(home_device)
    
    
# Run training.  `epoch` must already be defined; in this notebook it is set
# by the checkpoint-loading cell, so training continues from that epoch.
train(model, optimizer, loss, epoch)

Game #  1501 Game length  1057 Max score  1024 Game score  16616
Game #  1502 Game length  1929 Max score  2048 Game score  34600
Game #  1503 Game length  1751 Max score  2048 Game score  32124
Game #  1504 Game length  1262 Max score  1024 Game score  20108
Game #  1505 Game length  1513 Max score  2048 Game score  27356
Game #  1506 Game length  1057 Max score  1024 Game score  16616
Game #  1507 Game length  1979 Max score  2048 Game score  35948
Game #  1508 Game length  559 Max score  512 Game score  7564
Game #  1509 Game length  1039 Max score  1024 Game score  16460
Game #  1510 Game length  1573 Max score  2048 Game score  27984
Game #  1511 Game length  1833 Max score  2048 Game score  33128
Game #  1512 Game length  1627 Max score  1024 Game score  26568
Game #  1513 Game length  1998 Max score  2048 Game score  35412
Game #  1514 Game length  2774 Max score  4096 Game score  56696
Game #  1515 Game length  1072 Max score  1024 Game score  16912
Game #  1516 Game length  98

Game #  1630 Game length  2512 Max score  4096 Game score  51532
Game #  1631 Game length  1003 Max score  1024 Game score  15932
Game #  1632 Game length  1992 Max score  2048 Game score  36124
Game #  1633 Game length  1810 Max score  2048 Game score  32896
Game #  1634 Game length  2852 Max score  4096 Game score  57652
Game #  1637 Game length  1850 Max score  2048 Game score  33276
Game #  1638 Game length  2046 Max score  2048 Game score  36748
Game #  1639 Game length  1751 Max score  2048 Game score  32124
Game #  1640 Game length  1933 Max score  2048 Game score  35128
Game #  1641 Game length  802 Max score  1024 Game score  12508
Game #  1642 Game length  1048 Max score  1024 Game score  16556
Game #  1643 Game length  571 Max score  512 Game score  7592
Game #  1644 Game length  2127 Max score  2048 Game score  37756
Game #  1645 Game length  1064 Max score  1024 Game score  16740
Game #  1646 Game length  820 Max score  1024 Game score  12664
Game #  1647 Game length  2892

Game #  1764 Game length  1070 Max score  1024 Game score  16780
Game #  1765 Game length  812 Max score  512 Game score  11676
Game #  1766 Game length  1862 Max score  2048 Game score  33436
Game #  1767 Game length  1387 Max score  2048 Game score  24668
Game #  1768 Game length  1107 Max score  1024 Game score  17144
Game #  1769 Game length  944 Max score  1024 Game score  14440
Game #  1770 Game length  1965 Max score  2048 Game score  35612
Game #  1771 Game length  1038 Max score  1024 Game score  16456
Game #  1772 Game length  548 Max score  512 Game score  7400
Game #  1773 Game length  753 Max score  1024 Game score  11864
Game #  1774 Game length  1032 Max score  1024 Game score  16328
Game #  1775 Game length  1116 Max score  1024 Game score  17352
Game #  1776 Game length  1056 Max score  1024 Game score  16600
Game #  1777 Game length  1887 Max score  2048 Game score  33752
Game #  1778 Game length  1112 Max score  1024 Game score  17552
Game #  1779 Game length  2119 M

Game #  1894 Game length  1574 Max score  2048 Game score  27932
Game #  1895 Game length  1056 Max score  1024 Game score  16656
Game #  1896 Game length  2111 Max score  2048 Game score  37444
Game #  1897 Game length  2060 Max score  2048 Game score  36928
Game #  1898 Game length  2379 Max score  4096 Game score  49200
Game #  1899 Game length  1554 Max score  2048 Game score  27740
Game #  1900 Game length  818 Max score  1024 Game score  12824
Game #  1901 Game length  1058 Max score  1024 Game score  16620
Game #  1902 Game length  2062 Max score  2048 Game score  36940
Game #  1903 Game length  2051 Max score  2048 Game score  36864
Game #  1904 Game length  1384 Max score  1024 Game score  22644
Game #  1905 Game length  592 Max score  512 Game score  7960
Game #  1906 Game length  1848 Max score  2048 Game score  33436
Game #  1907 Game length  1045 Max score  1024 Game score  16504
Game #  1908 Game length  1002 Max score  1024 Game score  15708
Game #  1909 Game length  227

Game #  2022 Game length  1981 Max score  2048 Game score  36016
Game #  2023 Game length  1127 Max score  1024 Game score  17460
Game #  2024 Game length  1053 Max score  1024 Game score  16584
Game #  2025 Game length  2034 Max score  2048 Game score  36540
Game #  2026 Game length  2633 Max score  4096 Game score  53760
Game #  2027 Game length  2976 Max score  4096 Game score  59864
Game #  2028 Game length  1778 Max score  2048 Game score  32544
Game #  2029 Game length  1031 Max score  1024 Game score  16368
Game #  2030 Game length  827 Max score  1024 Game score  12796
Game #  2031 Game length  1063 Max score  1024 Game score  16684
Game #  2032 Game length  583 Max score  512 Game score  7872
Game #  2033 Game length  1869 Max score  2048 Game score  33608
Game #  2034 Game length  636 Max score  512 Game score  8456
Game #  2035 Game length  2344 Max score  4096 Game score  48416
Game #  2036 Game length  1180 Max score  1024 Game score  18376
Game #  2037 Game length  1570 M

Game #  2149 Game length  1293 Max score  2048 Game score  23600
Game #  2150 Game length  1604 Max score  2048 Game score  28248
Game #  2151 Game length  1936 Max score  2048 Game score  35180
Game #  2152 Game length  2953 Max score  4096 Game score  59680
Game #  2153 Game length  1819 Max score  1024 Game score  29896
Game #  2154 Game length  1036 Max score  1024 Game score  16408
Game #  2155 Game length  3636 Max score  4096 Game score  73176
Game #  2156 Game length  1807 Max score  2048 Game score  32844
Game #  2157 Game length  708 Max score  512 Game score  9560
Game #  2158 Game length  1591 Max score  2048 Game score  28188
Game #  2159 Game length  2001 Max score  2048 Game score  36184
Game #  2160 Game length  848 Max score  1024 Game score  12968
Game #  2161 Game length  1017 Max score  1024 Game score  16236
Game #  2162 Game length  3529 Max score  4096 Game score  71968
Game #  2163 Game length  1936 Max score  2048 Game score  35152
Game #  2164 Game length  233

Game #  2276 Game length  1041 Max score  1024 Game score  16472
Game #  2277 Game length  1050 Max score  1024 Game score  16556
Game #  2278 Game length  1992 Max score  2048 Game score  35704
Game #  2279 Game length  3974 Max score  4096 Game score  80152
Game #  2280 Game length  1943 Max score  2048 Game score  35204
Game #  2281 Game length  2023 Max score  2048 Game score  36568
Game #  2282 Game length  1036 Max score  1024 Game score  16444
Game #  2283 Game length  566 Max score  512 Game score  7640
Game #  2284 Game length  892 Max score  1024 Game score  13576
Game #  2285 Game length  1829 Max score  2048 Game score  33080
Game #  2286 Game length  2091 Max score  2048 Game score  37240
Game #  2287 Game length  1050 Max score  1024 Game score  16556
Game #  2288 Game length  1116 Max score  1024 Game score  18072
Game #  2289 Game length  1050 Max score  1024 Game score  16484
Game #  2290 Game length  1039 Max score  1024 Game score  16460
Game #  2291 Game length  189

Game #  2403 Game length  2006 Max score  2048 Game score  36428
Game #  2404 Game length  1914 Max score  2048 Game score  34976
Game #  2405 Game length  841 Max score  1024 Game score  12968
Game #  2406 Game length  1063 Max score  1024 Game score  16716
Game #  2407 Game length  1468 Max score  2048 Game score  26368
Game #  2408 Game length  1009 Max score  1024 Game score  16144
Game #  2409 Game length  3058 Max score  4096 Game score  61224
Game #  2410 Game length  1931 Max score  2048 Game score  35116
Game #  2411 Game length  2011 Max score  2048 Game score  36284
Game #  2412 Game length  924 Max score  1024 Game score  14780
Game #  2413 Game length  3539 Max score  4096 Game score  72072
Game #  2414 Game length  1538 Max score  2048 Game score  27616
Game #  2415 Game length  2068 Max score  2048 Game score  36984
Game #  2416 Game length  1203 Max score  2048 Game score  22124
Game #  2417 Game length  1067 Max score  1024 Game score  16752
Game #  2418 Game length  1

Game #  2530 Game length  890 Max score  1024 Game score  14284
Game #  2531 Game length  892 Max score  1024 Game score  13576
Game #  2532 Game length  1585 Max score  2048 Game score  28060
Game #  2533 Game length  663 Max score  512 Game score  9500
Game #  2534 Game length  1026 Max score  1024 Game score  16284
Game #  2535 Game length  1886 Max score  2048 Game score  33804
Game #  2536 Game length  1813 Max score  2048 Game score  32888
Game #  2537 Game length  768 Max score  1024 Game score  12144
Game #  2538 Game length  3095 Max score  4096 Game score  61612
Game #  2539 Game length  1056 Max score  1024 Game score  16684
Game #  2540 Game length  2089 Max score  2048 Game score  37240
Game #  2541 Game length  592 Max score  512 Game score  8016
Game #  2542 Game length  1852 Max score  2048 Game score  33288
Game #  2543 Game length  2106 Max score  2048 Game score  37404
Game #  2544 Game length  1040 Max score  1024 Game score  16464
Game #  2545 Game length  993 Max 

Game #  2657 Game length  1029 Max score  1024 Game score  16360
Game #  2658 Game length  1124 Max score  1024 Game score  17432
Game #  2659 Game length  949 Max score  1024 Game score  14972
Game #  2660 Game length  2013 Max score  2048 Game score  36296
Game #  2661 Game length  1123 Max score  1024 Game score  17400
Game #  2662 Game length  2368 Max score  4096 Game score  48664
Game #  2663 Game length  911 Max score  1024 Game score  14652
Game #  2664 Game length  807 Max score  1024 Game score  12544
Game #  2665 Game length  1550 Max score  2048 Game score  27720
Game #  2666 Game length  2461 Max score  2048 Game score  44520
Game #  2667 Game length  2929 Max score  4096 Game score  59408
Game #  2668 Game length  1077 Max score  1024 Game score  16856
Game #  2669 Game length  975 Max score  1024 Game score  15436
Game #  2670 Game length  1072 Max score  1024 Game score  16912
Game #  2671 Game length  2028 Max score  2048 Game score  36568
Game #  2672 Game length  104

Game #  2784 Game length  3015 Max score  4096 Game score  60264
Game #  2785 Game length  3306 Max score  4096 Game score  68200
Game #  2786 Game length  912 Max score  1024 Game score  14668
Game #  2787 Game length  1559 Max score  2048 Game score  27804
Game #  2788 Game length  3034 Max score  4096 Game score  60844
Game #  2789 Game length  1952 Max score  2048 Game score  35288
Game #  2790 Game length  1856 Max score  2048 Game score  33392
Game #  2791 Game length  1562 Max score  2048 Game score  27832
Game #  2792 Game length  2039 Max score  2048 Game score  36588
Game #  2793 Game length  1141 Max score  1024 Game score  17640
Game #  2794 Game length  1322 Max score  2048 Game score  23864
Game #  2795 Game length  1755 Max score  2048 Game score  32148
Game #  2796 Game length  2317 Max score  2048 Game score  44096
Game #  2797 Game length  778 Max score  1024 Game score  12264
Game #  2798 Game length  1952 Max score  2048 Game score  35288
Game #  2799 Game length  1

Game #  2911 Game length  1319 Max score  2048 Game score  23884
Game #  2912 Game length  678 Max score  512 Game score  9388
Game #  2913 Game length  1870 Max score  2048 Game score  33516
Game #  2914 Game length  1116 Max score  1024 Game score  17352
Game #  2915 Game length  1887 Max score  2048 Game score  33836
Game #  2916 Game length  575 Max score  512 Game score  7696
Game #  2917 Game length  2076 Max score  2048 Game score  37064
Game #  2918 Game length  2179 Max score  2048 Game score  38668
Game #  2919 Game length  1107 Max score  1024 Game score  17292
Game #  2920 Game length  798 Max score  1024 Game score  12488
Game #  2921 Game length  2013 Max score  2048 Game score  36044
Game #  2922 Game length  2057 Max score  2048 Game score  36904
Game #  2923 Game length  2013 Max score  2048 Game score  36296
Game #  2924 Game length  849 Max score  1024 Game score  13032
Game #  2925 Game length  1484 Max score  2048 Game score  26684
Game #  2926 Game length  1160 Ma

Game #  3038 Game length  1003 Max score  1024 Game score  15560
Game #  3039 Game length  2088 Max score  2048 Game score  37176
Game #  3040 Game length  2954 Max score  4096 Game score  59564
Game #  3041 Game length  1596 Max score  2048 Game score  28168
Game #  3042 Game length  1958 Max score  2048 Game score  35416
Game #  3043 Game length  772 Max score  1024 Game score  12252
Game #  3044 Game length  1345 Max score  2048 Game score  24104
Game #  3045 Game length  1762 Max score  2048 Game score  32220
Game #  3046 Game length  1567 Max score  2048 Game score  27884
Game #  3047 Game length  2021 Max score  2048 Game score  36520
Game #  3048 Game length  2285 Max score  2048 Game score  40712
Game #  3049 Game length  1900 Max score  2048 Game score  34796
Game #  3050 Game length  1876 Max score  2048 Game score  34400
Game #  3051 Game length  1707 Max score  2048 Game score  31340
Game #  3052 Game length  1802 Max score  2048 Game score  32808
Game #  3053 Game length  

Game #  3165 Game length  2023 Max score  2048 Game score  36444
Game #  3166 Game length  1074 Max score  1024 Game score  16812
Game #  3167 Game length  1698 Max score  2048 Game score  31256


In [9]:
num_epochs = 100

def test(model):
    epoch = 0
    while epoch != num_epochs:
        epoch += 1
        game_len, max_score, game_score, last_loss = gen_sample_and_learn(model, None, None, False)
        print ('Game # ', epoch, 'Game length ', game_len, 'Max score ', max_score, 'Game score ', game_score, flush=True)

# Evaluate the current `model` over `num_epochs` games with no training.
test(model)

epoch 1 2026 2048 36412 0
epoch 2 2040 2048 36680 0
epoch 3 1060 1024 16660 0
epoch 4 801 1024 12488 0
epoch 5 727 1024 11628 0
epoch 6 1042 1024 16476 0
epoch 7 796 1024 12476 0
epoch 8 925 1024 14280 0
epoch 9 1552 2048 27728 0
epoch 10 1801 2048 32800 0
epoch 11 1547 2048 27692 0
epoch 12 1054 1024 16588 0
epoch 13 1038 1024 16456 0
epoch 14 1031 1024 16408 0
epoch 15 1864 2048 33568 0
epoch 16 1809 2048 32856 0
epoch 17 1822 2048 32972 0
epoch 18 1553 2048 27736 0
epoch 19 1823 2048 32980 0
epoch 20 569 512 7628 0
epoch 21 1617 2048 29264 0
epoch 22 2048 2048 36760 0
epoch 23 1042 1024 16476 0
epoch 24 1044 1024 16488 0
epoch 25 1553 2048 27760 0
epoch 26 1004 1024 15984 0
epoch 27 1972 2048 35884 0
epoch 28 1041 1024 16472 0
epoch 29 2080 2048 37264 0
epoch 30 1559 2048 27804 0
epoch 31 1488 2048 26956 0
epoch 32 1813 2048 32888 0
epoch 33 1914 2048 34892 0
epoch 34 1544 2048 27648 0
epoch 35 1058 1024 16620 0
epoch 36 975 1024 15676 0
epoch 37 1054 1024 16588 0
epoch 38 980 1024 