In [1]:
from c2048log import Game, push

In [2]:
import torch
from torch import nn
import torch.optim as optim
import numpy as np

In [3]:
class NN2048(nn.Module):
    """Scalar value network made of a three-level binary tree of two-cell convolutions.

    Every branch is named by the sequence of kernels applied to the input:
    ``a`` is a (2, 1) kernel (shrinks the height by one) and ``b`` is a (1, 2)
    kernel (shrinks the width by one), both without padding.  The raw input and
    each of the 14 branch activations feed their own ``nn.Linear`` head with a
    single output; the network output is the sum of those 15 heads.

    The head sizes fix the expected input to ``(N, input_size, 4, 4)``.
    ``drop_prob`` is accepted but not used by this class.
    """

    # Branch names in the order their layers are created and evaluated.  The
    # order matters: it fixes parameter registration order and the order in
    # which the head outputs are summed.
    _BRANCHES = ('a', 'b',
                 'aa', 'ab', 'ba', 'bb',
                 'aaa', 'aab', 'aba', 'abb', 'baa', 'bab', 'bba', 'bbb')

    def __init__(self, input_size=16, filter1=128, filter2=512, filter3=1024, drop_prob=0.):
        super().__init__()
        # channels[d] is the channel count of an activation at tree depth d.
        channels = (input_size, filter1, filter2, filter3)

        for branch in self._BRANCHES:
            depth = len(branch)
            kernel = (2, 1) if branch.endswith('a') else (1, 2)
            setattr(self, 'conv_' + branch,
                    nn.Conv2d(in_channels=channels[depth - 1], out_channels=channels[depth],
                              kernel_size=kernel, padding=0))

        self.relu = nn.ReLU()

        self.W_x = nn.Linear(input_size * 16, 1)
        for branch in self._BRANCHES:
            # Each 'a' removes one row and each 'b' one column of the 4x4 map.
            cells = (4 - branch.count('a')) * (4 - branch.count('b'))
            setattr(self, 'W_' + branch, nn.Linear(channels[len(branch)] * cells, 1))

    def flatten(self, x):
        """Collapse every dimension after the first into one."""
        return x.view(x.size(0), -1)

    def forward(self, x):
        """Return an ``(N, 1)`` tensor: the sum of the linear heads over all branches."""
        board = x.float()

        # Activations keyed by branch name; '' is the input itself, and each
        # branch is computed from its parent (its name minus the last letter).
        feats = {'': board}
        for branch in self._BRANCHES:
            conv = getattr(self, 'conv_' + branch)
            feats[branch] = self.relu(conv(feats[branch[:-1]]))

        out = self.W_x(self.flatten(board))
        for branch in self._BRANCHES:
            head = getattr(self, 'W_' + branch)
            out = out + head(self.flatten(feats[branch]))
        return out

In [4]:
# Score table read by lookup(): entry k equals (k - 1) * 2**k for k >= 1, and entry 0 is 0.
# The pattern suggests k is a tile exponent and the value is the score earned building that tile — TODO confirm.
singleScore=[0,0,4,16,48,128,320,768,1792,4096,9216,20480,45056,98304,212992,458752,983040]
# Precomputed array loaded from disk; move() indexes it with four cell values and takes the whole trailing axis.
# Presumably maps a line of four cells to the line after sliding/merging — verify against how move.npy was generated.
moveDict=np.load('move.npy')

def move(list):
    """Return the ``moveDict`` entry selected by the first four items of ``list``.

    The four items are used as indices into the first four axes of ``moveDict``
    and the full trailing axis is returned.  The parameter keeps its original
    name (which shadows the builtin) so keyword callers continue to work.
    """
    first, second, third, fourth = list[0], list[1], list[2], list[3]
    return moveDict[first, second, third, fourth, :]

@np.vectorize
def lookup(x):
    """Return ``singleScore[x]``; wrapped by ``np.vectorize`` so it applies element-wise to arrays."""
    return singleScore[x]

def getScore(matrix):
    """Sum the ``lookup`` value of every cell in ``matrix``."""
    cell_scores = lookup(matrix)
    return np.sum(cell_scores)

def getMove(grid):
    """List the moves that actually change ``grid``.

    Returns a list of ``(new_grid, direction, score)`` tuples, one for each
    direction 0..3 whose ``moveGrid`` result differs from ``grid``; ``score``
    is ``getScore`` of the new grid.
    """
    candidates = []
    for direction in (0, 1, 2, 3):
        moved = moveGrid(grid, direction)
        if isSame(grid, moved):
            # A move that leaves the board untouched is not a legal option.
            continue
        candidates.append((moved, direction, getScore(moved)))
    return candidates
        
def moveGrid(grid,i):
    """Apply move ``i`` to a 4x4 ``grid`` and return the resulting integer grid.

    Direction codes: 0 = up, 1 = left, 2 = down, 3 = right.  Any other value
    yields ``None``.  Each line of four cells is resolved through ``move``;
    vertical moves work on the transposed grid, and down/right moves reverse
    each line before and after the lookup.
    """
    if i not in (0, 1, 2, 3):
        return None

    vertical = i in (0, 2)
    reversed_lines = i in (2, 3)

    lines = np.transpose(grid) if vertical else grid
    shifted = []
    for row in range(4):
        line = lines[row, :]
        if reversed_lines:
            shifted.append(np.flip(move(np.flip(line))))
        else:
            shifted.append(move(line))

    new = np.stack(shifted, axis = 0).astype(int)
    return new.T if vertical else new

def isSame(grid1,grid2):
    """Return whether every element-wise comparison ``grid1 == grid2`` is true."""
    cellwise_equal = grid1 == grid2
    return np.all(cellwise_equal)

In [5]:
# Maps a tile value to its channel index: 2**k -> k for k = 1..15, and 0 -> 0.
table = dict((1 << exponent, exponent) for exponent in range(1, 16))
table[0] = 0

def make_input(grid):
    """One-hot encode a 4x4 board into a ``(16, 4, 4)`` float array.

    For every cell ``(i, j)`` the plane ``table[grid[i, j]]`` gets a 1 at
    ``(i, j)``; every other entry stays 0.  A cell value missing from
    ``table`` raises ``KeyError``.
    """
    planes = np.zeros(shape=(16, 4, 4))
    for row, col in np.ndindex(4, 4):
        planes[table[grid[row, col]], row, col] = 1
    return planes

In [6]:
def add_two(mat):
    """Write a 2 into one uniformly chosen zero cell of ``mat`` and return ``mat``.

    The array is modified in place; the return value is the same object.
    When ``mat`` has no zero cell, ``np.random.randint`` raises ``ValueError``.
    """
    empty_cells = np.argwhere(mat == 0)
    pick = np.random.randint(0, len(empty_cells))
    target = tuple(empty_cells[pick])
    mat[target] = 2
    return mat

In [7]:
def Vchange(grid, v):
    """Expand one (encoded board, target) pair into its eight flipped/transposed variants.

    ``grid`` is indexed as ``(channel, row, col)``.  The variants are, in order:
    the grid, its axis-1 flip, its axis-2 flip and the flip of both, followed by
    the same four applied to the grid with axes 1 and 2 swapped.

    Returns ``(xtrain, ytrain)``: the eight variants stacked into one array and
    an array holding ``v`` eight times.
    """
    variants = []
    for base in (grid, grid.swapaxes(1, 2)):
        flipped_cols = base[:, :, ::-1]
        variants.extend([base, base[:, ::-1, :], flipped_cols, flipped_cols[:, ::-1, :]])
    xtrain = np.array(variants)
    ytrain = np.array([v for _ in range(8)])
    return xtrain, ytrain

def gen_sample_and_learn(model, optimizer, loss_fn, is_train = True):
    """Play one game with ``model`` choosing the moves, optionally learning after every move.

    Each turn a new tile is added, every direction is tried with ``push`` on a
    copy of the board, and the move maximising ``2 * s + model(board)`` is
    played.  When ``is_train`` is true, the encoded board left by the previous
    move is then regressed (over its eight ``Vchange`` variants) toward that
    best value, or toward 0 once no move is possible.

    ``push`` comes from ``c2048log``; this code relies on it updating the grid
    it is given and returning a negative number for an unavailable move —
    confirm against that module.

    Args:
        model: network mapping encoded boards to one value each.
        optimizer: optimizer stepped once per move; unused when ``is_train`` is false.
        loss_fn: criterion applied to ``(prediction, target)``; unused when
            ``is_train`` is false.
        is_train: whether to take optimisation steps.

    Returns:
        ``(game_len, max_tile, game_score)``: the number of moves played, the
        largest value on the final board, and the sum of ``2 * s`` over the
        moves played.
    """
    # Follow the model's own device rather than assuming CUDA, so a CPU model
    # works too.  Fall back to the previous hard-coded CUDA target when the
    # device cannot be determined.
    try:
        device = next(model.parameters()).device
    except (AttributeError, StopIteration):
        device = torch.device('cuda')

    model.eval()
    game_len = 0
    game_score = 0
    # Builtin int replaces np.int, an alias deprecated in NumPy 1.20 and removed in 1.24.
    last_grid1 = np.zeros((4,4), dtype=int)
    last_grid1 = add_two(last_grid1)
    last_grid2 = make_input(last_grid1)
    while True:
        # add_two writes into last_grid1 itself, so grid_array is that same array.
        grid_array = add_two(last_grid1)
        board_list = []
        for m in range(4):
            g = grid_array.copy()
            s = push(g, m)
            if s >= 0:
                board_list.append((g, m, s))
        if board_list:
            boards = np.array([make_input(g) for g, m, s in board_list])
            # Pure inference: no autograd graph, and one transfer to the host
            # instead of one .item() call per candidate.
            with torch.no_grad():
                p = model(torch.from_numpy(boards).to(device)).flatten().tolist()
            game_len += 1
            best_v = None
            for i, (g, m, s) in enumerate(board_list):
                v = 2 * s + p[i]
                if best_v is None or v > best_v:
                    best_v = v
                    best_score = 2 * s
                    best_grid1 = g
                    best_grid2 = boards[i]

            game_score += best_score
        else:
            # No move available: the target for the previous board is 0.
            best_v = 0
            best_grid1 = None
            best_grid2 = None

        if is_train:
            x, y = Vchange(last_grid2, best_v)
            x = torch.from_numpy(x).to(device)
            y = torch.from_numpy(y).unsqueeze(dim=1).to(device).float()
            model.train()
            optimizer.zero_grad()
            pred = model(x)
            loss = loss_fn(pred, y)
            loss.backward()
#             nn.utils.clip_grad_norm_(model.parameters(), 5.0)
            optimizer.step()
            model.eval()

        if not board_list:
            break
        last_grid2 = best_grid2
        last_grid1 = best_grid1

    return game_len, grid_array.max(), game_score

In [8]:
# Hyperparameters handed to the Adam optimizer below.
lr = 5e-4
weight_decay = 0
beta1 = 0.9

# Build the network with its default layer sizes and move it to the GPU (requires CUDA).
model = NN2048().cuda()
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay, betas=(beta1, 0.999))
# Mean-squared-error criterion; later passed to train() as its `loss` argument.
loss=nn.MSELoss()

In [9]:
import os
# Directory that save_model() joins onto checkpoint filenames; "" leaves the filename unchanged.
experiment_dir = ""

def save_model(state, filename='model.pth.tar'):
    """Serialise ``state`` with ``torch.save`` under the global ``experiment_dir``.

    Args:
        state: object to serialise (the callers in this notebook pass a dict
            of epoch, state dicts and similar).
        filename: file name, joined onto ``experiment_dir``.
    """
    filename = os.path.join(experiment_dir, filename)
    # torch.save does not create missing directories; make sure the target
    # directory exists.  An empty dirname means the current directory.
    target_dir = os.path.dirname(filename)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    torch.save(state, filename)

def load_model(model, optimizer, checkpoint_path, model_only = False, map_location = "cuda:0"):
    """Restore ``model`` (and optionally ``optimizer``) from a checkpoint file.

    Args:
        model: module whose ``load_state_dict`` receives ``ckpt['state_dict']``.
        optimizer: optimizer restored from ``ckpt['optimizer']`` unless
            ``model_only`` is true.
        checkpoint_path: path passed to ``torch.load`` as given (it is not
            joined with ``experiment_dir``).
        model_only: when true, only the model weights are restored.
        map_location: device mapping forwarded to ``torch.load``.  Defaults to
            the previously hard-coded ``"cuda:0"``; pass ``"cpu"`` on machines
            without a GPU.

    Returns:
        ``(model, optimizer, epoch, running_mean)``; the last two are ``None``
        when ``model_only`` is true.

    Raises:
        KeyError: if the checkpoint lacks a key that is read.
    """
    # NOTE(security): torch.load unpickles the file; only load checkpoints from a trusted source.
    ckpt_dict = torch.load(checkpoint_path, map_location=map_location)

    model.load_state_dict(ckpt_dict['state_dict'])
    if model_only:
        return model, optimizer, None, None

    optimizer.load_state_dict(ckpt_dict['optimizer'])
    return model, optimizer, ckpt_dict['epoch'], ckpt_dict['running_mean']

In [10]:
# Number of games train() plays before it stops; read from the global at call time.
num_epochs = 500
# Frozen copy of the model as of the most recent checkpoint; set by train().
best_model = None

def train(model, optimizer, loss, epoch = 0, running_mean = 2048):
    """Run self-play training games until ``epoch`` reaches the global ``num_epochs``.

    After each game the mean of the last ten max-tile values is compared with
    ``running_mean``.  When it is higher, it becomes the new ``running_mean``,
    a checkpoint is written with ``save_model``, and the global ``best_model``
    is replaced by a copy of the current model.

    Args:
        model: network being trained.
        optimizer: optimizer passed on to ``gen_sample_and_learn``.
        loss: criterion passed on to ``gen_sample_and_learn``.
        epoch: number of games already played (for resuming).
        running_mean: threshold the ten-game mean has to beat before a
            checkpoint is written.
    """
    import copy  # local import so this cell does not depend on a separate import cell
    # Without this declaration the assignment below only bound a local name,
    # leaving the module-level best_model at None.
    global best_model

    ls = [1024] * 10
    # '<' rather than '!=' so a resumed epoch beyond num_epochs stops instead of looping forever.
    while epoch < num_epochs:
        epoch += 1
        game_len, max_score, game_score = gen_sample_and_learn(model, optimizer, loss, True)
        print ('Game # ', epoch, 'Game length ', game_len, 'Max score ', max_score, 'Game score ', game_score, flush=True)
        ls.pop(0)
        ls.append(max_score)
        # Plain Python float, so the checkpoint does not contain a NumPy scalar.
        recent_mean = float(sum(ls)) / 10
        if recent_mean > running_mean:
            running_mean = recent_mean
            filename = "model9_"+str(epoch // 100)+".pth.tar"
            # Copy the weights to the CPU for saving without moving the model
            # itself off its device and back.
            cpu_state = model.state_dict()
            for key in list(cpu_state):
                cpu_state[key] = cpu_state[key].cpu()
            save_model({
                'epoch': epoch,
                'running_mean': running_mean,
                'state_dict': cpu_state,
                'optimizer': optimizer.state_dict(),
            }, filename)
            # Independent snapshot: later training steps on `model` must not change it.
            best_model = copy.deepcopy(model)
            
# Run training with train()'s default epoch and running_mean arguments.
train(model, optimizer, loss)

Game #  1 Game length  91 Max score  64 Game score  258.0
Game #  2 Game length  187 Max score  128 Game score  620.0
Game #  3 Game length  122 Max score  128 Game score  380.0
Game #  4 Game length  130 Max score  128 Game score  406.0
Game #  5 Game length  284 Max score  256 Game score  998.0
Game #  6 Game length  106 Max score  64 Game score  312.0
Game #  7 Game length  128 Max score  128 Game score  400.0
Game #  8 Game length  377 Max score  256 Game score  1362.0
Game #  9 Game length  244 Max score  256 Game score  856.0
Game #  10 Game length  95 Max score  64 Game score  266.0
Game #  11 Game length  145 Max score  128 Game score  456.0
Game #  12 Game length  259 Max score  128 Game score  898.0
Game #  13 Game length  170 Max score  128 Game score  556.0
Game #  14 Game length  189 Max score  128 Game score  630.0
Game #  15 Game length  301 Max score  256 Game score  1074.0
Game #  16 Game length  252 Max score  256 Game score  886.0
Game #  17 Game length  253 Max scor

Game #  135 Game length  1539 Max score  2048 Game score  5998.0
Game #  136 Game length  1900 Max score  2048 Game score  7436.0
Game #  137 Game length  1250 Max score  1024 Game score  4828.0
Game #  138 Game length  1208 Max score  1024 Game score  4652.0
Game #  139 Game length  523 Max score  512 Game score  1940.0
Game #  140 Game length  1037 Max score  1024 Game score  3988.0
Game #  141 Game length  830 Max score  1024 Game score  3170.0
Game #  142 Game length  1321 Max score  2048 Game score  5126.0
Game #  143 Game length  987 Max score  1024 Game score  3778.0
Game #  144 Game length  1716 Max score  2048 Game score  6678.0
Game #  145 Game length  907 Max score  1024 Game score  3480.0
Game #  146 Game length  1566 Max score  2048 Game score  6102.0
Game #  147 Game length  1778 Max score  2048 Game score  6946.0
Game #  148 Game length  807 Max score  1024 Game score  3078.0
Game #  149 Game length  999 Max score  1024 Game score  3850.0
Game #  150 Game length  1271 Ma

Game #  266 Game length  509 Max score  512 Game score  1896.0
Game #  267 Game length  1330 Max score  2048 Game score  5160.0
Game #  268 Game length  865 Max score  1024 Game score  3304.0
Game #  269 Game length  1449 Max score  2048 Game score  5662.0
Game #  270 Game length  810 Max score  1024 Game score  3080.0
Game #  271 Game length  504 Max score  512 Game score  1878.0
Game #  272 Game length  512 Max score  512 Game score  1908.0
Game #  273 Game length  1017 Max score  1024 Game score  3910.0
Game #  274 Game length  1563 Max score  2048 Game score  6086.0
Game #  275 Game length  1955 Max score  2048 Game score  7644.0
Game #  276 Game length  1483 Max score  2048 Game score  5774.0
Game #  277 Game length  1956 Max score  2048 Game score  7630.0
Game #  278 Game length  752 Max score  1024 Game score  2866.0
Game #  279 Game length  905 Max score  1024 Game score  3466.0
Game #  280 Game length  1095 Max score  1024 Game score  4210.0
Game #  281 Game length  994 Max sc

Game #  394 Game length  1442 Max score  2048 Game score  5608.0
Game #  395 Game length  1297 Max score  2048 Game score  5040.0
Game #  396 Game length  1193 Max score  2048 Game score  4624.0
Game #  397 Game length  416 Max score  512 Game score  1528.0
Game #  398 Game length  940 Max score  1024 Game score  3604.0
Game #  399 Game length  1053 Max score  1024 Game score  4046.0
Game #  400 Game length  1510 Max score  2048 Game score  5876.0
Game #  401 Game length  1618 Max score  2048 Game score  6304.0
Game #  402 Game length  2061 Max score  2048 Game score  8066.0
Game #  403 Game length  1575 Max score  2048 Game score  6122.0
Game #  404 Game length  1020 Max score  1024 Game score  3922.0
Game #  405 Game length  1082 Max score  1024 Game score  4154.0
Game #  406 Game length  810 Max score  1024 Game score  3098.0
Game #  407 Game length  1698 Max score  2048 Game score  6606.0
Game #  408 Game length  1048 Max score  1024 Game score  4024.0
Game #  409 Game length  1336

In [12]:

# Call train() again on the current global model and optimizer; epoch and running_mean again take their defaults.
train(model, optimizer, loss)

Game #  1 Game length  930 Max score  1024 Game score  3562.0
Game #  2 Game length  1063 Max score  1024 Game score  4088.0
Game #  3 Game length  1428 Max score  2048 Game score  5558.0
Game #  4 Game length  1010 Max score  1024 Game score  3888.0
Game #  5 Game length  2661 Max score  4096 Game score  10466.0
Game #  6 Game length  1456 Max score  1024 Game score  5648.0
Game #  7 Game length  1936 Max score  2048 Game score  7576.0
Game #  8 Game length  2005 Max score  2048 Game score  7854.0
Game #  9 Game length  1051 Max score  1024 Game score  4044.0
Game #  10 Game length  2579 Max score  4096 Game score  10148.0
Game #  11 Game length  2516 Max score  4096 Game score  9906.0


UnboundLocalError: local variable 'epoch' referenced before assignment

In [11]:
# Number of evaluation games.  NOTE: this rebinds the same global that train() reads.
num_epochs = 100

def test(model):
    """Play ``num_epochs`` games with ``model`` without training and print each result.

    Args:
        model: network used to pick moves; must not be ``None``.
    """
    epoch = 0
    while epoch < num_epochs:
        epoch += 1
        # gen_sample_and_learn returns exactly three values; unpacking a fourth raised ValueError.
        game_len, max_score, game_score = gen_sample_and_learn(model, None, None, False)
        print ('Game # ', epoch, 'Game length ', game_len, 'Max score ', max_score, 'Game score ', game_score, flush=True)

# best_model must be a model instance here; the AttributeError recorded below shows it was None when this cell ran.
test(best_model)


AttributeError: 'NoneType' object has no attribute 'eval'

In [None]:
import os
# Rebinds the global that save_model() joins onto filenames, so checkpoints now go under "model".
experiment_dir = "model"
# File name of the final checkpoint written at the end of this cell.
filename = "model2.pth.tar"
# NOTE: rebinds the global read by train() and test(); it is also stored as 'epoch' in the checkpoint below.
num_epochs = 200

def save_model(state, filename='model.pth.tar'):
    """Serialise ``state`` with ``torch.save`` under the global ``experiment_dir``.

    This rebinds the ``save_model`` name defined in an earlier cell.

    Args:
        state: object to serialise.
        filename: file name, joined onto ``experiment_dir``.
    """
    filename = os.path.join(experiment_dir, filename)
    # torch.save does not create missing directories; make sure the target
    # directory exists.  An empty dirname means the current directory.
    target_dir = os.path.dirname(filename)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    torch.save(state, filename)

# Write a final checkpoint through save_model(); 'epoch' stores the configured num_epochs, not a running counter.
# NOTE(review): model.cpu() moves the global model to the CPU in place and nothing here moves it back,
# so cells that send inputs to CUDA will not match the model's device if re-run afterwards.
# NOTE(review): no 'running_mean' entry is stored, yet load_model() reads that key unless model_only=True.
save_model({
    'epoch': num_epochs,
    'state_dict': model.cpu().state_dict(),
    'optimizer': optimizer.state_dict(),
}, filename)