In [1]:
from c2048 import Game, push

In [2]:
import torch
from torch import nn
import torch.optim as optim
import numpy as np

In [9]:
class NN2048(nn.Module):
    """Convolutional value network that maps an encoded 2048 board to one scalar.

    The input is a batch of one-hot boards shaped ``(N, input_size, 4, 4)``.
    Several parallel convolution branches with different kernel shapes are
    applied, each branch is flattened and fed to its own linear head, and the
    head outputs are summed into a single value per board.

    The ``in_features`` of the linear heads are hard-wired to the feature-map
    sizes obtained from a 4x4 board, so other board sizes are not supported.

    Args:
        input_size: number of input channels (one-hot planes).
        filter1: output channels of the first-level branch convolutions.
        filter2: output channels of the second-level branch convolutions.
        drop_prob: accepted for interface compatibility; not used by the network.
    """

    def __init__(self, input_size=14, filter1=128, filter2=1024, drop_prob=0.):
        super().__init__()

        # (attribute name, in_channels, out_channels, kernel_size).
        # Registration order is kept stable so parameter ordering and
        # seed-dependent initialisation do not change.
        conv_specs = [
            # 1x1 convolutions applied directly to the input
            ("conv_1", input_size, input_size, (1, 1)),
            ("conv_2", input_size, input_size, (1, 1)),
            ("conv_3", input_size, input_size, (1, 1)),
            ("conv_4", input_size, input_size, (1, 1)),
            ("conv_5", input_size, filter1, (1, 1)),
            ("conv_6", input_size, filter1, (1, 1)),
            # first-level branch convolutions (vertical, horizontal, square)
            ("conv_a", input_size, filter1, (2, 1)),
            ("conv_a3", input_size, filter1, (3, 1)),
            ("conv_a4", input_size, filter1, (4, 1)),
            ("conv_b", input_size, filter1, (1, 2)),
            ("conv_b3", input_size, filter1, (1, 3)),
            ("conv_b4", input_size, filter1, (1, 4)),
            ("conv_c", input_size, filter1, (2, 2)),
            # second-level branch convolutions
            ("conv_aa", filter1, filter2, (2, 1)),
            ("conv_ab", filter1, filter2, (1, 2)),
            ("conv_ba", filter1, filter2, (2, 1)),
            ("conv_bb", filter1, filter2, (1, 2)),
            ("conv_ab3", filter1, filter2, (1, 3)),
            ("conv_ba3", filter1, filter2, (3, 1)),
            ("conv_ab4", filter1, filter2, (1, 4)),
            ("conv_ba4", filter1, filter2, (4, 1)),
            ("conv_c2", filter1, filter2, (2, 2)),
        ]
        for attr, n_in, n_out, kernel in conv_specs:
            setattr(self, attr, nn.Conv2d(in_channels=n_in, out_channels=n_out,
                                          kernel_size=kernel, padding=0))

        self.pool = nn.MaxPool2d(2)
        self.relu = nn.ReLU()

        # (attribute name, flattened feature count) for each scalar head.
        head_specs = [
            ("W_aa", filter2 * 8),
            ("W_ab", filter2 * 9),
            ("W_ba", filter2 * 9),
            ("W_bb", filter2 * 8),
            ("W_ab3", filter2 * 4),
            ("W_ba3", filter2 * 4),
            ("W_ab4", filter2 * 1),
            ("W_ba4", filter2 * 1),
            ("W_c", filter2 * 1),
            ("W_5", filter1 * 4),
            ("W_6", filter1 * 16),
        ]
        for attr, n_features in head_specs:
            setattr(self, attr, nn.Linear(n_features, 1))

    def flatten(self, x):
        """Collapse every dimension after the batch dimension into one."""
        return x.view(x.size(0), -1)

    def forward(self, x):
        """Return a ``(N, 1)`` tensor of values for a batch of encoded boards."""
        x = x.float()
        act = self.relu
        flat = self.flatten

        # 1x1 stems; each feeds a different family of branches.
        stem_1 = act(self.conv_1(x))
        stem_2 = act(self.conv_2(x))
        stem_3 = act(self.conv_3(x))
        stem_4 = act(self.conv_4(x))
        pooled_feat = flat(act(self.conv_5(self.pool(x))))
        cell_feat = flat(act(self.conv_6(x)))

        # First-level branches.
        vert_2 = act(self.conv_a(stem_1))
        horiz_2 = act(self.conv_b(stem_1))
        square = act(self.conv_c(stem_2))
        vert_3 = act(self.conv_a3(stem_3))
        horiz_3 = act(self.conv_b3(stem_3))
        vert_4 = act(self.conv_a4(stem_4))
        horiz_4 = act(self.conv_b4(stem_4))

        # Second-level branches, flattened for the linear heads.
        aa = flat(act(self.conv_aa(vert_2)))
        ab = flat(act(self.conv_ab(vert_2)))
        ba = flat(act(self.conv_ba(horiz_2)))
        bb = flat(act(self.conv_bb(horiz_2)))

        ab3 = flat(act(self.conv_ab3(vert_3)))
        ba3 = flat(act(self.conv_ba3(horiz_3)))
        ab4 = flat(act(self.conv_ab4(vert_4)))
        ba4 = flat(act(self.conv_ba4(horiz_4)))
        c3 = flat(self.pool(act(self.conv_c2(square))))

        # Sum the heads left to right in a fixed order.
        head_outputs = [
            self.W_aa(aa), self.W_ab(ab), self.W_ba(ba), self.W_bb(bb),
            self.W_ab4(ab4), self.W_ba4(ba4), self.W_c(c3),
            self.W_ab3(ab3), self.W_ba3(ba3), self.W_5(pooled_feat), self.W_6(cell_feat),
        ]
        out = head_outputs[0]
        for term in head_outputs[1:]:
            out = out + term

        return out

In [4]:
# Tile value -> one-hot plane index: empty cell -> 0, tile 2**k -> k.
table = {1 << exponent: exponent for exponent in range(1, 16)}
table[0] = 0


def make_input(grid):
    """One-hot encode a 4x4 board into a ``(14, 4, 4)`` float array.

    ``grid[i, j]`` must be 0 or a power of two present in ``table``; the plane
    ``table[grid[i, j]]`` gets a 1 at position ``(i, j)``.

    Note: ``table`` contains indices up to 15 but only 14 planes are
    allocated, so tiles 16384 and 32768 raise ``IndexError``.
    """
    planes = np.zeros(shape=(14, 4, 4))
    for row, col in np.ndindex(4, 4):
        planes[table[grid[row, col]], row, col] = 1
    return planes

In [5]:
def add_two(mat):
    """Place a 2 on a uniformly chosen empty (zero) cell of ``mat``.

    The array is modified in place and also returned. ``np.random.randint``
    raises ``ValueError`` when there is no empty cell.
    """
    empty_cells = np.argwhere(mat == 0)
    chosen = empty_cells[np.random.randint(0, len(empty_cells))]
    mat[tuple(chosen)] = 2
    return mat

In [6]:
def Vchange(grid, v):
    """Expand one encoded board into its eight flip/transpose variants.

    ``grid`` is a 3-D array whose last two axes are the board axes. The
    returned ``xtrain`` stacks, in order: the board, its flip along axis 1,
    its flip along axis 2, its flip along both, and then the same four
    variants of the board with axes 1 and 2 swapped. ``ytrain`` repeats the
    target ``v`` once per variant.
    """
    variants = []
    for base in (grid, grid.swapaxes(1, 2)):
        mirrored = base[:, :, ::-1]
        variants += [base, base[:, ::-1, :], mirrored, mirrored[:, ::-1, :]]
    xtrain = np.array(variants)
    ytrain = np.array([v] * 8)
    return xtrain, ytrain

def gen_sample_and_learn(model, optimizer, loss_fn, is_train = True):
    """Play one self-play game of 2048, optionally updating ``model`` after every move.

    At each step a new 2-tile is added, every move is tried on a copy of the
    board, and the move maximising ``2 * s + model(board_after_move)`` is
    chosen, where ``s`` is the value returned by ``push``. When ``is_train``
    is true, the previously chosen board (expanded to its eight symmetric
    variants by ``Vchange``) is regressed towards that best value; when no
    move is possible the target is 0.

    Args:
        model: value network; all tensors are created on the device of its
            first parameter.
        optimizer: optimiser stepped once per move (only used if ``is_train``).
        loss_fn: callable ``loss_fn(pred, target)`` (only used if ``is_train``).
        is_train: whether to perform the per-move gradient step.

    Returns:
        Tuple ``(game_len, max_tile, game_score)``: number of moves played,
        the largest value on the final board, and the accumulated ``2 * s``
        of the chosen moves.
    """
    # Follow the model's device instead of hard-coding CUDA so the function
    # also works for a model that lives on the CPU.
    device = next(model.parameters()).device

    model.eval()
    game_len = 0
    game_score = 0
    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the same dtype.
    last_grid1 = np.zeros((4, 4), dtype=int)
    last_grid1 = add_two(last_grid1)
    last_grid2 = make_input(last_grid1)
    while True:
        # add_two mutates last_grid1 in place, so grid_array is the same object.
        grid_array = add_two(last_grid1)

        # NOTE(review): push() is assumed to apply move ``m`` to ``g`` in place
        # and to return a negative value for an illegal move -- confirm against c2048.
        board_list = []
        for m in range(4):
            g = grid_array.copy()
            s = push(g, m)
            if s >= 0:
                board_list.append((g, m, s))

        if board_list:
            boards = np.array([make_input(g) for g, _, _ in board_list])
            # The predictions are only used as numbers, so skip building the
            # autograd graph while evaluating the candidate boards.
            with torch.no_grad():
                p = model(torch.from_numpy(boards).to(device)).flatten().tolist()
            game_len += 1
            best_v = None
            for i, (g, _, s) in enumerate(board_list):
                v = 2 * s + p[i]
                # Strict ">" keeps the first candidate on ties.
                if best_v is None or v > best_v:
                    best_v = v
                    best_score = 2 * s
                    best_grid1 = g
                    best_grid2 = boards[i]

            game_score += best_score
        else:
            # No legal move: the game is over and the target value is 0.
            best_v = 0
            best_grid1 = None
            best_grid2 = None

        if is_train:
            x, y = Vchange(last_grid2, best_v)
            x = torch.from_numpy(x).to(device)
            y = torch.from_numpy(y).unsqueeze(dim=1).to(device).float()
            model.train()
            optimizer.zero_grad()
            pred = model(x)
            loss = loss_fn(pred, y) / 2
            loss.backward()
            optimizer.step()
            model.eval()

        if not board_list:
            break
        last_grid2 = best_grid2
        last_grid1 = best_grid1

    return game_len, grid_array.max(), game_score

In [10]:
# Optimiser hyper-parameters.
lr = 1e-3
weight_decay = 1e-6
beta1 = 0.8

# Use the GPU when one is available, otherwise fall back to the CPU instead of
# failing inside ``.cuda()``.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = NN2048().to(device)
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay, betas=(beta1, 0.999))
loss=nn.MSELoss()

In [11]:
num_epochs = 500

def train(model, optimizer, loss):
    """Play ``num_epochs`` training games, printing the result of each one.

    ``num_epochs`` is read from module scope when the function is called.
    Each printed result is the tuple returned by ``gen_sample_and_learn``.
    """
    # A bounded for-loop terminates for any integer num_epochs, unlike the
    # former ``while epoch != num_epochs`` which never ends for negative values.
    for epoch in range(1, num_epochs + 1):
        res = gen_sample_and_learn(model, optimizer, loss, True)
        print ('epoch', epoch, res)
    
train(model, optimizer, loss)

epoch 1 (185, 128, 1744)
epoch 2 (143, 64, 1148)
epoch 3 (346, 512, 4724)
epoch 4 (287, 256, 3212)
epoch 5 (236, 256, 2604)
epoch 6 (149, 128, 1372)
epoch 7 (270, 256, 3120)
epoch 8 (228, 256, 2496)
epoch 9 (256, 256, 2816)
epoch 10 (230, 256, 2532)
epoch 11 (189, 128, 1776)
epoch 12 (199, 128, 1824)
epoch 13 (223, 256, 2468)
epoch 14 (153, 128, 1392)
epoch 15 (187, 128, 1764)
epoch 16 (130, 64, 1004)
epoch 17 (281, 256, 3200)
epoch 18 (182, 128, 1700)
epoch 19 (265, 256, 3008)
epoch 20 (231, 256, 2548)
epoch 21 (429, 512, 5744)
epoch 22 (332, 256, 4132)
epoch 23 (178, 128, 1680)
epoch 24 (596, 512, 8764)
epoch 25 (246, 256, 2848)
epoch 26 (149, 128, 1312)
epoch 27 (290, 256, 3284)
epoch 28 (261, 256, 2976)
epoch 29 (288, 256, 3276)
epoch 30 (148, 128, 1280)
epoch 31 (227, 128, 2264)
epoch 32 (207, 128, 2016)
epoch 33 (106, 64, 768)
epoch 34 (268, 256, 3024)
epoch 35 (499, 512, 6812)
epoch 36 (171, 128, 1548)
epoch 37 (170, 128, 1532)
epoch 38 (310, 256, 3500)
epoch 39 (297, 256, 3324)

KeyboardInterrupt: 

In [16]:
num_epochs = 50
model.cuda()

def test(model):
    """Play ``num_epochs`` evaluation games without training, printing each result.

    ``num_epochs`` is read from module scope when the function is called.
    """
    # No parameter update happens here, so gradient tracking is disabled for
    # the whole evaluation run.
    with torch.no_grad():
        # A bounded for-loop terminates for any integer num_epochs, unlike the
        # former ``while epoch != num_epochs``.
        for epoch in range(1, num_epochs + 1):
            res = gen_sample_and_learn(model, None, None, False)
            print (epoch, res)

test(model)

1 (529, 512, 7248)
2 (516, 512, 7128)
3 (304, 256, 3468)
4 (615, 512, 8284)
5 (737, 1024, 11728)
6 (587, 512, 8228)
7 (846, 1024, 13128)
8 (938, 1024, 14924)


KeyboardInterrupt: 

In [14]:
import os
experiment_dir = "model"
filename = "model2.pth.tar"
num_epochs = 200

def save_model(state, filename='model.pth.tar'):
    """Serialise ``state`` with ``torch.save`` to ``experiment_dir/filename``.

    The target directory is created when it does not exist yet, so saving
    into a fresh checkout does not fail with ``FileNotFoundError``.

    Args:
        state: any object accepted by ``torch.save``.
        filename: file name (relative to ``experiment_dir``) to write.
    """
    filename = os.path.join(experiment_dir, filename)
    target_dir = os.path.dirname(filename)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    torch.save(state, filename)

# Store CPU copies of the weights without calling ``model.cpu()``: that call
# moves the live model off its device in place, leaving it on a different
# device than the optimiser state and breaking any further training or testing.
save_model({
    'epoch': num_epochs,
    'state_dict': {name: tensor.cpu() for name, tensor in model.state_dict().items()},
    'optimizer': optimizer.state_dict(),
}, filename)