# 2048 Keras

In [4]:
import keras
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, Input, concatenate, BatchNormalization, Activation
from keras.optimizers import Adadelta
import numpy as np

BATCH_SIZE = 128
NUM_EPOCHS = 15

In [3]:
OUT_SHAPE = (4,4)
CAND = 16
map_table = {2**i : i for i in range(1,CAND)}
map_table[0] = 0
vmap = np.vectorize(lambda x: map_table[x])

def grid_one(arr):
    ret = np.zeros(shape=OUT_SHAPE+(CAND,),dtype=bool)  # shape = (4,4,16)
    for r in range(OUT_SHAPE[0]):
        for c in range(OUT_SHAPE[1]):
            ret[r,c,arr[r,c]] = 1
    return ret

In [None]:
import csv
data = []
with open("./train/train1M_1.csv") as f:
    for line in f:
        piece = eval(line)
        data.append(piece)

In [None]:
data = np.array(data)

In [None]:
x = np.array([ grid_one(piece[:-1].reshape(4,4)) for piece in data ])
y = keras.utils.to_categorical(data[:,-1], 4)

In [None]:
sep = 1100000
x_train = x[:sep]
x_test = x[sep:]
y_train = y[:sep]
y_test = y[sep:]

In [None]:
x_test.shape

In [None]:
model = keras.models.load_model('model_k.h5')

In [5]:
inputs = Input((4,4,16))
conv = inputs
FILTERS = 128
conv41 = Conv2D(filters=FILTERS, kernel_size=(4,1), kernel_initializer='he_uniform')(conv)
conv14 = Conv2D(filters=FILTERS, kernel_size=(1,4), kernel_initializer='he_uniform')(conv)
conv22 = Conv2D(filters=FILTERS, kernel_size=(2,2), kernel_initializer='he_uniform')(conv)
conv33 = Conv2D(filters=FILTERS, kernel_size=(3,3), kernel_initializer='he_uniform')(conv)
conv44 = Conv2D(filters=FILTERS, kernel_size=(4,4), kernel_initializer='he_uniform')(conv)
hidden = concatenate([Flatten()(conv41), Flatten()(conv14), Flatten()(conv22), Flatten()(conv33), Flatten()(conv44)])
x = BatchNormalization()(hidden)
outputs = Activation('relu')(x)
hmodel = Model(inputs,outputs)

'''
direction:
    0: left
    1: down
    2: right
    3: up
'''

in0 = Input((4,4,16))
in1 = Input((4,4,16))
in2 = Input((4,4,16))
in3 = Input((4,4,16))
out0 = hmodel(in0)
out1 = hmodel(in1)
out2 = hmodel(in2)
out3 = hmodel(in3)

x = concatenate([out0,out1,out2,out3])
x = Dense(512,kernel_initializer='he_uniform')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
out = Dense(4, activation='softmax')(x)
model = Model([in0,in1,in2,in3], out)

#for width in [512,128]:
#    x = Dense(width,kernel_initializer='he_uniform')(x)
#    x = BatchNormalization()(x)
#    x = Activation('relu')(x)
#outputs = Dense(4,activation='softmax')(x)
#model = Model(inputs, outputs)

model.summary()
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 4, 4, 16)     0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            (None, 4, 4, 16)     0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            (None, 4, 4, 16)     0                                            
__________________________________________________________________________________________________
input_5 (InputLayer)            (None, 4, 4, 16)     0                                            
__________________________________________________________________________________________________
model_1 (M

In [None]:
# train , validation_data=(x_test,y_test)
model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=1, verbose=1)

In [None]:
score_test = model.evaluate(x_test,y_test,verbose=0)
print('Testing loss: %.4f, Testing accuracy: %.2f' % (score_test[0],score_test[1]))

In [None]:
model.save('model_k.h5')  # creates a HDF5 file 'my_model.h5'

# 在线学习

In [1]:
import keras
from keras.models import Model
import numpy as np

import random
from collections import namedtuple
from game2048.game import Game
from game2048.expectimax import board_to_move

test_game = Game(4, random=False)
test_game.enable_rewrite_board = True



OUT_SHAPE = (4,4)
CAND = 16
map_table = {2**i : i for i in range(1,CAND)}
map_table[0] = 0
vmap = np.vectorize(lambda x: map_table[x])

def grid_one(arr):
    ret = np.zeros(shape=OUT_SHAPE+(CAND,),dtype=bool)  # shape = (4,4,16)
    for r in range(OUT_SHAPE[0]):
        for c in range(OUT_SHAPE[1]):
            ret[r,c,arr[r,c]] = 1
    return ret


Guide = namedtuple('Guides', ('state', 'action'))

class Guides:
    
    def __init__(self, cap):
        self.cap = cap
        self.mem = []
        self.pos = 0
        
    def push(self, *args):
        if len(self.mem) < self.cap:
            self.mem.append(None)
        self.mem[self.pos] = Guide(*args)
        self.pos = (self.pos + 1) % self.cap
        
    def sample(self, batch_size):
        return random.sample(self.mem, batch_size)
    
    def ready(self,batch_size):
        return len(self.mem) >= batch_size
    
    def __len__(self):
        return len(self.mem)
    

Using TensorFlow backend.


Loaded expectmax lib for 2048: /home/faymek/2048-api/game2048/expectimax/bin/2048.so


In [48]:
class ModelWrapper:
    
    def __init__(self, model, cap):
        self.model = model
        self.mem = Guides(cap)
        #self.writer = tf.
        self.trainning_step = 0
        self.buf = []
        
    def predict(self, buf):
        return model.predict(x = [[board] for board in buf])
    
    def move(self, game):
        cur_board = game.board
        self.buf = []
        for d in range(4):
            test_game.board = cur_board
            test_game.move(d)
            self.buf.append(grid_one(vmap(test_game.board)))
        self.mem.push(self.buf, board_to_move(game.board))
        game.move(self.predict(self.buf).argmax())
        
        
    def train(self, batch):
        if self.mem.ready(batch):
            guides = self.mem.sample(batch)
            X = []
            Y = []
            for guide in guides:
                X.append([[board] for board in guide.state])
                ohe_action = [0]*4
                ohe_action[guide.action] = 1
                Y.append(ohe_action)
            loss, acc = self.model.train_on_batch(np.array(X), np.array(Y))
            print('#%d \t loss:%.3f \t acc:%.3f'%(self.trainning_step, float(loss), float(acc)))
            self.trainning_step += 1

In [55]:
mw.mem.sample(1)[0].state

[array([[[False,  True, False, False, False, False, False, False, False,
          False, False, False, False, False, False, False],
         [ True, False, False, False, False, False, False, False, False,
          False, False, False, False, False, False, False],
         [ True, False, False, False, False, False, False, False, False,
          False, False, False, False, False, False, False],
         [ True, False, False, False, False, False, False, False, False,
          False, False, False, False, False, False, False]],
 
        [[False, False,  True, False, False, False, False, False, False,
          False, False, False, False, False, False, False],
         [ True, False, False, False, False, False, False, False, False,
          False, False, False, False, False, False, False],
         [ True, False, False, False, False, False, False, False, False,
          False, False, False, False, False, False, False],
         [ True, False, False, False, False, False, False, False, 

In [33]:
#model = keras.models.load_model('model_ol.h5')

In [49]:
MEMORY = 32768
BATCH = 64
mw = ModelWrapper(model,MEMORY)

In [50]:
while True:
    game = Game(4, random=False)
    while not game.end:
        mw.move(game)
    print('score:',game.score, end='\t')
    mw.train(BATCH)
    if(mw.trainning_step%10==0):
        model.save('modelOL.h5')

score: 32	score: 16	

ValueError: Error when checking model input: the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 4 array(s), but instead got the following list of 1 arrays: [array([[[[[[False, False, False, ..., False, False, False],
           [False, False, False, ..., False, False, False],
           [ True, False, False, ..., False, False, False],
           [ True, ...

In [5]:
# 先填满内存
while not mw.mem.ready(BATCH):
    game = Game(4, random=False)
    while not game.end:
        mw.move(game)

In [None]:
def generate_data():
    while True:
        game = Game(4, random=False)
        while not game.end:
            mw.move(game)
        print('score:',game.score, end='\t')
        X = []
        Y = []
        for guide in mw.mem.mem:
            X.append(guide.state)
            ohe_action = [0]*4
            ohe_action[guide.action] = 1
            Y.append(ohe_action)
        yield np.array(X), np.array(Y)
            
model.fit_generator(generate_data(), steps_per_epoch=1, epochs=20)

In [46]:
len(mw.mem.mem)

7450

# 测试分数

In [2]:
from game2048.game import Game
from game2048.displays import Display, IPythonDisplay
from game2048.agents import Agent, RandomAgent, ExpectiMaxAgent
# from game2048.displayer import Displayer
display1 = Display()
display2 = IPythonDisplay()

In [None]:
map_table = {2**i : i for i in range(1,CAND)}
map_table[0] = 0

class MyAgent(Agent):

    def __init__(self, game, display=None):
        super().__init__(game, display)
        
    def step(self):
        x0 = np.array([ grid_one(vmap(game.board)) ])
        preds = list(model.predict(x0))
        direction = np.argmax(preds[0])
        return direction


In [None]:
game = Game(4, random=False)
agent = MyAgent(game, display=display1)
agent.play()
game.score

In [None]:
scores = []
for i in range(10):
    game = Game(4, random=False)
    agent = MyAgent(game, display=display1)
    agent.play()
    scores.append(game.score)
scores

In [45]:
from game2048.game import Game
from game2048.expectimax import board_to_move
vd2 = np.vectorize(lambda x: int(x/2))

In [211]:
game = Game(4, random=False, score_to_win=1024)
#game.enable_rewrite_board = True
#cur_board = game.board
count = 0
err = []
while not game.end:
    d = board_to_move(game.board)
    count += 1
    cur_board = game.board
    ds = []
    for i in range(4):
        ds.append(board_to_move(cur_board))
        cur_board = np.rot90(cur_board)

    if not (0 in ds and 1 in ds and 2 in ds and 3 in ds):
        print(game.board)
    game.move(d)
print(count)

[[ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]
 [ 0.  0.  0.  4.]
 [ 0.  0.  0.  2.]]
[[ 0.  0.  0.  0.]
 [ 0.  0.  4.  0.]
 [ 0.  0.  0.  4.]
 [ 4.  0.  0.  2.]]
[[ 4.  0.  8.  4.]
 [ 0.  0.  0.  2.]
 [ 0.  0.  0.  2.]
 [ 0.  0.  0.  0.]]
[[ 4.  8.  4.  0.]
 [ 2.  0.  0.  0.]
 [ 2.  0.  0.  0.]
 [ 0.  0.  0.  2.]]
[[ 4.  8.  4.  0.]
 [ 2.  0.  0.  0.]
 [ 2.  0.  0.  2.]
 [ 2.  0.  0.  0.]]
[[ 4.  2.  0.  0.]
 [ 2.  0.  0.  0.]
 [ 4.  0.  0.  0.]
 [ 2.  8.  4.  4.]]
[[   0.    0.    0.    4.]
 [   2.    0.    0.    0.]
 [  16.    4.    0.    0.]
 [ 256.    4.    4.    0.]]
[[   0.    0.    2.    0.]
 [   4.    0.    0.    0.]
 [  16.    0.    0.    0.]
 [ 256.    8.    4.    4.]]
[[   8.    8.    0.    0.]
 [  16.    0.    0.    0.]
 [  32.   16.    4.    2.]
 [ 256.   32.    4.    0.]]
[[   0.    0.    2.    0.]
 [   0.    0.    4.    0.]
 [   4.    8.    2.    2.]
 [ 256.  128.   64.   32.]]
[[   0.    0.    2.    4.]
 [   0.    0.    4.    2.]
 [   8.    0.   64.    4.]
 [   8.   32.   32.  

In [141]:
print(cur_board)
print(board_to_move(cur_board))
print(vd2(cur_board))
print(board_to_move(vd2(cur_board)))

[[    4.     0.     0.     0.]
 [    4.     0.     0.     0.]
 [   16.     8.     8.     0.]
 [   32.   128.   512.  1024.]]
0
[[  2   0   0   0]
 [  2   0   0   0]
 [  8   4   4   0]
 [ 16  64 256 512]]
1


In [118]:
m2 = np.ones((4,4))*2

In [202]:
game = Game(4, random=True)
cur_board = game.board
print(cur_board)
for i in range(4):
    print(board_to_move(cur_board))
    cur_board = np.rot90(cur_board)


[[ 16 512 256   8]
 [256  64   4  64]
 [512 256  32   8]
 [ 32   8  64  64]]
0
3
0
3


In [180]:
32 in game.board

True

585