-
Notifications
You must be signed in to change notification settings - Fork 0
/
ai_2048.py
216 lines (191 loc) · 7.64 KB
/
ai_2048.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
#!/usr/bin/env python3
# coding: utf-8
import math
import time
import sys
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from random import randint, shuffle, seed
from collections import defaultdict
from IPython.display import clear_output
from jeu_2048 import Game, push
# Imports que j'ai pas (pour le moment)
# - selenium, theano, lasagne
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import theano
import theano.tensor as T
import lasagne
from lasagne.layers import DenseLayer, InputLayer, batch_norm, DropoutLayer
from lasagne.layers import MergeLayer, ReshapeLayer, FlattenLayer, ConcatLayer
from lasagne.nonlinearities import rectify, elu, softmax, sigmoid
from lasagne.init import Constant, Sparse
#from lasagne.layers.dnn import Conv2DDNNLayer
from lasagne.regularization import regularize_network_params, l1, l2, regularize_layer_params_weighted
# dtype used for every array built in this file and handed to the network.
# theano.config.floatX (commented out below) would follow the Theano
# configuration; the value is pinned instead.  It has to be a dtype *string*:
# the bare name ``float32`` is not defined anywhere and raised NameError.
#floatX = theano.config.floatX
floatX = "float32"
# RESEAU DE NEURONES
# =============================================================================
def Winit(shape):
    """Return a random ``floatX`` weight array of the given shape.

    Entries are drawn from a standard normal distribution; a second, uniform
    draw then selects the entries (those whose draw is below 0.9) that are
    scaled down by a factor of 0.01.

    Only referenced from commented-out ``W=Winit(...)`` arguments in the
    network definition.
    """
    weights = np.random.normal(size=shape).astype(floatX)
    # Second random draw decides which weights get shrunk.
    shrink_mask = np.random.uniform(size=shape) < 0.9
    weights[shrink_mask] *= 0.01
    return weights
# Convolution layer class used below.  The original code called
# Conv2DDNNLayer although its import was commented out, which raised
# NameError.  Use the cuDNN-backed layer when it can be imported and fall
# back to the generic Lasagne convolution otherwise; both accept the
# (incoming, num_filters, filter_size, pad=...) arguments used here.
try:
    from lasagne.layers.dnn import Conv2DDNNLayer as Conv2D
except ImportError:
    Conv2D = lasagne.layers.Conv2DLayer

# Symbolic inputs: a batch of encoded boards and the target value of each.
input_var = T.tensor4()
target_var = T.vector()

N_FILTERS = 512
N_FILTERS2 = 4096

# Input: (batch, 16, 4, 4), the shape produced by make_input for one board.
_ = InputLayer(shape=(None, 16, 4, 4), input_var=input_var)

# First stage: 2x1 and 1x2 filters over the board.
# (Disabled alternative: W=Winit((N_FILTERS, 16, 2, 1)) / (..., 1, 2).)
conv_a = Conv2D(_, N_FILTERS, (2, 1), pad='valid')
conv_b = Conv2D(_, N_FILTERS, (1, 2), pad='valid')

# Second stage: the same two filter shapes applied to each first-stage output.
# (Disabled alternative: W=Winit((N_FILTERS2, N_FILTERS, 2, 1)) / (..., 1, 2).)
conv_aa = Conv2D(conv_a, N_FILTERS2, (2, 1), pad='valid')
conv_ab = Conv2D(conv_a, N_FILTERS2, (1, 2), pad='valid')
conv_ba = Conv2D(conv_b, N_FILTERS2, (2, 1), pad='valid')
conv_bb = Conv2D(conv_b, N_FILTERS2, (1, 2), pad='valid')

# All six convolution outputs are flattened, concatenated and fed to a single
# linear unit that outputs the estimated value of the board.
_ = ConcatLayer([FlattenLayer(x) for x in [conv_aa, conv_ab, conv_ba, conv_bb, conv_a, conv_b]])
l_out = DenseLayer(_, num_units=1, nonlinearity=None)

prediction = lasagne.layers.get_output(l_out)
# P(boards) -> predicted value for each encoded board.
P = theano.function([input_var], prediction)

# NOTE(review): prediction comes from a 1-unit DenseLayer while target_var is
# a vector -- confirm the two line up as intended inside squared_error.
loss = lasagne.objectives.squared_error(prediction, target_var).mean()/2
#layers = {conv1: 0.5, conv2: 0.5}
#l1_penalty = regularize_layer_params_weighted(layers, l1)
#loss = loss + 1e-4 * l1_penalty

# Despite its name this is the mean squared error (twice the loss above).
accuracy = lasagne.objectives.squared_error(prediction, target_var).mean()

params = lasagne.layers.get_all_params(l_out, trainable=True)
#params = [l_out.W]
updates = lasagne.updates.adam(loss, params, beta1=0.5)
#updates = lasagne.updates.sgd(loss, params, learning_rate=α)
#updates = lasagne.updates.adamax(loss, params)

# train_fn applies one Adam update and returns the loss; loss_fn and
# accuracy_fn only evaluate.
train_fn = theano.function([input_var, target_var], loss, updates=updates)
loss_fn = theano.function([input_var, target_var], loss)
accuracy_fn = theano.function([input_var, target_var], accuracy)
# LIEN ENTRE LE DNN ET LE JEU 2048
# =============================================================================
# Conversion of the 2048 grid values into a binary (one-hot) encoding:
# ``table`` maps a tile value 2**k (k = 1..15) to k, and the empty cell (0)
# to 0.
table = dict((1 << exponent, exponent) for exponent in range(1, 16))
table.update({0: 0})
def make_input(grid):
    """Encode a 4x4 2048 grid as sixteen binary 4x4 planes.

    For every cell ``(i, j)`` exactly one entry ``[table[value], i, j]`` of
    the result is set to 1, where ``value`` is the content of that cell; all
    other entries are 0.  With ``table`` as defined in this file the largest
    encodable tile is 2**15 (32768); any value missing from ``table`` raises
    ``KeyError``.

    Returns an array of shape ``(16, 4, 4)`` and dtype ``floatX``.
    """
    planes = np.zeros(shape=(16, 4, 4), dtype=floatX)
    for row, col in np.ndindex(4, 4):
        planes[table[grid[row, col]], row, col] = 1
    return planes
# Log file written by printx (printx itself is NOT USED in the rest of this
# file).
logf = open("logf-rl-theano-n-tuple-6", mode="w")


def printx(*a, **kw):
    """Print the arguments to the log file, then to standard output.

    Both writes are flushed immediately; extra keyword arguments are
    forwarded to ``print`` unchanged.
    """
    for stream in (logf, sys.stdout):
        print(*a, file=stream, flush=True, **kw)
def get_grid(driver):
    """Read the current 4x4 board from the 2048 page.

    Every element with the CSS class ``tile`` is inspected.  Its class list
    is expected to hold a ``tile-position-<col>-<row>`` token (1-based) and a
    ``tile-<value>`` token; the value is stored at ``grid[row-1, col-1]``.
    When several tiles share a position, the last one returned by the driver
    wins.

    Args:
        driver: Selenium WebDriver (or any object exposing
            ``find_elements(by, value)``) showing the game page.

    Returns:
        ``numpy`` array of shape ``(4, 4)`` and dtype ``uint16``; cells
        without a tile are 0.
    """
    grid = np.zeros(shape=(4, 4), dtype='uint16')
    # find_elements(by, value) with a locator string exists in Selenium 3 and
    # 4; the find_elements_by_class_name helper was removed in Selenium 4.3.
    for tile in driver.find_elements('class name', 'tile'):
        # Reset per tile so a tile missing one of its tokens can never reuse
        # the position/value parsed from the previous tile.
        pos = value = None
        for token in (tile.get_attribute('class') or '').split():
            if token.startswith('tile-position-'):
                col, row = token[len('tile-position-'):].split('-')
                pos = int(col) - 1, int(row) - 1
            elif token.startswith('tile-') and token[5:6].isdigit():
                # token[5:6] (not token[5]) so a bare "tile-" is just skipped.
                value = int(token[5:])
        if pos is None or value is None:
            continue
        grid[pos[1], pos[0]] = value
    return grid
# Default size (width, height) of the figures created by this script.
FIGURE_SIZE = (12.0, 8.0)
matplotlib.rcParams['figure.figsize'] = FIGURE_SIZE
def Vchange(grid, v):
    """Run one training step pulling the value of ``grid`` toward ``v``.

    ``grid`` is indexed as (plane, axis 1, axis 2).  Eight variants of it are
    built -- the grid itself, its flips along axis 1, along axis 2 and along
    both, and the same four for the grid with axes 1 and 2 swapped -- and
    passed to ``train_fn`` as a single batch whose eight targets all equal
    ``v``.
    """
    variants = []
    for base in (grid, grid.swapaxes(1, 2)):
        mirrored = base[:, :, ::-1]
        variants.extend([
            base,
            base[:, ::-1, :],
            mirrored,
            mirrored[:, ::-1, :],
        ])
    xtrain = np.array(variants, dtype=floatX)
    ytrain = np.array([v] * len(variants), dtype=floatX)
    train_fn(xtrain, ytrain)
# Reads the board from the web page (through the Selenium 'driver'), scores
# the possible moves with the network and plays the chosen move, turn after
# turn until the game is over; the network is trained while playing (Vchange).
# Key sent to the page for each move id: arrow[m] is pressed for move ``m``,
# the same index that is passed to ``push`` when simulating the move.
arrow = [
    Keys.ARROW_LEFT,
    Keys.ARROW_UP,
    Keys.ARROW_RIGHT,
    Keys.ARROW_DOWN,
]
def gen_sample_and_learn(driver, max_read_retries=20):
    """Play one game of 2048 in the browser, training the network on the way.

    Each turn the board is read with ``get_grid``, the four moves are
    simulated with ``push`` on a copy of the board, and the boards of the
    moves kept (``push`` result ``s >= 0`` -- presumably the legal moves,
    confirm against ``jeu_2048.push``) are scored by the network ``P``.  The
    move maximising ``2 * s + P(board)`` is sent to the page.  The board
    chosen on the previous turn is then trained (``Vchange``) toward that
    best value, or toward 0 once no move is kept, which ends the game.

    Args:
        driver: Selenium WebDriver showing the 2048 page.
        max_read_retries: number of consecutive failed board reads tolerated
            before the underlying exception is re-raised.

    Returns:
        Tuple ``(game_len, max_tile, game_score)``: number of moves played,
        largest value on the last board read, and the sum of ``2 * s`` over
        the moves played.
    """
    # Locator strings work on Selenium 3 and 4; the find_element_by_* helpers
    # used originally were removed in Selenium 4.3.
    body = driver.find_element('tag name', 'body')
    game_len = 0
    game_score = 0
    last_grid = None
    keep_playing = False
    failed_reads = 0
    while True:
        try:
            grid_array = get_grid(driver)
        except Exception:
            # A failed read used to be swallowed by a bare ``except`` and
            # handled as "no move possible": the previous board was trained
            # toward 0 and the final ``grid_array.max()`` then failed on None.
            # Retry instead, and give up loudly if the page stays unreadable.
            failed_reads += 1
            if failed_reads > max_read_retries:
                raise
            time.sleep(0.05)
            continue
        failed_reads = 0

        # First time 2048 shows up: click the page's "keep playing" button.
        if not keep_playing and grid_array.max() == 2048:
            driver.find_element('class name', 'keep-playing-button').click()
            keep_playing = True
            time.sleep(1)

        # Candidate moves as (board after the move, move id, push result).
        board_list = []
        for m in range(4):
            g = grid_array.copy()
            s = push(g, m)  # presumably updates ``g`` in place -- confirm
            if s >= 0:
                board_list.append((g, m, s))

        if board_list:
            boards = np.array([make_input(g) for g, _, _ in board_list], dtype=floatX)
            p = P(boards).flatten()
            game_len += 1
            best_v = None
            for i, (_, m, s) in enumerate(board_list):
                v = 2*s + p[i]
                if best_v is None or v > best_v:
                    best_v = v
                    best_move = m
                    best_score = 2*s
                    best_grid = boards[i]
            body.send_keys(arrow[best_move])  # sends the key press to the game
            game_score += best_score
        else:
            # No move kept: terminal position, its value is 0.
            best_v = 0
            best_grid = None

        # Train the board chosen one turn earlier toward the value just found.
        if last_grid is not None:
            Vchange(last_grid, best_v)
        last_grid = best_grid
        if not board_list:
            break
        plt.pause(0.05)
    return game_len, grid_array.max(), game_score
# =============================================================================
# Main script: play up to N_GAMES games in Firefox, plotting the score of
# every game played so far after each one.
N_GAMES = 200
results = []
driver = webdriver.Firefox()
try:
    # Blue line: score per game.  Dots: the same scores, coloured by the
    # largest tile of the game (red <= 256, yellow 512, green anything else).
    graph = plt.plot([], [], 'b')[0]
    dots256 = plt.plot([], [], 'ro')[0]
    dots512 = plt.plot([], [], 'yo')[0]
    dots1024 = plt.plot([], [], 'go')[0]
    # The x-axis spans every game that may be played (it was fixed to 100
    # although 200 games are run, hiding the second half of the curve).
    plt.xlim((0, N_GAMES))
    plt.ylim((0, 25000))

    # Play many games in a row.
    for j in range(N_GAMES):
        driver.get("https://gabrielecirulli.github.io/2048/")
        time.sleep(2)
        # Play the game, using (and training) the model's predictions.
        result = gen_sample_and_learn(driver)
        print(j, result)
        results.append(result)

        graph.set_data(np.arange(len(results)), np.array(results)[:, 2])
        dots_data = [[], [], []]
        for i, d in enumerate(results):
            c = 0 if d[1] <= 256 else (1 if d[1] == 512 else 2)
            dots_data[c].append([i, d[2]])
        for points, dots in zip(dots_data, [dots256, dots512, dots1024]):
            if points:
                xy = np.array(points)
                dots.set_data(xy[:, 0], xy[:, 1])
        plt.title("Game #%d"%j, fontsize=64)
        plt.draw()
        plt.pause(3)

        # Stop as soon as a game reaches the 2048 tile.
        if result[1] >= 2048:
            break
finally:
    # Always close the browser, including when a game raises.
    driver.quit()