## imports

In [1]:
import sys

# Add the parent directory to the module search path (presumably where the
# project modules imported below live — confirm). The membership guard keeps
# the cell idempotent: re-running it no longer appends duplicate entries.
if ".." not in sys.path:
    sys.path.append("..")

In [2]:
from a_star_search import load_model

In [3]:
# Build `model` from the given .pth file via the project's `load_model` helper.
# NOTE(review): input_size presumably corresponds to a 10x10 board with 4
# values per cell — confirm against the `load_model` implementation.
model = load_model(
    'models/DAVI_steps_15_10x10-longTraining-no-batchnorm_1565_10.565299987792969.pth',
    input_size=10 * 10 * 4,
)

In [4]:
import pyximport

# First call: install the pyximport import hook with its default settings.
pyximport.install()
# Second call: install again with pyimport=True. This is the cell's last
# expression, so its return value is what the notebook displays.
pyximport.install(pyimport=True)

(<pyximport.pyximport.PyImporter at 0x7f49b13d7b10>, None)

In [5]:
import gzip
import pickle

In [6]:
import gc
# Run a full garbage-collection pass; the value returned by gc.collect()
# (number of unreachable objects found) is shown as the cell output.
gc.collect()

270

## solve games

In [None]:
from a_star_search import search_way
from tqdm import tqdm_notebook
import time
import numpy as np
# Public import path: `scipy.special._ufuncs` is a private module and its
# layout is not guaranteed to stay stable across SciPy releases.
from scipy.special import binom

# Per-solved-level metrics. These lists are index-aligned with each other but
# NOT with `envs`: levels for which no path was found are skipped.
actions_len_list = []      # number of actions in each returned solution
explored_len_list = []     # len(solution) / explored_len, i.e. a ratio
possible_states_list = []  # see NOTE(review) inside the loop
solve_time = []            # seconds spent in search_way for each solved level
results = []               # the returned action lists themselves
solved_envs = []           # the env each entry of `results` belongs to

# SECURITY: pickle.load can execute arbitrary code — only open trusted files.
with gzip.open('data/sokoban-v1.pkl.gz', 'rb') as f:
    envs = pickle.load(f)

for env_idx, env in enumerate(tqdm_notebook(envs)):
    # perf_counter() is monotonic, so the measured duration is not affected
    # by system clock adjustments during multi-hour searches.
    start_time = time.perf_counter()
    result, explored_len = search_way(env, model)
    elapsed = time.perf_counter() - start_time

    if isinstance(result, list) and len(result) > 0:
        solve_time.append(elapsed)
        actions_len_list.append(len(result))
        explored_len_list.append(len(result) / float(explored_len))
        # Count of cells with room_state > 0 (presumably the non-wall cells —
        # confirm against the environment's cell encoding).
        free_cells = int(np.count_nonzero(env.room_state > 0))
        # NOTE(review): this evaluates as (len(result) / free_cells) * binom(...).
        # The statistics cell later recomputes the state count as
        # x * binom(x - 1, num_boxes); confirm which quantity is intended here.
        possible_states_list.append(
            len(result) / free_cells * binom(free_cells - 1, env.num_boxes)
        )
        results.append(result)
        # Keep the matching env: later cells zip `envs` with `results`, which
        # only lines up when every level was solved.
        solved_envs.append(env)
    else:
        # Report which level failed so it can be identified afterwards.
        print('ERROR, could not find a path 😢', f'(env index {env_idx})')
        

7.4.0


HBox(children=(IntProgress(value=0), HTML(value='')))

  1%|          | 936/77520 [00:11<15:20, 83.23it/s]
  0%|          | 87/42840 [00:00<08:06, 87.97it/s]
  3%|▎         | 3512/101745 [00:41<19:11, 85.29it/s]
 41%|████▏     | 575161/1391280 [1:51:57<2:38:51, 85.62it/s]
 50%|█████     | 165070/328900 [31:41<30:06, 90.68it/s] 
 33%|███▎      | 25617/77520 [04:51<09:51, 87.75it/s]
  2%|▏         | 1108/58140 [00:12<10:48, 87.98it/s]
 23%|██▎       | 61006/265650 [11:39<39:06, 87.20it/s]
  8%|▊         | 50271/593775 [09:49<1:46:08, 85.35it/s]
 50%|████▉     | 38381/77520 [07:32<07:41, 84.87it/s]
  6%|▌         | 47804/849555 [09:09<2:33:30, 87.05it/s]
 74%|███████▎  | 1601995/2179485 [5:19:06<1:55:01, 83.67it/s] 
  8%|▊         | 13415/168245 [02:24<27:44, 93.03it/s]
  2%|▏         | 1812/77520 [00:19<13:38, 92.48it/s]
  5%|▌         | 29822/593775 [05:25<1:42:44, 91.49it/s]
  1%|          | 747/77520 [00:09<15:58, 80.09it/s]
 36%|███▋      | 96807/265650 [17:37<30:43, 91.57it/s]  
  3%|▎         | 130768/3746990 [23:04<10:38:02, 94.46it/s

## statistics

In [15]:
# Mean of every collected metric, printed as "<label> <value>" one per line.
# The explored ratio is scaled by 100 so it reads as a percentage.
summary_rows = (
    ('actions_len_list', np.mean(actions_len_list)),
    ('explored_len_list', np.mean(explored_len_list) * 100),
    ('possible_states_list', np.mean(possible_states_list)),
    ('solve_time', np.mean(solve_time)),
)
for stat_name, stat_mean in summary_rows:
    print(stat_name, stat_mean)

actions_len_list 29.95
explored_len_list 0.4983309357639707
possible_states_list 985322.25
solve_time 4006.5738418126107


In [None]:
# Recompute absolute per-level counts from the values gathered while solving.
# NOTE: this cell overwrites `possible_states_list` from the solve cell.

# `results` and `explored_len_list` only contain solved levels. zip() would
# silently pair them with the wrong envs if any level had been skipped, so
# refuse to continue unless all three sequences line up one-to-one.
if not (len(envs) == len(results) == len(explored_len_list)):
    raise ValueError(
        'envs/results/explored_len_list have different lengths '
        f'({len(envs)}, {len(results)}, {len(explored_len_list)}); '
        'some levels were not solved, so they cannot be zipped together'
    )

temp = []                  # explored states as a percentage of possible states
explored_len_list_ = []    # absolute number of explored states per level
possible_states_list = []  # free_cells * C(free_cells - 1, num_boxes) per level
for env, result, explored_ratio in zip(envs, results, explored_len_list):
    free_cells = int(np.count_nonzero(env.room_state > 0))
    # The solve cell stored len(result) / explored_len, so dividing the
    # solution length by that ratio recovers explored_len.
    explored_states = len(result) / explored_ratio
    explored_len_list_.append(explored_states)
    possible_states = free_cells * binom(free_cells - 1, env.num_boxes)
    possible_states_list.append(possible_states)
    temp.append(explored_states / possible_states * 100)


In [None]:
import os

# Bundle the collected metrics for persistence. Note that the
# 'explored_len_list' key stores `explored_len_list_` (the recomputed
# values), not the ratios gathered in the solve cell.
stats = {
    'actions_len_list': actions_len_list,
    'explored_len_list': explored_len_list_,
    'possible_states_list': possible_states_list,
    'solve_time': solve_time,
    'results': results
}

# Create the output directory first so a long run is not lost to a
# FileNotFoundError at save time.
os.makedirs('stats', exist_ok=True)
with gzip.open('stats/sokoban-v1.pkl.gz', 'wb') as f:
    pickle.dump(stats, f, pickle.HIGHEST_PROTOCOL)

## plots

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Render every env in `envs` as-is into one figure laid out as a 10x10 grid
# of panels, then save the figure as a PDF and display it.
fig = plt.figure(figsize=(25, 25))
for panel_no, env in enumerate(envs, start=1):
    frame = env.render(mode='rgb_array')
    ax = fig.add_subplot(10, 10, panel_no)
    ax.axis('off')
    ax.set_title(f'sokoban-v1 {panel_no}')
    ax.imshow(frame)
plt.axis('off')
fig.tight_layout()
plt.savefig("plots/sokoban-v1_unsolved.pdf")
plt.show()

In [None]:
from copy import deepcopy

# `results` only holds solved levels. If any level was skipped, zip() would
# silently replay each action list on the wrong env and mislabel the panels,
# so require a one-to-one match before plotting.
if len(results) != len(envs):
    raise ValueError(
        f'got {len(results)} solutions for {len(envs)} envs; '
        'each solution must be paired with its own env before plotting'
    )

fig = plt.figure(figsize=(25, 25))
for idx, (env, actions) in enumerate(zip(envs, results)):
    # Replay on a copy so the objects stored in `envs` are not stepped.
    replay_env = deepcopy(env)
    for action in actions:
        replay_env.step(action)

    # Draw the state reached after applying the whole action list.
    img = replay_env.render(mode='rgb_array')
    ax = fig.add_subplot(10, 10, idx + 1)
    ax.axis('off')
    ax.set_title(f'sokoban-v1 {idx + 1}')
    ax.imshow(img)
fig.tight_layout()
plt.savefig("plots/sokoban-v1_solved.pdf")
plt.show()

In [13]:
import datetime

# Timezone-aware UTC timestamp. `datetime.utcnow()` returns a naive datetime
# (no tzinfo) and is deprecated as of Python 3.12; the printed value now
# carries an explicit "+00:00" offset.
timestamp_utc = datetime.datetime.now(datetime.timezone.utc)
print(timestamp_utc)

2019-09-18 23:37:48.974937
