## imports

In [1]:
import sys
sys.path.append("..") # Adds higher directory to python modules path

In [2]:
from a_star_search import load_model

In [3]:
model = load_model('models/DAVI_steps_15_10x10-longTraining-no-batchnorm_1565_10.565299987792969.pth', input_size=10*10*4)

In [4]:
import pyximport; pyximport.install()
pyximport.install(pyimport=True)

(<pyximport.pyximport.PyImporter at 0x7f2350fc4350>, None)

In [5]:
import gzip
import pickle

In [6]:
import gc
gc.collect()

270

## solve games

In [None]:
from a_star_search import search_way
from tqdm import tqdm_notebook
import time
import numpy as np
from scipy.special._ufuncs import binom

actions_len_list = []
explored_len_list = []
possible_states_list = []
solve_time = []
results = []


with gzip.open('data/boxban-hard.pkl.gz', 'rb') as f:
    envs = pickle.load(f)

for env in tqdm_notebook(envs):
    start_time = time.time()
    result, explored_len = search_way(env, model, progress_bar_update_iterations=10000)
    if type(result) is list and len(result) > 0:
        solve_time.append(time.time() - start_time)
        actions_len_list.append(len(result))
        explored_len_list.append(len(result) / float(explored_len))
        x = sum((env.room_state > 0).flatten())
        possible_states_list.append(len(result) / x * binom(x-1, env.num_boxes))
        results.append(result)
    else:
        print('ERROR, could not find a path 😢')
        
    all_stuff = {
        'actions_len_list': actions_len_list,
        'explored_len_list': explored_len_list,
        'possible_states_list': possible_states_list,
        'solve_time': solve_time,
        'results': results
    }

    with gzip.open(f'stats/temp_boxban-hard.pkl.gz', 'wb') as f:
        pickle.dump(all_stuff, f, pickle.HIGHEST_PROTOCOL)
        

7.4.0


HBox(children=(IntProgress(value=0), HTML(value='')))

 75%|███████▌  | 1219667/1623160 [3:49:26<1:16:20, 88.09it/s]
 59%|█████▉    | 125217/212520 [22:30<15:28, 93.98it/s]
 48%|████▊     | 483680/1006880 [1:26:27<1:33:14, 93.52it/s]
 34%|███▍      | 856278/2509710 [2:27:24<4:36:53, 99.52it/s] 
 79%|███████▉  | 80314/101745 [13:20<03:32, 100.65it/s]
 36%|███▋      | 61258/168245 [10:10<17:43, 100.65it/s]
 13%|█▎        | 16657/131670 [02:49<19:21, 99.03it/s] 
 95%|█████████▌| 96910/101745 [16:52<00:48, 98.99it/s]
 61%|██████    | 721359/1186680 [2:01:17<1:18:22, 98.96it/s] 
  9%|▉         | 220687/2509710 [36:47<6:21:59, 99.87it/s] 
 11%|█         | 18665/168245 [03:05<24:48, 100.48it/s]
 39%|███▉      | 333661/849555 [55:29<1:25:46, 100.25it/s]
 30%|███       | 654853/2179485 [1:49:43<4:15:26, 99.47it/s]
 67%|██████▋   | 142265/212520 [23:38<11:32, 101.43it/s]
 74%|███████▎  | 156539/212520 [26:04<09:14, 100.99it/s]
 53%|█████▎    | 41248/77520 [06:51<06:01, 100.23it/s]
 41%|████▏     | 576338/1391280 [1:36:26<2:16:48, 99.29it/s] 
 13%|█▎

## statistics

In [15]:
print('actions_len_list', np.mean(actions_len_list))
print('explored_len_list', np.mean(explored_len_list) * 100)
print('possible_states_list', np.mean(possible_states_list))
print('solve_time', np.mean(solve_time))

actions_len_list 56.15
explored_len_list 0.07454540107511129
possible_states_list 1146978.25
solve_time 4445.830259296894


In [None]:
temp = []
explored_len_list_ = []
possible_states_list = []
for env, result, explored_len in zip(envs, results, explored_len_list):
    x = sum((env.room_state > 0).flatten())
    explored_states = 1 / explored_len * len(result)
    explored_len_list_.append(explored_states)
    possible_states = x * binom(x-1, env.num_boxes)
    possible_states_list.append(possible_states)
    temp.append(explored_states / possible_states * 100)


In [None]:
stats = {
    'actions_len_list': actions_len_list,
    'explored_len_list': explored_len_list_,
    'possible_states_list': possible_states_list,
    'solve_time': solve_time,
    'results': results
}

with gzip.open(f'stats/boxban-hard.pkl.gz', 'wb') as f:
    pickle.dump(stats, f, pickle.HIGHEST_PROTOCOL)

## plots

In [None]:
import matplotlib.pyplot as plt

In [None]:
fig = plt.figure(figsize=(25, 25))
for idx, env in enumerate(envs):
    img = env.render(mode='rgb_array')
    fig.add_subplot(10, 10, idx + 1)
    plt.axis('off')
    plt.title(f'boxban-hard {idx + 1}')
    plt.imshow(img)
plt.axis('off')
fig.tight_layout()
plt.savefig("plots/boxban-hard_unsolved.pdf")
plt.show()

In [None]:
from copy import deepcopy

fig = plt.figure(figsize=(25, 25))
for idx, (env, actions) in enumerate(zip(envs, results)):
    env = deepcopy(env)
    
    step = 0
    for action in actions:
        next_state, reward, done, _ = env.step(action)
        step += 1
    
    img = env.render(mode='rgb_array')
    fig.add_subplot(10, 10, idx + 1)
    plt.axis('off')
    plt.title(f'boxban-hard {idx + 1}')
    plt.imshow(img)
plt.axis('off')
fig.tight_layout()
plt.savefig("plots/boxban-hard_solved.pdf")
plt.show()

In [11]:
import datetime
print(datetime.datetime.utcnow())

2019-09-19 13:41:17.348107
