## imports

In [1]:
import sys

# Add the parent directory to the module search path so that modules living
# one level above this notebook can be imported.
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate ".." entry to sys.path each time.
if ".." not in sys.path:
    sys.path.append("..")

In [2]:
from a_star_search import load_model

In [3]:
# Checkpoint file passed to load_model.
MODEL_CHECKPOINT = 'models/DAVI_steps_15_10x10-longTraining-no-batchnorm_1565_10.565299987792969.pth'
# Kept as the original product 10*10*4 (= 400).
# NOTE(review): presumably a 10x10 board with 4 values per cell — confirm against load_model.
MODEL_INPUT_SIZE = 10 * 10 * 4

model = load_model(MODEL_CHECKPOINT, input_size=MODEL_INPUT_SIZE)

In [4]:
import pyximport

# First call: default install().
pyximport.install()
# Second call: install again with pyimport=True. This is the last expression of
# the cell, so its return value is what the notebook displays.
pyximport.install(pyimport=True)

(<pyximport.pyximport.PyImporter at 0x7f5b6102f490>, None)

In [5]:
import gzip
import pickle

In [6]:
import gc

# Force a full garbage collection (generation 2 is what gc.collect() uses by
# default). As the last expression of the cell, the returned count is displayed.
gc.collect(generation=2)

270

## solve games

In [None]:
from a_star_search import search_way
from tqdm import tqdm_notebook
import os
import time
import numpy as np
# Public import path; the original reached into the private
# scipy.special._ufuncs module, which is not a stable API.
from scipy.special import binom

# Per-puzzle statistics. An entry is appended ONLY when search_way returns a
# non-empty list, so these lists stay aligned with each other but are shorter
# than `envs` whenever a puzzle could not be solved.
actions_len_list = []      # len(result) for each solved puzzle
explored_len_list = []     # len(result) / explored_len for each solved puzzle
possible_states_list = []  # see NOTE(review) inside the loop
solve_time = []            # seconds spent in search_way for each solved puzzle
results = []               # the non-empty lists returned by search_way

CHECKPOINT_PATH = 'stats/temp_boxban-medium.pkl.gz'

# NOTE(security): pickle.load can execute arbitrary code contained in the
# file; only load environment files from a trusted source.
with gzip.open('data/boxban-medium.pkl.gz', 'rb') as f:
    envs = pickle.load(f)

# The dict stores references to the lists above, so it always reflects their
# current content and only needs to be built once.
all_stuff = {
    'actions_len_list': actions_len_list,
    'explored_len_list': explored_len_list,
    'possible_states_list': possible_states_list,
    'solve_time': solve_time,
    'results': results
}

for env in tqdm_notebook(envs):
    # perf_counter is monotonic and therefore safer than time.time() for
    # measuring durations.
    start_time = time.perf_counter()
    result, explored_len = search_way(env, model, progress_bar_update_iterations=10000)
    elapsed = time.perf_counter() - start_time

    if isinstance(result, list) and len(result) > 0:
        solve_time.append(elapsed)
        actions_len_list.append(len(result))
        explored_len_list.append(len(result) / float(explored_len))
        # Number of cells whose room_state value is positive.
        x = np.count_nonzero(env.room_state > 0)
        # NOTE(review): by operator precedence this is (len(result) / x) * binom(...),
        # not len(result) / (x * binom(...)). Left unchanged on purpose — confirm
        # which quantity is intended before relying on this list.
        possible_states_list.append(len(result) / x * binom(x - 1, env.num_boxes))
        results.append(result)
    else:
        print('ERROR, could not find a path 😢')

    # Checkpoint after every puzzle. Write to a temporary file first and then
    # atomically swap it in, so an interrupt in the middle of a write cannot
    # leave a truncated checkpoint behind.
    tmp_checkpoint_path = CHECKPOINT_PATH + '.tmp'
    with gzip.open(tmp_checkpoint_path, 'wb') as f:
        pickle.dump(all_stuff, f, pickle.HIGHEST_PROTOCOL)
    os.replace(tmp_checkpoint_path, CHECKPOINT_PATH)
        

7.4.0


HBox(children=(IntProgress(value=0), HTML(value='')))

 76%|███████▌  | 903961/1186680 [2:47:37<51:45, 91.03it/s]  
 22%|██▏       | 476168/2179485 [1:30:03<5:11:09, 91.23it/s]
 24%|██▍       | 173934/712530 [31:17<1:36:05, 93.42it/s]
 59%|█████▉    | 126378/212520 [22:41<15:21, 93.48it/s]
  2%|▏         | 81996/3746990 [14:43<11:01:03, 92.40it/s]
 49%|████▊     | 49514/101745 [08:51<09:19, 93.27it/s]
 43%|████▎     | 33680/77520 [06:02<07:51, 92.93it/s]
 55%|█████▌    | 894816/1623160 [2:34:59<2:03:13, 98.52it/s]
 16%|█▌        | 52976/328900 [08:50<45:53, 100.19it/s]
 33%|███▎      | 1096352/3290040 [3:05:42<6:11:02, 98.54it/s]
 18%|█▊        | 127318/712530 [21:19<1:38:05, 99.43it/s]
 25%|██▌       | 32984/131670 [05:31<16:31, 99.57it/s]
 14%|█▎        | 36207/265650 [06:09<39:04, 97.87it/s]
 18%|█▊        | 87481/491400 [14:39<1:07:21, 99.94it/s]
 33%|███▎      | 110074/328900 [18:27<37:43, 96.67it/s]
 77%|███████▋  | 655780/849555 [1:50:19<32:36, 99.06it/s]  
  9%|▊         | 73673/849555 [12:19<2:09:23, 99.94it/s]
 11%|█         | 32

## statistics

In [15]:
# Mean of each per-puzzle series; the explored ratio is scaled by 100 before
# printing. One "<label> <value>" line is printed per series, in this order.
mean_by_label = (
    ('actions_len_list', np.mean(actions_len_list)),
    ('explored_len_list', np.mean(explored_len_list) * 100),
    ('possible_states_list', np.mean(possible_states_list)),
    ('solve_time', np.mean(solve_time)),
)
for label, mean_value in mean_by_label:
    print(label, mean_value)

actions_len_list 46.42
explored_len_list 0.10925841144649057
possible_states_list 1119187.8
solve_time 3702.696102924347


In [None]:
# Rebuild absolute counts per puzzle from the data gathered by the solve loop.
#
# `results` and `explored_len_list` only receive an entry when a puzzle was
# solved, while `envs` holds every puzzle. zip() pairs items purely by position,
# so a single unsolved puzzle would silently match solutions with the wrong
# environments. Fail loudly instead of producing wrong statistics.
if len(results) != len(envs):
    raise ValueError(
        f'{len(envs)} environments but {len(results)} solutions: results only '
        'contains solved puzzles, so it cannot be zipped with envs position by position'
    )

temp = []                  # explored states as a percentage of possible states
explored_len_list_ = []    # absolute number of explored states
possible_states_list = []  # x * binom(x - 1, num_boxes); rebinds the name used by the solve loop
for env, result, solution_ratio in zip(envs, results, explored_len_list):
    # Number of cells whose room_state value is positive.
    x = np.count_nonzero(env.room_state > 0)
    # explored_len_list stores len(result) / explored_len, so dividing the
    # solution length by that ratio recovers the explored count.
    explored_states = len(result) / solution_ratio
    explored_len_list_.append(explored_states)
    possible_states = x * binom(x - 1, env.num_boxes)
    possible_states_list.append(possible_states)
    temp.append(explored_states / possible_states * 100)


In [None]:
# Collect the final per-puzzle series under the keys used in the output file.
# Note that 'explored_len_list' is filled from explored_len_list_ (trailing
# underscore), i.e. the recomputed list rather than the one from the solve loop.
stats = {}
stats['actions_len_list'] = actions_len_list
stats['explored_len_list'] = explored_len_list_
stats['possible_states_list'] = possible_states_list
stats['solve_time'] = solve_time
stats['results'] = results

# Write the bundle as a gzip-compressed pickle using the highest protocol.
with gzip.open(f'stats/boxban-medium.pkl.gz', mode='wb') as stats_file:
    pickle.dump(stats, stats_file, protocol=pickle.HIGHEST_PROTOCOL)

## plots

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Render every environment into one figure and save it as a PDF.
#
# The grid keeps its original 10x10 shape for up to 100 environments. With a
# fixed 10x10 grid, add_subplot raises as soon as there are more than 100
# entries, so the number of rows grows when needed.
n_cols = 10
n_rows = max(10, (len(envs) + n_cols - 1) // n_cols)

fig = plt.figure(figsize=(25, 25))
for idx, env in enumerate(envs):
    img = env.render(mode='rgb_array')
    # Work on the returned Axes explicitly instead of pyplot's implicit
    # "current axes" state.
    ax = fig.add_subplot(n_rows, n_cols, idx + 1)
    ax.axis('off')
    ax.set_title(f'boxban-medium {idx + 1}')
    ax.imshow(img)
fig.tight_layout()
fig.savefig("plots/boxban-medium_unsolved.pdf")
plt.show()

In [None]:
from copy import deepcopy

# Replay each stored action list on its environment and render the end state.
#
# `results` only contains entries for solved puzzles, whereas `envs` holds all
# of them. zip() pairs by position, so one unsolved puzzle would replay actions
# on the wrong environments and label the panels with the wrong index. Stop
# with a clear error instead of drawing a misleading figure.
if len(results) != len(envs):
    raise ValueError(
        f'{len(envs)} environments but {len(results)} solutions: results only '
        'contains solved puzzles, so it cannot be zipped with envs position by position'
    )

# Keep the original 10x10 layout for up to 100 panels; add rows beyond that,
# because a fixed 10x10 grid makes add_subplot raise for panel 101.
n_cols = 10
n_rows = max(10, (len(envs) + n_cols - 1) // n_cols)

fig = plt.figure(figsize=(25, 25))
for idx, (env, actions) in enumerate(zip(envs, results)):
    # Step a copy so the environments stored in `envs` are not modified.
    replay_env = deepcopy(env)
    for action in actions:
        replay_env.step(action)

    img = replay_env.render(mode='rgb_array')
    ax = fig.add_subplot(n_rows, n_cols, idx + 1)
    ax.axis('off')
    ax.set_title(f'boxban-medium {idx + 1}')
    ax.imshow(img)
fig.tight_layout()
fig.savefig("plots/boxban-medium_solved.pdf")
plt.show()

In [14]:
import datetime

# Print the current UTC time.
# datetime.utcnow() is deprecated since Python 3.12; take an aware UTC
# timestamp instead and drop the tzinfo so the printed text keeps the same
# "YYYY-MM-DD HH:MM:SS.ffffff" form as before (no "+00:00" suffix).
utc_timestamp = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
print(utc_timestamp)

2019-09-18 16:55:33.896081
