Use this notebook to retrieve a training instance when you know the seed that was used for training. This is useful for debugging: for example, if training stopped when it encountered a certain instance, you can retrieve that instance here to try to understand what caused the crash.

In [None]:
%load_ext autoreload
%autoreload
from retro_branching.environments import EcoleBranching, EcoleConfiguring
from retro_branching.agents import StrongBranchingAgent, PseudocostBranchingAgent, RandomAgent
from retro_branching.utils import seed_stochastic_modules_globally

import ecole
import numpy as np
import copy

import networkx as nx
import matplotlib.pyplot as plt
from networkx.drawing.nx_pydot import graphviz_layout
# from networkx.drawing.nx_agraph import graphviz_layout

# Seed shared by the global seeding call below and by the env.seed() calls in the later cells.
# NOTE(review): the trailing `# 1` presumably records an alternative seed value that was tried;
# the seed presumably has to match the training run being reproduced -- confirm.
seed = 0 # 1
seed_stochastic_modules_globally(default_seed=seed)

In [None]:
%autoreload
# Agent used to choose branching actions while replaying the buffer-initialisation phase.
agent = RandomAgent()

# Branching environment that the instances are reset on.
# NOTE(review): these settings presumably need to match the training run being reproduced -- confirm.
env = EcoleBranching(observation_function='43_var_features',
                      information_function='default',
                      reward_function='default',
                      scip_params='default')
env.seed(seed)

# Set-cover instance generator; the cells below draw instances from it one at a time with next().
instances = ecole.instance.SetCoverGenerator(n_rows=500, n_cols=1000, density=0.05)

First, initialise the buffer. Assuming init_epsilon=1, the actions taken to initialise the buffer will have been taken with the random exploration agent. 

In [None]:
# Replay the buffer-initialisation phase: keep stepping the environment with the
# agent until `buffer_min_length` experiences have been collected, counting how
# many instances were drawn from the generator along the way.
# NOTE(review): buffer_min_length presumably has to equal the value used in the
# training run being reproduced -- confirm.
buffer_min_length = 20000
buffer_curr_length = 0
buffer_instance_counter = 0

env_ready = False
while buffer_curr_length < buffer_min_length:
    # Draw and reset on fresh instances until a reset yields an observation.
    # (A None observation presumably means the instance needed no branching
    # decision -- confirm against the environment's reset contract.)
    while not env_ready:
        env.seed(seed)
        instance = next(instances)
        buffer_instance_counter += 1
        agent.before_reset(instance)
        obs, action_set, reward, done, info = env.reset(instance)
        env_ready = obs is not None

    # Take one step in the current episode.
    action, action_idx = agent.action_select(action_set)
    obs, action_set, reward, done, info = env.step(action)

    # One step corresponds to one experience added to the buffer.
    buffer_curr_length += 1
    print(f'Buffer: {buffer_curr_length}/{buffer_min_length} | Instance counter: {buffer_instance_counter}')

    # A finished episode forces a reset on the next pass of the outer loop.
    env_ready = not done

Now that we have filled the buffer, we can simply keep drawing instances from the generator, one episode per successful environment reset, until we reach the instance of the target episode.

In [None]:
# Advance through the instance stream one episode at a time until the target
# episode is reached, writing the target episode's instance to disk.
episode_counter = 0
target_episode = 2642
reset_instance_counter = 0

# Every pass of the outer loop is one episode: no steps are taken here, so the
# environment always has to be reset on a new instance.
while episode_counter <= target_episode:
    obs = None
    # Keep drawing instances until a reset yields an observation.
    # (A None observation presumably means the instance needed no branching
    # decision -- confirm against the environment's reset contract.)
    while obs is None:
        env.seed(seed)
        instance = next(instances)
        # Count the instance as soon as it is drawn (as the buffer-filling cell
        # does), so the count stays correct even if the reset below raises on
        # the very instance being hunted for.
        reset_instance_counter += 1
        if episode_counter == target_episode:
            # Save *before* resetting so the instance is on disk even if the
            # reset itself is what crashes. If this reset yields no observation
            # the loop draws another instance and overwrites the file.
            print(f'Reached episode {episode_counter}, saving instance before reset')
            instance.write_problem(f'instance_{episode_counter}.mps')
        agent.before_reset(instance)
        obs, action_set, reward, done, info = env.reset(instance)
    # env reset, this is an episode
    print(f'Episode: {episode_counter}/{target_episode} | Instance counter: {reset_instance_counter}')
    episode_counter += 1

In [None]:
# Total instances drawn from the generator: those drawn while filling the buffer
# plus those drawn while advancing to the target episode.
print(f'Total number of instances needed to be looped through to reach episode {target_episode}: {buffer_instance_counter+reset_instance_counter}')

In [None]:
# Show the observation and done flag returned by the most recent environment reset.
print(obs, done, sep='\n')