In [None]:
import os
# Set before tensorflow is imported (below) so the value is already in the
# environment when the library initializes.
# NOTE(review): level '2' is understood to hide TensorFlow's C++ INFO and
# WARNING log messages -- confirm against the TensorFlow version in use.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import warnings
# Ignore all Python warnings raised from this point on.
warnings.filterwarnings("ignore")

import tensorflow as tf
# An empty device list for 'GPU' leaves TensorFlow with no visible GPUs,
# so everything in this notebook runs on the CPU.
tf.config.set_visible_devices([], 'GPU')

import numpy as np
# Print very small floats in fixed-point form instead of scientific notation.
np.set_printoptions(suppress=True)

from src.envs import ScoutingRuns, SingleStepScoutingRuns

from helper_functions import run_env
from helper_functions import print_results
from helper_functions import plot_results

In [None]:
# Environment settings. These must be the same values that were used when the
# agent was trained.
num_scouting_runs = 3
penalty = 0.1
enforce_constraints = False
stop_action = False

# Restore the saved agent (actor/policy) from disk. Loading may print some
# warnings, but the agent will run fine.
# The loaded agent is called directly to select an action: action = agent(state)
name = 'ddpg_agent'
model_path = f'../outputs/{name}_best_policy'
agent = tf.saved_model.load(model_path)

In [None]:
# A few arbitrary example compounds. Each one is a set of NK parameters given
# in the order (s1, s2, kw).
compounds = [
    np.array(nk_params)
    for nk_params in (
        (15, 1.0, 100),
        (28, 1.5, 3000),
        (37, 1.8, 22000),
        (45, 2.1, 40000),
    )
]

# Keyword arguments shared by every environment instance created in the loop.
env_kwargs = dict(
    max_scouting_runs=num_scouting_runs,
    penalty=penalty,
    enforce_constraints=enforce_constraints,
    stop_action=stop_action,
    tf_summary_path=None,
)

# Alternative environment: to use SingleStepScoutingRuns instead, create the
# environment inside the loop like this:
#     env = SingleStepScoutingRuns(
#         num_scouting_runs=num_scouting_runs,
#         penalty=penalty,
#         enforce_constraints=enforce_constraints,
#         tf_summary_path=None
#     )

# Line printed after each compound's results to separate them visually.
separator = '+++' * 20

# For each compound: create a fresh environment, let the agent perform the
# scouting runs, then print and plot the outcome.
for compound in compounds:
    env = ScoutingRuns(**env_kwargs)

    # reset() returns a pair; only the first element (the starting state) is used.
    initial_state, _ = env.reset(compound)

    # run_env returns the environment, which is then passed to the reporting helpers.
    env = run_env(env, agent, initial_state)

    print_results(env)
    plot_results(env)

    print(separator, end='\n\n')
    