In [None]:
import os
import shutil

from azureml.core import Workspace, Experiment, Environment, ScriptRunConfig
from azureml.train.hyperdrive import HyperDriveConfig, PrimaryMetricGoal, RandomParameterSampling, choice, uniform
from azureml.train.dnn import TensorFlow
from library.azure import services

In [None]:
# Toggle for the environment-build cell further down: set to True to build and
# register the custom Docker-based environment before submitting; leave False
# to reuse the environment that is already registered in the workspace.
create_new_env = False

In [None]:
# Load the workspace described by the locally stored config file and print a
# short summary of where it lives, one property per line.
ws = Workspace.from_config()
workspace_summary = (
    ('Workspace name: ', ws.name),
    ('Azure region: ', ws.location),
    ('Subscription id: ', ws.subscription_id),
    ('Resource group: ', ws.resource_group),
)
print('\n'.join(label + value for label, value in workspace_summary))

In [None]:
# Name the experiment, make sure its local snapshot folder exists, and bind an
# Azure ML Experiment of the same name to the workspace.
# Earlier experiment names: 'price_diff_penalty', 'more_training'
experiment_name = 'stock_remainder_penalty'
project_folder = f'./submitted_experiments/{experiment_name}'
os.makedirs(project_folder, exist_ok=True)
exp = Experiment(name=experiment_name, workspace=ws)

In [None]:
# Stage the notebook, the training script and its helper modules in the
# project directory that is submitted with the run.
for script_file in (
    './execute_experiment.ipynb',
    './train_script.py',
    './helper_functions.py',
    './environment_functions.py',
):
    shutil.copy(script_file, project_folder)
#TODO: add agent checkpoint folder to copy

In [None]:
# Select compute resource: instance name + nr
# e.g. the first STANDARD_D1 machine will be called STANDARD-D1-1 (no underscores allowed)
#
# compute_name and vm_size must be switched TOGETHER. Previously the name was
# 'STANDARD-NC6-1' while vm_size requested "STANDARD_DS3_V2", so a cluster
# created from this cell would carry a GPU name but consist of CPU machines,
# even though the run uses the GPU TensorFlow environment.
#   GPU pair: 'STANDARD-NC6-1' / "STANDARD_NC6"
#   CPU pair: 'STANDARD-DS3-1' / "STANDARD_DS3_V2"
compute_name = 'STANDARD-NC6-1'
vm_size = "STANDARD_NC6"

# NOTE(review): services.get_compute is a project helper; presumably it reuses
# an existing cluster with this name and only provisions a new one (using
# vm_size / min_nodes / max_nodes) when none exists -- confirm in library.azure.
compute_target = services.get_compute(
    workspace=ws,
    compute_name=compute_name,
    vm_size=vm_size,
    min_nodes=1,
    max_nodes=4,  # the HyperDrive config below runs up to 4 trials concurrently
    #vm_priority="lowpriority"
)

In [None]:
# Fixed command-line arguments for the training script. They are written as
# (flag, value) pairs for readability and flattened into the flat
# [flag, value, flag, value, ...] list that ScriptRunConfig takes.
_script_param_pairs = [
    # Policy args
    ('--policy', 'epsilon_greedy'),  # alternative: 'boltzmann_temperature'
    # ('--boltzmann-temperature-start', 100.0),
    # ('--boltzmann-temperature-end', 1.0),
    ('--epsilon-greedy-start', 1.0),
    ('--epsilon-greedy-end', 0.01),
    ('--update-period', 4),
    ('--decay-steps', 150000),
    ('--exponential-decay-rate', 1.0),

    # Action args
    ('--min-action', 30),
    ('--max-action', 150),
    ('--action-step', 3),
    ('--comp-sellout-price', 120),
    ('--early-termination-penalty', 10),  # Multiplied by nr of remaining days
    ('--price-diff-penalty', 0.5),
    # ('--stock-remainder-penalty', 50),

    # Training args
    ('--replay-buffer-max-size', 10000),
    ('--replay-buffer-batch-size', 1),
    ('--discount', 1.0),
    ('--sample-batch-size', 64),
    ('--num-steps', 2),

    # Train duration args
    ('--train-seasons', 1500),  # previously 3000
    ('--early-stop-improvement-seasons', 500),
    ('--early-stopping-patience', 1000),
    ('--evaluation-nr-seasons', 300),

    # Neural net args
    ('--hidden-units-layer1', 20),
    ('--hidden-units-layer2', 40),
    ('--learning-rate', 1e-3),
    ('--beta-1', 0.9),
    ('--beta-2', 0.999),

    # Plotting and saving
    ('--plot-interval', 500),
    ('--sample-seasons-for-plots', 5),
]
script_params = [token for pair in _script_param_pairs for token in pair]

# Hyperparameter search space for the sweep (GridParameterSampling or
# RandomParameterSampling). Exactly one argument is swept at a time.
# Previously tried spaces, kept for reference:
#     '--discount': choice(0.99, 1.0)
#     '--early-termination-penalty': choice(0, 5, 10, 20)
#     '--learning-rate': choice(0.0025, 0.001, 0.0005)
#     '--exponential-decay-rate': choice(1.0, 0.3, 0.2, 0.1)
#     '--price-diff-penalty': choice(0., 0.25, 0.5, 0.75)
search_space = {
    '--stock-remainder-penalty': choice(0, 50, 100),
}
param_sampling = RandomParameterSampling(search_space)

In [None]:
if create_new_env:
    # Dockerfile for the custom image: an Azure ML OpenMPI/Ubuntu base image
    # with the TensorFlow 2.7.0 GPU wheel, tf-agents and plotting/Azure ML
    # packages installed on top via pip.
    dockerfile = "\n".join([
        "",
        "    FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210615.v1",
        "    RUN pip install --upgrade pip",
        "    RUN python3 -m pip install --upgrade https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-2.7.0-cp37-cp37m-manylinux2010_x86_64.whl",
        "    RUN pip install tf-agents==0.11.0 pandas matplotlib seaborn keras azureml azureml-core",
        "    ",
    ])

    env = Environment(name="customize_tensorflow_gpu_curated")
    # Python packages come from the Dockerfile, so Azure ML must not manage them.
    env.python.user_managed_dependencies = True
    # Clear the base image before supplying the Dockerfile that replaces it.
    env.docker.base_image = None
    env.docker.base_dockerfile = dockerfile

    # Register the definition in the workspace, then build the image and
    # stream the build log until it finishes.
    env.register(workspace=ws)
    build = env.build(workspace=ws)
    build.wait_for_completion(show_output=True)

In [None]:
# Fetch the registered custom environment and replace its environment
# variables with the single AZUREML_COMPUTE_USE_COMMON_RUNTIME setting.
env = Environment.get(workspace=ws, name="customize_tensorflow_gpu_curated")
env.environment_variables = {"AZUREML_COMPUTE_USE_COMMON_RUNTIME": "false"}

# One training run: the staged project folder, the entry script, its fixed
# arguments, and where / in which environment it executes.
src = ScriptRunConfig(
    source_directory=project_folder,
    script="train_script.py",
    arguments=script_params,
    compute_target=compute_target,
    environment=env,
)

# Optional early stopping (disabled). BanditPolicy is not imported in this
# notebook and would have to be imported before enabling this:
# bandit_policy = BanditPolicy(evaluation_interval=2,
#                              slack_factor=0.1,
#                              delay_evaluation=5)

# The sweep: sample hyperparameters on top of `src` and rank runs by the
# "Evaluation_reward" metric, higher being better.
hyperdrive_settings = dict(
    run_config=src,  # use either run_config or estimator=estimator
    hyperparameter_sampling=param_sampling,
    # policy=bandit_policy,
    primary_metric_name="Evaluation_reward",
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=4,  # dependent on hyper param space, how many combinations to try
    max_concurrent_runs=4,  # Resource must be able to run this amount of runs in parallel
)
hdc = HyperDriveConfig(**hyperdrive_settings)

In [None]:
# Submit the HyperDrive configuration to the experiment; `run` is the handle
# returned for the submitted sweep.
run = exp.submit(config=hdc)
# Uncomment to wait here and stream the run output into the notebook; leave it
# commented out when you want to keep using your console.
# run.wait_for_completion(show_output=True) # comment out when you want to use your console