- Set tracking URI
- Create experiment
- Create train and test sets
- Train while logging metrics and artifacts. 
    - Train script should use mlflow to log model HP values, errors, save final D_init, W, embedding after each epoch

TODO:
- Modify train.py if necessary
- Make sure can run train and log parameters with current formulation
- Run with different sets of hyperparameters

In [30]:
from rdkit import Chem, Geometry

# load data (don't remove hydrogens and don't sanitise) TODO? functionise this
reactant_file = "../create_figs/model_data/train_reactants.sdf"
train_r = Chem.SDMolSupplier(reactant_file, removeHs=False, sanitize=False)
train_r = [r for r in train_r]        

ts_file = "../create_figs/model_data/train_ts.sdf"
train_ts = Chem.SDMolSupplier(ts_file, removeHs=False, sanitize=False)
train_ts = [ts for ts in train_ts]        

product_file = "../create_figs/model_data/train_products.sdf"
train_p = Chem.SDMolSupplier(product_file, removeHs=False, sanitize=False)
train_p = [p for p in train_p]        

train_data = list(zip(train_r, train_ts, train_p))

num_train = len(train_data)
num_valid = int(round(num_train / 8))

# train:val splits
data_train = train_data[ :num_train - num_valid]
data_val = train_data[num_train - num_valid: ]

In [18]:
import tensorflow as tf
import numpy as np

elements = "HCNO"
num_elements = len(elements)
max_size = 21

def prepare_batch(batch_mols):

    # Initialization
    size = len(batch_mols)
    V = np.zeros((size, max_size, num_elements + 1), dtype=np.float32)
    E = np.zeros((size, max_size, max_size, 3), dtype=np.float32)
    sizes = np.zeros(size, dtype=np.int32)
    coordinates = np.zeros((size, max_size, 3), dtype=np.float32)

    # Build atom features
    for bx in range(size):
        reactant, ts, product = batch_mols[bx]
        N_atoms = reactant.GetNumAtoms()
        sizes[bx] = int(N_atoms)

        # Topological distances matrix
        MAX_D = 10.
        D = (Chem.GetDistanceMatrix(reactant) + Chem.GetDistanceMatrix(product)) / 2
        D[D > MAX_D] = 10.

        D_3D_rbf = np.exp(-((Chem.Get3DDistanceMatrix(reactant) + Chem.Get3DDistanceMatrix(product)) / 2))  # squared

        for i in range(N_atoms):
            # Edge features
            for j in range(N_atoms):
                E[bx, i, j, 2] = D_3D_rbf[i][j]
                if D[i][j] == 1.:  # if stays bonded
                    if reactant.GetBondBetweenAtoms(i, j).GetIsAromatic():
                        E[bx, i, j, 0] = 1.
                    E[bx, i, j, 1] = 1.

            # Recover coordinates; adapted for all
            # for k, mol_typ in enumerate([reactant, ts, product]):
            pos = ts.GetConformer().GetAtomPosition(i)
            np.asarray([pos.x, pos.y, pos.z])
            coordinates[bx, i, :] = np.asarray([pos.x, pos.y, pos.z])

            # Node features
            atom = reactant.GetAtomWithIdx(i)
            e_ix = elements.index(atom.GetSymbol())
            V[bx, i, e_ix] = 1.
            V[bx, i, num_elements] = atom.GetAtomicNum() / 10.
            # V[bx, i, num_elements + 1] = atom.GetExplicitValence() / 10.

    # print(np.sum(np.square(V)),np.sum(np.square(E)), sizes)
    batch_dict = {
        "nodes": tf.constant(V),
        "edges": tf.constant(E),
        "sizes": tf.constant(sizes),
        "coordinates": tf.constant(coordinates)
    }
    return batch_dict

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [5]:
import sys, os
import mlflow
import mlflow.azureml

import azureml.core
from azureml.core import Workspace

ws = Workspace.from_config()

print("SDK version:", azureml.core.VERSION)
print("MLflow version:", mlflow.version.VERSION)
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\n')

SDK version: 1.28.0
MLflow version: 1.17.0
ReactionModelling
ResearchProj
uksouth
4ba7b086-969d-41c4-a647-2784cde6af4b


In [3]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# choose a name for your CPU cluster
cpu_cluster_name = "cpucluster"

# verify that cluster does not exist already
try:
    cpu_cluster = ComputeTarget(workspace = ws, name = cpu_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
                                                           max_nodes=4)
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)

cpu_cluster.wait_for_completion(show_output=True)

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [6]:
# set tracking URI
mlflow.set_tracking_uri(ws.get_mlflow_tracking_uri())

# create mlflow experiment
experiment_name = "train-tsgen"
mlflow.set_experiment(experiment_name)

# create backend config object
backend_config = {"COMPUTE": "cpucluster", "USE_CONDA": False}

In [33]:
# submit run
remote_mlflow_run = mlflow.projects.run(uri=".", 
                                    parameters={"layers": 2, "hidden_size": 128, "iterations": 3, "batch_size", 8},
                                    backend = "azureml",
                                    backend_config = backend_config,
                                    synchronous=True)

Class AzureMLProjectBackend: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.


ExecutionException: Could not find main among entry points [] or interpret main as a runnable script. Supported script file extensions: ['.py', '.sh']

In [None]:
# view metrics and artifacts in your workspace
run.get_metrics()

# once run complete
# the model folder produced from the run is registered. This includes the MLmodel file, model.pkl and the conda.yaml.
run.register_model(model_name = 'my-model', model_path = 'model')

# then view registered model in worksapce with aml studio

Usage: ipykernel_launcher.py [options]

ipykernel_launcher.py: error: no such option: -f


SystemExit: 2

In [11]:
from azureml.train.hyperdrive import RandomParameterSampling, BanditPolicy, HyperDriveConfig, PrimaryMetricGoal
from azureml.train.hyperdrive import choice, loguniform

In [12]:
ps = RandomParameterSampling(
    {
        '--batch_size': choice(), # 8
        '--hidden_size': choice(), # 128
        '--layers': choice(), #2
        '--iterations': choice() # 3
    }
)

# BanditPolicy checks job every (evaluation_interval) number of iterations terminating the job if primary metric outside of slack_factor
early_term_policy = BanditPolicy(evaluation_interval = 2, slack_factor = 0.1)

# HyperDriveConfig
hdc = HyperDriveConfig(estimator = est, hyperparameter_sampling = ps, 
                       policy = early_term_policy, primary_metric_name = 'Accuracy',
                       primary_metric_goal = PrimaryMetricGoal.MAXIMIZE, 
                       max_total_runs = 20, max_concurrent_runs = 4)


HyperDriveConfigException: HyperDriveConfigException:
	Message: Please specify an input for choice.
	InnerException None
	ErrorResponse 
{
    "error": {
        "code": "UserError",
        "message": "Please specify an input for choice.",
        "details_uri": "https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.hyperdrive.hyperdriveconfig?view=azure-ml-py",
        "target": "options",
        "inner_error": {
            "code": "BadArgument",
            "inner_error": {
                "code": "ArgumentBlankOrEmpty"
            }
        }
    }
}