# ML Flow on Azure ML

The ML ops demo notebook shows running ML Flow on a local machine, and the AzureML notebook demonstrates using the Azure ML SDK for experiment tracking. This notebook combines the two: it uses AzureML to run, but tracks through the ML Flow API, with AzureML providing the backend storage. This lets us make use of the easily scalable infrastructure of AzureML while keeping the code portable, as other backends can easily be swapped in when required.


In [None]:
import pandas as pd
import numpy as np
import pathlib
import matplotlib.pyplot as plt

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
%load_ext tensorboard

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Activation, Flatten
from tensorflow.keras.layers import Conv1D, concatenate
from tensorflow.keras.layers import ZeroPadding1D, Reshape, Input, Dropout, PReLU
from tensorflow.keras.models import Sequential, Model

from sklearn.metrics import mean_absolute_error, r2_score

In [None]:
import mlflow
# Switch on MLflow autologging for TensorFlow before any model is trained.
mlflow.tensorflow.autolog()

In [None]:
import azureml
import azureml.core

In [None]:
import prd_pipeline

In [None]:
import importlib

In [None]:
# Reload prd_pipeline so changes made to the module since it was imported
# are picked up without restarting the kernel.
importlib.reload(prd_pipeline)

### Load data

In [None]:
# Datastore name plus the prefix/suffix pieces used for data file names.
csv_file_suffix = 'csv'
prd_prefix = 'prd'
merged_prefix = f'{prd_prefix}_merged'
train202208_datastore_name = 'precip_rediagnosis_train202208'

In [None]:
# Name of the merged-file dataset; the alternative name is kept commented out.
# NOTE(review): not referenced in the visible part of this notebook (the load
# cell uses prd_all_events_dataset_name) - confirm whether it is still needed.
# prd_merged_file_dataset_name = 'prd_merged_csv_files'
prd_merged_file_dataset_name = 'prd_merged_2022_storm_eunice_franklin_files'


In [None]:
# Names for the Azure experiment, environment and cluster.
# NOTE(review): none of these is referenced in the visible part of this
# notebook - confirm whether they are still needed.
azure_experiment_name='prd_mlops_test'
azure_env_name = 'prd_ml_cluster'
cluster_name = 'mlops-test'

In [None]:
# Model name. NOTE(review): not referenced in the visible part of this notebook.
prd_model_name = 'azml_mlops_202208'

In [None]:
# Dataset name passed to prd_pipeline.load_data below.
prd_all_events_dataset_name = 'prd_merged_all_events_files'

In [None]:
# Target column and the profile / single-level feature columns handed to
# preprocessing.
single_lvl_features = ['air_pressure_at_sea_level']
profile_features = [
    'air_temperature',
    'relative_humidity',
]
target_parameter = 'rainfall_rate'

In [None]:
# Connect to the AzureML workspace described by the local workspace config.
prd_ws = azureml.core.Workspace.from_config()

In [None]:
# Point MLflow at the workspace's tracking URI so MLflow calls in this
# notebook are recorded in AzureML.
mlflow.set_tracking_uri(prd_ws.get_mlflow_tracking_uri())

In [None]:
%%time
# Load the all-events dataset from the workspace via the project pipeline helper.
input_data = prd_pipeline.load_data(
    current_ws=prd_ws,
    dataset_name=prd_all_events_dataset_name
)


In [None]:
# List the loaded columns whose name contains 'rain'.
[column for column in input_data.columns if 'rain' in column]

In [None]:
%%time
# Preprocess the loaded data with a 0.2 test fraction; returns the data splits
# and the dimensions used later to size the model.
feature_spec = {
    'profile': profile_features,
    'single_level': single_lvl_features,
    'target': target_parameter,
}
data_splits, data_dims = prd_pipeline.preprocess_data(
    input_data=input_data,
    feature_dict=feature_spec,
    test_fraction=0.2,
)

In [None]:
# Unpack the dimensions reported by preprocessing; they are passed to
# prd_pipeline.build_model further down.
nprof_features, nheights, nsinglvl_features = (
    data_dims[key]
    for key in ('nprof_features', 'nheights', 'nsinglvl_features')
)

In [None]:
# Name of the MLflow experiment used for the run tracked in this notebook.
prd_experiment_name = 'prd_exp_azml_mlflow'

In [None]:
# Get the experiment ID, creating the experiment only if it does not already
# exist. mlflow.create_experiment raises when the name is taken, which would
# make this cell fail every time the notebook is re-run.
_existing_experiment = mlflow.get_experiment_by_name(prd_experiment_name)
if _existing_experiment is None:
    exp1 = mlflow.create_experiment(prd_experiment_name)
else:
    exp1 = _existing_experiment.experiment_id
exp1

In [None]:
# Replace the experiment ID held in exp1 with the full experiment object.
# NOTE: because exp1 is rebound, re-running this cell on its own would pass
# the experiment object, not an ID, to get_experiment.
exp1 = mlflow.get_experiment(exp1)

In [None]:
# Display the experiment object.
exp1

In [None]:
import datetime

# TensorBoard log directory, made unique per session with a timestamp suffix.
run_timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
log_dir = f'log/fit/{run_timestamp}'

In [None]:
# Keras callback that writes TensorBoard logs to log_dir, with histograms
# computed every epoch (histogram_freq=1).
tensorflow_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# To view the logs, run `tensorboard --logdir LOGDIRPATH` from the command line.

In [None]:
import tempfile

In [None]:
from azureml.tensorboard import Tensorboard


In [None]:
# Show which splits preprocessing produced.
data_splits.keys()

In [None]:
# Show the dimensions reported by preprocessing.
data_dims

In [None]:
# Start an MLflow run in the experiment; later mlflow.log_* calls attach to it.
# NOTE(review): no mlflow.end_run() call is visible in this notebook, so the
# run appears to stay active until the kernel stops - confirm this is intended.
current_run = mlflow.start_run(experiment_id=exp1.experiment_id)


In [None]:
# Display the active MLflow run.
current_run

In [None]:
# The current AzureML run can only be obtained directly from within a run.
# The MLflow run has already been created above, so look up the matching
# AzureML run instead: take the first run yielded for the first experiment
# that matches the name.
# NOTE(review): this assumes the first yielded run is the one just started
# (get_runs is documented as reverse chronological) - confirm.
azml_experiment = azureml.core.Experiment.list(prd_ws, prd_experiment_name)[0]
# Only the first run is needed, so do not fetch every run in the experiment.
azml_current_run = next(iter(azml_experiment.get_runs()), None)
if azml_current_run is None:
    raise RuntimeError(f'No runs found for experiment {prd_experiment_name!r}')
azml_current_run

In [None]:
# Create an AzureML Tensorboard instance for the current AzureML run.
prd_tb = Tensorboard([azml_current_run])

# If successful, start() returns a string with the URI of the instance.
prd_tb_uri = prd_tb.start()

In [None]:
# Build the model from the dimensions reported by preprocessing, then print
# its layer summary.
model = prd_pipeline.build_model(nprof_features, nheights, nsinglvl_features)
model.summary()

In [None]:
# Compile with the Adam optimizer (learning rate 0.01) and mean absolute
# error as the training loss.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
model.compile(loss='mean_absolute_error', optimizer=optimizer)

In [None]:
# Train for 50 epochs in batches of 128, evaluating on the validation split
# after each epoch and writing TensorBoard logs through the callback.
validation_pair = (data_splits['X_val'], data_splits['y_val'])
history = model.fit(
    data_splits['X_train'],
    data_splits['y_train'],
    validation_data=validation_pair,
    batch_size=128,
    epochs=50,
    callbacks=[tensorflow_callback],
    verbose=True,
)


If we look at the experiment in the AzureML GUI, we see that all the model parameters have been logged automatically, and the model has been saved by ML Flow, ready for use in inference.

In [None]:
# Predict on the validation split and record the mean absolute error, both
# printed and logged to the active MLflow run under the key 'MAE'.
y_pred = model.predict(data_splits['X_val'])
error = mean_absolute_error(data_splits['y_val'], y_pred)
print(f'MAE: {error:.3f}')
mlflow.log_metric('MAE', error)


In [None]:
# R-squared of the validation predictions, printed and logged to the active
# MLflow run under the key 'R-squared'.
rsqrd = r2_score(data_splits['y_val'], y_pred)
print(f'R-squared score: {rsqrd:.3f}')
mlflow.log_metric('R-squared', rsqrd)

In [None]:
# Scatter of predicted against actual validation targets with a dashed
# reference line from (0, 0) to (300, 300), logged to the active MLflow run.
fig1, ax1 = plt.subplots(figsize=(10, 8))
ax1.scatter(data_splits['y_val'], y_pred, c='darkblue', s=200)
ax1.plot([0, 300], [0, 300], c=".3", ls="--")
ax1.set(
    xlabel='Actual 3hr precip accumulation value',
    ylabel='Predicted 3hr precip_accumulation value',
)
mlflow.log_figure(fig1, 'actual_predicted_precip_3hr.png')

In [None]:
# Build the per-epoch history table here: it is otherwise only defined in a
# cell further down the notebook, so running the cells top to bottom would
# raise a NameError on training_hist_df.
training_hist_df = pd.DataFrame(history.history)
training_hist_df['epoch'] = history.epoch

# Left panel: training and validation loss per epoch.
fig2 = plt.figure(figsize=(20, 8))
ax1 = fig2.add_subplot(1,2,1, title='Training. vs validation set loss  (mean absolute error)')
ax1.plot(training_hist_df.epoch, training_hist_df.loss, label='training')
ax1.plot(training_hist_df.epoch, training_hist_df.val_loss, c='g', label='validation')
ax1.legend()
ax1.set_ylabel('MAE [mm of precipitation]')
ax1.set_xlabel('epochs')

# Right panel: overlaid histograms of actual and predicted validation values.
ax2 = fig2.add_subplot(1,2,2,title='Histogram of Actual vs Predicted preicpitation')
ax2.hist(data_splits['y_val'], alpha=0.5, bins=40, label='Actual')
ax2.hist(y_pred, alpha=0.5, bins=40, label='Predicted')
ax2.legend()

# Store the figure as an artifact of the active MLflow run.
mlflow.log_figure(fig2, 'training_loss_hist_plot.png')

In [None]:
# After your job completes, be sure to stop() the streaming, otherwise it will continue to run.
prd_tb.stop()

## Evaluation

In [None]:
# Per-epoch training history as a table, with the epoch index as a column.
training_hist_df = pd.DataFrame(history.history).assign(epoch=history.epoch)

In [None]:
# NOTE(review): looks like a leftover debugging cell - confirm it can be removed.
print('hello')

In [None]:
# Two-panel diagnostic figure: loss curves on the left, histograms of actual
# and predicted validation values on the right. Note this rebinds fig1/ax1.
fig1, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8))

ax1.set_title('Training. vs validation set loss  (mean absolute error)')
ax1.plot(training_hist_df['epoch'], training_hist_df['loss'], label='training')
ax1.plot(training_hist_df['epoch'], training_hist_df['val_loss'], c='g', label='validation')
ax1.legend()
ax1.set(ylabel='MAE [mm of precipitation]', xlabel='epochs')

ax2.set_title('Histogram of Actual vs Predicted preicpitation')
ax2.hist(data_splits['y_val'], bins=40, alpha=0.5, label='Actual')
ax2.hist(y_pred, bins=40, alpha=0.5, label='Predicted')
ax2.legend()

# fig1.savefig('training_loss_hist_plot.png',bbox_inches='tight')

In [None]:
# Attach the loss/histogram figure to the AzureML run. The figure object is
# passed directly because this notebook never writes
# 'training_loss_hist_plot.png' to disk (the savefig call is commented out).
# The run looked up earlier in the notebook is used instead of a run ID copied
# from a previous session.
azml_current_run.log_image('training_loss_hist_plot', plot=fig1)