In [1]:
import pandas as pd
import numpy as np
import warnings
from math import sqrt
warnings.filterwarnings('ignore')
from azureml.core.run import Run
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.core.model import Model
from azureml.core.authentication import ServicePrincipalAuthentication
from azureml.train.automl import AutoMLConfig
import pickle
from matplotlib import pyplot as plt
from matplotlib.pyplot import figure
import mlflow
from mlflow.tracking import MlflowClient


In [2]:
from azureml.core import Workspace, Dataset

subscription_id = "bc3f42af-98e7-4062-b5e3-7afdf0959110"
resource_group = 'Learn_MLOps'
workspace_name = 'MLOps_WS'

workspace = Workspace(subscription_id, resource_group, workspace_name)

In [3]:
workspace

Workspace.create(name='MLOps_WS', subscription_id='bc3f42af-98e7-4062-b5e3-7afdf0959110', resource_group='Learn_MLOps')

In [4]:
uri = workspace.get_mlflow_tracking_uri()
mlflow.set_tracking_uri(uri)

In [11]:
# Importing pre-processed dataset
dataset = Dataset.get_by_name(workspace, name='processed_weather_data_portofTurku')
print(dataset.name, dataset.version)

processed_weather_data_portofTurku 1


In [12]:
df = dataset.to_pandas_dataframe()

In [14]:
df=pd.read_csv("./data/weather_dataset_processed.csv")
df.head()


Unnamed: 0,Timestamp,Location,Temperature_C,Humidity,Wind_speed_kmph,Wind_bearing_degrees,Visibility_km,Pressure_millibars,Weather_conditions
0,2006-04-01 00:00:00+02:00,"Port of Turku, Finland",9.472222,0.89,14.1197,251,15.8263,1015.13,0
1,2006-04-01 01:00:00+02:00,"Port of Turku, Finland",9.355556,0.86,14.2646,259,15.8263,1015.63,0
2,2006-04-01 02:00:00+02:00,"Port of Turku, Finland",9.377778,0.89,3.9284,204,14.9569,1015.94,0
3,2006-04-01 03:00:00+02:00,"Port of Turku, Finland",8.288889,0.83,14.1036,269,15.8263,1016.41,0
4,2006-04-01 04:00:00+02:00,"Port of Turku, Finland",8.755556,0.83,11.0446,259,15.8263,1016.51,0


# Spliting Pre-Processed data into Training and Validation datasets

In [None]:
# Validation set is used later to evaluate model performance post training. 

In [15]:
df_training = df.iloc[:77160]

In [16]:
df_training.shape

(77160, 9)

In [17]:
df_validation = df.drop(df_training.index)

In [18]:
df_validation.shape

(19293, 9)

# Registering Training and Validation data to the datastore on the workspace. 

In [19]:
!mkdir Data

A subdirectory or file Data already exists.


In [22]:
df_training.to_csv('data/training_data.csv',index=False)

In [24]:
df_validation.to_csv('data/validation_data.csv',index=False)

In [25]:
datastore = workspace.get_default_datastore()

In [27]:
datastore.upload(src_dir='data', target_path='data')

Uploading an estimated of 4 files
Target already exists. Skipping upload for data\training_data.csv
Target already exists. Skipping upload for data\validation_data.csv
Target already exists. Skipping upload for data\weather_dataset_processed.csv
Target already exists. Skipping upload for data\weather_dataset_raw.csv
Uploaded 0 files


$AZUREML_DATAREFERENCE_5fc597b371564011a62ce4c775ff1a7d

In [28]:
training_dataset = Dataset.Tabular.from_delimited_files(datastore.path('data/training_data.csv'))

In [29]:
validation_dataset = Dataset.Tabular.from_delimited_files(datastore.path('data/validation_data.csv'))

In [30]:
training_ds = training_dataset.register(workspace=workspace,
                                 name='training_dataset',
                                 description='Dataset to use for ML training')

In [31]:
validation_ds = validation_dataset.register(workspace=workspace,
                                 name='validation_dataset',
                                 description='Dataset for validation ML models')

# Data ingestion step - Training dataset

In [32]:
dataset = Dataset.get_by_name(workspace, name='training_dataset')
print(dataset.name, dataset.version)

training_dataset 1


In [33]:
df = dataset.to_pandas_dataframe()

In [34]:
df.head()

Unnamed: 0,Timestamp,Location,Temperature_C,Humidity,Wind_speed_kmph,Wind_bearing_degrees,Visibility_km,Pressure_millibars,Weather_conditions
0,2006-03-31 22:00:00,"Port of Turku, Finland",9.472222,0.89,14.1197,251,15.8263,1015.13,0
1,2006-03-31 23:00:00,"Port of Turku, Finland",9.355556,0.86,14.2646,259,15.8263,1015.63,0
2,2006-04-01 00:00:00,"Port of Turku, Finland",9.377778,0.89,3.9284,204,14.9569,1015.94,0
3,2006-04-01 01:00:00,"Port of Turku, Finland",8.288889,0.83,14.1036,269,15.8263,1016.41,0
4,2006-04-01 02:00:00,"Port of Turku, Finland",8.755556,0.83,11.0446,259,15.8263,1016.51,0


In [35]:
df.shape

(77160, 9)

#### Feature Selection and scaling

In [36]:
X = df[['Temperature_C', 'Humidity', 'Wind_speed_kmph', 'Wind_bearing_degrees', 'Visibility_km', 'Pressure_millibars', 'Weather_conditions']].values
y = df['Weather_conditions'].values
y

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [37]:
# Splitting the Training dataset into Train and Test set for ML training
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

  LARGE_SPARSE_SUPPORTED = LooseVersion(scipy_version) >= '0.14.0'


In [38]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()

In [39]:
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Model training and Testing Step

## 1. Support Vector Machine

In [40]:
myexperiment = Experiment(workspace, "support-vector-machine")
mlflow.set_experiment("mlflow-support-vector-machine")


2022/12/26 15:42:18 INFO mlflow.tracking.fluent: Experiment with name 'mlflow-support-vector-machine' does not exist. Creating a new experiment.


<Experiment: artifact_location='', creation_time=1672049538932, experiment_id='09801c4b-cd2a-49ca-a0c2-edc50a5bbdaa', last_update_time=None, lifecycle_stage='active', name='mlflow-support-vector-machine', tags={}>

In [41]:
#from sklearn.svm import SVC
from sklearn import svm, datasets
from sklearn.model_selection import GridSearchCV


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  eps=np.finfo(np.float).eps,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  eps=np.finfo(np.float).eps, copy_X=True, fit_path=True,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  eps=np.finfo(np.float).eps, copy_X=True, fit_path=True,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  eps=np.finfo(np.float).eps, positive=False):
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  max_n_alphas=1000, n_jobs=None, eps=np.finfo(np.float).eps,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  max_n_alpha

In [42]:
parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}

In [43]:
svc = svm.SVC()

In [44]:
# initialize a run in Azureml and mlflow experiments
run = myexperiment.start_logging()
mlflow.start_run()


run.log("dataset name", dataset.name)
run.log("dataset Version", dataset.version)

The git executable must be specified in one of the following ways:
    - be included in your $PATH
    - be set via $GIT_PYTHON_GIT_EXECUTABLE
    - explicitly set via git.refresh()

All git commands will error until this is rectified.

$GIT_PYTHON_REFRESH environment variable. Use one of the following values:
    - error|e|raise|r|2: for a raised exception

Example:
    export GIT_PYTHON_REFRESH=quiet



In [46]:
svc_grid = GridSearchCV(svc, parameters)

In [47]:
%%time
svc_grid.fit(X_train, y_train)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  test_folds = np.zeros(n_samples, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  fold_sizes = np.full(n_splits, n_samples // n_splits, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  test_mask = np.zeros(_num_samples(X), dtype=np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  fold_sizes = np.full(n_splits, n_samples // n_splits, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  test_mask = np.zeros(_num_samples(X), dtype=np.bool)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/

Wall time: 15.5 s


GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'kernel': ('linear', 'rbf'), 'C': [1, 10]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [48]:
svc_grid.get_params(deep=True)

{'cv': 'warn',
 'error_score': 'raise-deprecating',
 'estimator__C': 1.0,
 'estimator__cache_size': 200,
 'estimator__class_weight': None,
 'estimator__coef0': 0.0,
 'estimator__decision_function_shape': 'ovr',
 'estimator__degree': 3,
 'estimator__gamma': 'auto_deprecated',
 'estimator__kernel': 'rbf',
 'estimator__max_iter': -1,
 'estimator__probability': False,
 'estimator__random_state': None,
 'estimator__shrinking': True,
 'estimator__tol': 0.001,
 'estimator__verbose': False,
 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
   decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
   kernel='rbf', max_iter=-1, probability=False, random_state=None,
   shrinking=True, tol=0.001, verbose=False),
 'fit_params': None,
 'iid': 'warn',
 'n_jobs': None,
 'param_grid': {'kernel': ('linear', 'rbf'), 'C': [1, 10]},
 'pre_dispatch': '2*n_jobs',
 'refit': True,
 'return_train_score': 'warn',
 'scoring': None,
 'verbose': 0}

In [49]:
from sklearn.svm import SVC

In [50]:
svc = SVC(C=svc_grid.get_params(deep=True)['estimator__C'], kernel=svc_grid.get_params(deep=True)['estimator__kernel'])

In [51]:
svc.fit(X_train, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [52]:
# Logging training parameters to AzureML and MLFlow experiments
run.log("C", svc_grid.get_params(deep=True)['estimator__C'])
run.log("Kernel", svc_grid.get_params(deep=True)['estimator__kernel'])

In [53]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

In [54]:
predicted_svc = svc.predict(X_test)

In [55]:
acc = accuracy_score(y_test, predicted_svc)

In [56]:
fscore = f1_score(y_test, predicted_svc, average="macro")
precision = precision_score(y_test, predicted_svc, average="macro")
recall = recall_score(y_test, predicted_svc, average="macro")

In [57]:
import git
repo = git.Repo(search_parent_directories=True)
sha = repo.head.object.hexsha

ImportError: Failed to initialize: Bad git executable.
The git executable must be specified in one of the following ways:
    - be included in your $PATH
    - be set via $GIT_PYTHON_GIT_EXECUTABLE
    - explicitly set via git.refresh()

All git commands will error until this is rectified.

This initial warning can be silenced or aggravated in the future by setting the
$GIT_PYTHON_REFRESH environment variable. Use one of the following values:
    - quiet|q|silence|s|none|n|0: for no warning or exception
    - warn|w|warning|1: for a printed warning
    - error|e|raise|r|2: for a raised exception

Example:
    export GIT_PYTHON_REFRESH=quiet


In [58]:
# Log to AzureML and MLflow
run.log("Test_accuracy", acc)
run.log("Precision", precision)
run.log("Recall", recall)
run.log("F-Score", fscore)
#run.log("Git-sha", sha)

In [59]:
run.complete()
print ("run id:", run.id)

run id: 84c55ddf-b98c-42b9-a405-6bb94efa3890


In [60]:
mlflow.end_run()

In [61]:
run.get_metrics()

{'dataset name': 'training_dataset',
 'dataset Version': 1,
 'C': 1.0,
 'Kernel': 'rbf',
 'Test_accuracy': 1.0,
 'Precision': 1.0,
 'Recall': 1.0,
 'F-Score': 1.0}

In [62]:
workspace.get_details()

{'id': '/subscriptions/bc3f42af-98e7-4062-b5e3-7afdf0959110/resourceGroups/Learn_MLOps/providers/Microsoft.MachineLearningServices/workspaces/MLOps_WS',
 'name': 'MLOps_WS',
 'identity': {'principal_id': 'd2939331-dc6f-45ed-9667-5997cf33bcbb',
  'tenant_id': 'd65c48be-4823-41e6-96bd-330f426b808e',
  'type': 'SystemAssigned'},
 'location': 'eastus2',
 'type': 'Microsoft.MachineLearningServices/workspaces',
 'tags': {},
 'sku': 'Basic',
 'workspaceid': '704e69c1-83ee-4770-9944-8e74d1af9fe5',
 'sdkTelemetryAppInsightsKey': 'e1f7b545-6243-4abf-ba76-c5691d2edb62',
 'description': '',
 'friendlyName': 'MLOps_WS',
 'containerRegistry': '',
 'keyVault': '/subscriptions/bc3f42af-98e7-4062-b5e3-7afdf0959110/resourceGroups/Learn_MLOps/providers/Microsoft.Keyvault/vaults/mlopsws1395667218',
 'applicationInsights': '/subscriptions/bc3f42af-98e7-4062-b5e3-7afdf0959110/resourceGroups/Learn_MLOps/providers/Microsoft.insights/components/mlopsws5198637541',
 'storageAccount': '/subscriptions/bc3f42af-98

In [63]:
import mlflow.sklearn
mlflow.sklearn.log_model(svc, 'outputs')

<mlflow.models.model.ModelInfo at 0x157697fb70>

Random Forest classifier 

In [65]:
myexperiment = Experiment(workspace, "random-forest-classifier")
mlflow.set_experiment("mlflow-random-forest-classifier")

<Experiment: artifact_location='', creation_time=1672049999739, experiment_id='6ce761a7-c5df-46d5-937f-ee8efdb96559', last_update_time=None, lifecycle_stage='active', name='mlflow-random-forest-classifier', tags={}>

In [66]:
from sklearn.ensemble import RandomForestClassifier

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  from ._gradient_boosting import predict_stages
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  from ._gradient_boosting import predict_stages


In [67]:
rf = RandomForestClassifier(max_depth=10, random_state=0, n_estimators=100)

In [70]:
# initialize runs in Azureml and mlflow
run = myexperiment.start_logging()
mlflow.end_run()
mlflow.start_run()


# Log dataset used 
run.log("dataset name", dataset.name)
run.log("dataset Version", dataset.version)

In [71]:
%%time
rf.fit(X_train, y_train)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_store_unique_indices = np.zeros(y.shape, dtype=np.int)
  if _joblib.__version__ >= LooseVersion('0.12'):
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)


Wall time: 6.24 s


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=10, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=None,
            oob_score=False, random_state=0, verbose=0, warm_start=False)

In [72]:
# Logging training parameters to AzureML and MLFlow experiments
run.log("max_depth", 10)
run.log("random_state", 0)
run.log("n_estimators", 100)

In [73]:
predicted_rf = rf.predict(X_test)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  dtype=np.int)
  if _joblib.__version__ >= LooseVersion('0.12'):


In [74]:
acc = accuracy_score(y_test, predicted_rf)
fscore = f1_score(y_test, predicted_rf, average="macro")
precision = precision_score(y_test, predicted_rf, average="macro")
recall = recall_score(y_test, predicted_rf, average="macro")

In [77]:
run.log("Test_accuracy", acc)
run.log("Precision", precision)
run.log("Recall", recall)
run.log("F-Score", fscore)
#run.log("Git-sha", sha)

In [78]:
run.complete()
print ("run id:", run.id)

run id: 861ccc93-ac44-4a0a-8674-d686920c7de4


In [79]:
mlflow.end_run()

In [80]:
run.get_metrics()

{'dataset name': 'training_dataset',
 'dataset Version': 1,
 'max_depth': 10,
 'random_state': 0,
 'n_estimators': 100,
 'Test_accuracy': [1.0, 1.0, 1.0],
 'Precision': [1.0, 1.0, 1.0],
 'Recall': [1.0, 1.0, 1.0],
 'F-Score': [1.0, 1.0, 1.0]}

# Model Packaging Step

pickle file or onnx

In [82]:
# Convert into SVC model into ONNX format file
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
initial_type = [('float_input', FloatTensorType([None, 6]))]
onx = convert_sklearn(svc, initial_types=initial_type)
with open("outputs/svc.onnx", "wb") as f:
    f.write(onx.SerializeToString())

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  EPS = np.finfo(np.float).eps


In [83]:
# Convert into RF model into ONNX format file
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
initial_type = [('float_input', FloatTensorType([None, 6]))]
onx = convert_sklearn(rf, initial_types=initial_type)
with open("outputs/rf.onnx", "wb") as f:
    f.write(onx.SerializeToString())

# Model Registering Step

In [84]:
# Register Model on AzureML WS
model = Model.register(model_path = './outputs/svc.onnx', # this points to a local file 
                       model_name = "support-vector-classifier", # this is the name the model is registered as
                       tags = {'dataset': dataset.name, 'version': dataset.version, 'hyparameter-C': '1', 'testdata-accuracy': '0.9519'}, 
                       model_framework='pandas==0.23.4',
                       description = "Support vector classifier to predict weather at port of Turku",
                       workspace = workspace)

print('Name:', model.name)
print('Version:', model.version)

Registering model support-vector-classifier
Name: support-vector-classifier
Version: 1


In [85]:
# Register Model on AzureML WS
model = Model.register(model_path = './outputs/rf.onnx', # this points to a local file 
                       model_name = "random-forest-classifier", # this is the name the model is registered as
                       tags = {'dataset': dataset.name, 'version': dataset.version, 'hyparameter-C': '1', 'testdata-accuracy': '0.9548'}, 
                       model_framework='pandas==0.23.4',
                       description = "Random forest classifier to predict weather at port of Turku",
                       workspace = workspace)

print('Name:', model.name)
print('Version:', model.version)

Registering model random-forest-classifier
Name: random-forest-classifier
Version: 1


In [86]:
import mlflow.sklearn

In [87]:
# Save the model to the outputs directory for capture
mlflow.sklearn.log_model(svc, 'outputs/svc.onnx')

<mlflow.models.model.ModelInfo at 0x157eb52be0>

In [88]:
# Save the model to the outputs directory for capture
mlflow.sklearn.log_model(rf, 'outputs/rf.onnx')

<mlflow.models.model.ModelInfo at 0x150175cc88>

# Save model artefacts

In [89]:
import pickle

with open('./outputs/scaler.pkl', 'wb') as scaler_pkl:
    pickle.dump(sc, scaler_pkl)

In [90]:
# Register Model on AzureML WS
scaler = Model.register(model_path = './outputs/scaler.pkl', # this points to a local file 
                       model_name = "scaler", # this is the name the model is registered as
                       tags = {'dataset': dataset.name, 'version': dataset.version}, 
                       model_framework='pandas==0.23.4',
                       description = "Scaler used for scaling incoming inference data",
                       workspace = workspace)

print('Name:', scaler.name)
print('Version:', scaler.version)

Registering model scaler
Name: scaler
Version: 1
