### Access Workspace

In [1]:
from azureml.core import Workspace

# Load the workspace handle from the configuration stored under ./config
ws = Workspace.from_config(path="./config")

### Create custom environment

In [33]:
from azureml.core import Environment
from azureml.core.environment import CondaDependencies

# Create the (still unregistered) environment definition
myenv = Environment(name="MyEnvironment")

# Declare the packages the environment needs.
# azureml-defaults is distributed through pip rather than conda channels, so it
# is listed under pip_packages; the remaining libraries are resolved by conda.
myenv_dep = CondaDependencies.create(
    conda_packages=['scikit-learn', 'pandas', 'joblib'],
    pip_packages=['azureml-defaults'],
)

myenv.python.conda_dependencies = myenv_dep

# Register the environment in the workspace. Kept as the last expression so the
# notebook displays what register() returns.
myenv.register(ws)

{
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20211124.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "MyEnvironment",
    "python": {
        "baseCondaEnvironment": null,
        "condaDependencies": {
            "channels": [
                "anaconda",
                "conda-forge"


### Create a compute cluster for pipeline

In [3]:
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException

cluster_name = "pipeline-cluster"

# Provisioning settings used only when the cluster has to be created.
compute_config = AmlCompute.provisioning_configuration(
    vm_size='STANDARD_D11_V2',
    max_nodes=2,
)

try:
    # Reuse the cluster if one with this name already exists in the workspace,
    # so that re-running the notebook does not attempt a second provisioning.
    compute_cluster = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    # No compute target with this name yet: provision it.
    compute_cluster = ComputeTarget.create(ws, cluster_name, compute_config)

# Block until the provisioning operation has finished.
compute_cluster.wait_for_completion()

Provisioning operation finished, operation "Succeeded"


### Create Run Configurations for the steps

In [4]:
from azureml.core.runconfig import RunConfiguration
# Start from a default run configuration, then attach the custom environment
# and point it at the compute cluster created above.
run_config = RunConfiguration()
run_config.environment = myenv
run_config.target = compute_cluster

### Create the experiment

In [5]:
from azureml.core import Experiment
# Handle to the 'PipelineExp01' experiment in this workspace
experiment = Experiment(ws, 'PipelineExp01')

### Access Datastore and Dataset

In [6]:
from azureml.core import Datastore, Dataset

In [7]:
# Look up the datastore named "azuremlds01" in the workspace
az_store = Datastore.get(workspace=ws, datastore_name="azuremlds01")

In [8]:
# Location of the CSV file, expressed as (datastore, relative path) pairs
csv_path = [(az_store, "Loan+Approval+Prediction.csv")]

# Build a tabular dataset from the file and register it in the workspace,
# creating a new version under the same name on each run
loan_dataset = Dataset.Tabular.from_delimited_files(path=csv_path).register(
    workspace=ws,
    name="Loan Applications Using SDK",
    create_new_version=True,
)

# Load the registered dataset into a pandas DataFrame for the steps below
df = loan_dataset.to_pandas_dataframe()

### Data Preparation

In [9]:
import os
import sys
sys.path.append(".")
from Model_Preparation import clean_missing_data,preprocessing,train
from azureml.core import Run
import os
import argparse
import joblib
import json

In [10]:
import sys
# Replace the process argument vector with a single empty entry before argparse
# is used in the next cell (parse_args() reads sys.argv by default).
# NOTE(review): presumably this hides the arguments the notebook kernel was
# launched with - confirm.
sys.argv=['']
# Remove the name `sys` from the notebook namespace again; any later cell that
# needs it has to re-import it.
del sys

In [11]:
# Script-style arguments for the training step. Both options have defaults, so
# the cell also works when no arguments are supplied.
parser = argparse.ArgumentParser("train")

parser.add_argument('--model_folder', type=str, dest='model_folder', default="model", help='model location')
parser.add_argument("--model_name", type=str, help="Name of the Model", default="RandomForest.pkl")

# parse_known_args() ignores arguments this parser does not declare instead of
# exiting, so the cell does not depend on sys.argv having been emptied first.
# The leftovers are kept in a private name and otherwise unused.
args, _unknown_args = parser.parse_known_args()

print("Argument [model_folder]: %s" % args.model_folder)
print("Argument [model_name]: %s" % args.model_name)
model_folder = args.model_folder
model_name = args.model_name


Argument [model_folder]: model
Argument [model_name]: RandomForest.pkl


In [12]:
# Obtain a run object from the current execution context.
# Note: `run` is rebound further down by experiment.start_logging(), so this
# value is not the one the metrics are logged against.
run = Run.get_context()

In [13]:
# Preview the first rows of the DataFrame loaded from the registered dataset
df.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,False,0,Graduate,False,5849,0.0,,360.0,1.0,Urban,True
1,LP001003,Male,True,1,Graduate,False,4583,1508.0,128.0,360.0,1.0,Rural,False
2,LP001005,Male,True,0,Graduate,True,3000,0.0,66.0,360.0,1.0,Urban,True
3,LP001006,Male,True,0,Not Graduate,False,2583,2358.0,120.0,360.0,1.0,Urban,True
4,LP001008,Male,False,0,Graduate,False,6000,0.0,141.0,360.0,1.0,Urban,True


In [14]:
# Show the (rows, columns) dimensions of the DataFrame
df.shape

(614, 13)

In [15]:
# Pass the frame through the project helper clean_missing_data (imported from
# Model_Preparation) and rebind `df` to its result.
# NOTE(review): the input frame is overwritten, so re-running this cell feeds
# the helper its own previous output - confirm the helper tolerates that.
df = clean_missing_data(df)

In [16]:
# Run the project helper preprocessing (from Model_Preparation) on the cleaned
# frame; it returns two values, unpacked here as X and Y.
# NOTE(review): presumably features and target - confirm against the helper.
X,Y = preprocessing(df)

### Model Training

In [17]:
# Call the project helper train (from Model_Preparation) on X and Y. The three
# returned values are used later in the notebook: `model` is serialized with
# joblib, `Confusion_Matrix` is converted with .tolist() for logging, and
# `Score` is logged as a metric.
model,Confusion_Matrix,Score = train(X,Y)

  model = rfc.fit(X_train, Y_train)


### Saving the model

In [18]:
# Make sure the local 'outputs' directory exists before writing into it
os.makedirs('outputs', exist_ok=True)

# Target file: outputs/<model_name>
path = os.path.join('outputs', model_name)

# Serialize the trained model. It is stored wrapped in a one-element list, so
# whoever loads the file gets a list back. Left as the cell's last expression
# so the notebook shows the list of written files.
joblib.dump([model], path)

['outputs/RandomForest.pkl']

### Logging the Experiment

In [19]:
# Payload for run.log_confusion_matrix: schema header plus the class labels and
# the matrix converted to nested Python lists.
cm_dict = dict(
    schema_type="confusion_matrix",
    schema_version="v1",
    data=dict(
        class_labels=["N", "Y"],
        matrix=Confusion_Matrix.tolist(),
    ),
)

In [20]:
# Start an interactive run in the experiment and rebind `run` to it; the
# metrics logged in the following cells are recorded against this run.
run = experiment.start_logging()

In [21]:


# Record the number of observations, the confusion matrix and the score on the run
run.log(name="TotalObservations", value=len(X))
run.log_confusion_matrix(name="ConfusionMatrix", value=cm_dict)
run.log(name="Score", value=Score)


In [22]:
# Mark the interactive run started above as completed
run.complete()

### Model Registration

In [23]:
from azureml.core.model import Model

# Register the serialized model file in the workspace.
# The path is derived from model_name, the same way the save cell builds it, so
# the registered file stays in sync if --model_name is changed.
# Note: this rebinds `model` from the trained estimator to the object returned
# by Model.register.
model = Model.register(
    workspace=ws,
    model_path=os.path.join('outputs', model_name),
    model_name="loan-prediction-model",
    tags={"version": "1"},
    description="Loan Classification",
)

Registering model loan-prediction-model
