In [25]:
from azureml.core import Workspace

# Access the Workspace (from_config loads the workspace details from a saved config file)
ws = Workspace.from_config()

In [30]:
import os, shutil
# Folder that is later submitted as the experiment's source directory
training_folder = 'loan'
os.makedirs(training_folder, exist_ok=True)

# Stage the data file inside the experiment folder under a shorter name.
# The copy call stays last so the cell displays the destination path.
staged_csv_path = os.path.join(training_folder, "loan.csv")
shutil.copy('loan/Loan Approval Prediction.csv', staged_csv_path)

'loan/loan.csv'

In [43]:
%%writefile $training_folder/loan_training.py
from azureml.core import Workspace, Run
import os
import joblib

#-------------------------------------------------------------#
# Initializing the experiment
#-------------------------------------------------------------#

# Get the context of the experiment run
new_run = Run.get_context()
#-------------------------------------------------------------#
# Machine Learning Model
#-------------------------------------------------------------#
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix


df = pd.read_csv('Loan Approval Prediction.csv')

LoanPrep = df[[ 'Married', 
                'Education', 
                'Self_Employed', 
                'ApplicantIncome', 
               'LoanAmount', 
               'Loan_Amount_Term', 
               'Credit_History', 
               'Loan_Status']]

LoanPrep = LoanPrep.dropna()
LoanPrep = pd.get_dummies(LoanPrep, drop_first = True)
                          
Y = LoanPrep[['Loan_Status_Y']]
X = LoanPrep.drop(['Loan_Status_Y'], axis = 1)
                          
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.3, random_state = 1234)
                          
lr = LogisticRegression()
lr.fit(X_train, y_train)
                          
Y_predict = lr.predict(X_test)
Y_prob = lr.predict_proba(X_test)[:, 1]
                          
cm = confusion_matrix(y_test, Y_predict)
score = lr.score(X_test, y_test)

# Create the confusion matrix dictionary
cm_dict = {'schema_type': 'confusion_matrix',
          'schema_version': 'v1',
          'data': {'class_labels': ['N', 'Y'],
                  'matrix': [
                      cm.tolist()
                  ]}}
#------------------------------------------------------------------#
# Log Metrics and Complete an Experiment run
#------------------------------------------------------------------#
new_run.log('TotalObservations', len(df))
new_run.log('ConfusionMatrix', cm_dict)
new_run.log('Score', score)

X_test = X_test.reset_index(drop = True)
Y_test = y_test.reset_index(drop = True)

Y_prob_df = pd.DataFrame(Y_prob, columns = ['Scored Probabilities'])
Y_predict_df = pd.DataFrame(Y_predict, columns = ['Scored Label'])

scored_dataset = pd.concat([X_test, Y_test, Y_prob_df, Y_predict_df], axis = 1)

scored_dataset.to_csv('./outputs/Loan_scored.csv', index = False)

os.makedirs('outputs', exist_ok = True)
joblib.dump(value = lr, filename = 'outputs/loan_model.pkl')

new_run.complete()

Overwriting loan/loan_training.py


# Run the training script as an experiment

In [44]:
from azureml.core import Experiment, ScriptRunConfig, Environment
from azureml.core.environment import CondaDependencies
from azureml.widgets import RunDetails

# Experiment under which the training-script runs are grouped
new_experiment = Experiment(workspace=ws, name='Training_Script')

# Build a Python environment for the run from an explicit conda package list
myenv = Environment(name='MyEnvironment')
myenv_dep = CondaDependencies.create(conda_packages=['scikit-learn', 'pandas'])
myenv.python.conda_dependencies = myenv_dep
myenv.register(ws)

# Describe what to run: the script inside the training folder, in that environment
script_config = ScriptRunConfig(
    source_directory=training_folder,
    script='loan_training.py',
    environment=myenv,
)

# Submit the run and display its progress widget in the notebook
new_run = new_experiment.submit(config=script_config)
RunDetails(new_run).show()

# Block until the run has finished; as the last expression, the returned
# run details are displayed as the cell output
new_run.wait_for_completion()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

{'runId': 'Training_Script_1648111447_0bb86bb1',
 'target': 'local',
 'status': 'Finalizing',
 'startTimeUtc': '2022-03-24T08:44:08.229748Z',
 'services': {},
 'properties': {'_azureml.ComputeTargetType': 'local',
  'ContentSnapshotId': '97fbe9c0-51b7-4268-b343-4c5c252f9fcb'},
 'inputDatasets': [],
 'outputDatasets': [],
 'runDefinition': {'script': 'loan_training.py',
  'command': '',
  'useAbsolutePath': False,
  'arguments': [],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'local',
  'dataReferences': {},
  'data': {},
  'outputData': {},
  'datacaches': [],
  'jobName': None,
  'maxRunDurationSeconds': 2592000,
  'nodeCount': 1,
  'instanceTypes': [],
  'priority': None,
  'credentialPassthrough': False,
  'identity': None,
  'environment': {'name': 'MyEnvironment',
   'version': '2',
   'python': {'interpreterPath': 'python',
    'userManagedDependencies': False,
    'condaDependencies': {'channels': ['anaconda', 'conda-forge']

In [47]:
# Print every metric logged by the run, then the files attached to it
metrics = new_run.get_metrics()
for metric_name, metric_value in metrics.items():
    print(metric_name, metric_value)
print('\n')
for file_name in new_run.get_file_names():
    print(file_name)

TotalObservations 614
ConfusionMatrix {'schema_type': 'confusion_matrix', 'schema_version': 'v1', 'data': {'class_labels': ['N', 'Y'], 'matrix': [[[20, 28], [3, 100]]]}}
Score 0.7947019867549668


azureml-logs/60_control_log.txt
azureml-logs/70_driver_log.txt
logs/azureml/2809_azureml.log
outputs/Loan_scored.csv
outputs/loan_model.pkl
