## Connect to AML workspace & access data

In [None]:
from azureml.core import Workspace, Dataset

# Build the workspace handle via Workspace.from_config(); every later cell
# that talks to Azure ML (datasets, experiments, runs) goes through `ws`.
ws = Workspace.from_config()

In [None]:
# Sanity check: show which workspace we are connected to, one field per line.
for workspace_field in (ws.name, ws.resource_group, ws.location, ws.subscription_id):
    print(workspace_field)

In [None]:
# Look up the registered dataset by name in the workspace and pull it
# into a pandas DataFrame for local exploration and training.
dataset = Dataset.get_by_name(
    ws,
    name='german_credit_dataset',
)
ds_df = dataset.to_pandas_dataframe()

In [None]:
# Preview the first few rows to check the columns loaded as expected.
ds_df.head()

In [None]:
import matplotlib.pyplot as plt

# Quick visual check of how the requested credit amount varies with age.
scatter_axes = ds_df.plot(x='Age', y='Credit amount', kind='scatter')
plt.show()

## Train model

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler

# Drop the "Sno" column (presumably a row serial number with no predictive
# value -- confirm against the dataset). errors="ignore" keeps this cell
# re-runnable: without it a second execution raises KeyError because the
# column has already been removed in place.
ds_df.drop("Sno", axis=1, inplace=True, errors="ignore")

# Separate the target column from the input features.
y_raw = ds_df['Risk']
X_raw = ds_df.drop('Risk', axis=1)

# Route columns to the right preprocessing branch based on their dtype.
categorical_features = X_raw.select_dtypes(include=['object']).columns
numeric_features = X_raw.select_dtypes(include=['int64', 'float']).columns

# OneHotEncoder's `sparse` argument was renamed to `sparse_output` in
# scikit-learn 1.2 and the old name was removed in 1.4. Try the current
# spelling first and fall back so the notebook runs on either side.
try:
    onehot_encoder = OneHotEncoder(categories='auto', sparse_output=False)
except TypeError:
    onehot_encoder = OneHotEncoder(categories='auto', sparse=False)

# Categorical columns: fill gaps with the literal "missing", then one-hot encode.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value="missing")),
    ('onehotencoder', onehot_encoder)])

# Numeric columns: standardise to zero mean / unit variance.
numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())])

# Columns not listed in either group are dropped (remainder="drop").
feature_engineering_pipeline = ColumnTransformer(
    transformers=[
        ('numeric', numeric_transformer, numeric_features),
        ('categorical', categorical_transformer, categorical_features)
    ], remainder="drop")

# Encode Labels
# LabelEncoder assigns integer ids in sorted order of the label values;
# `le.classes_` holds the original names in that same order.
le = LabelEncoder()
encoded_y = le.fit_transform(y_raw)

In [None]:
# Train test split
# Hold out 20% of the rows for evaluation. The split is stratified on the
# encoded label and seeded so it is reproducible between runs.
split_parts = train_test_split(
    X_raw,
    encoded_y,
    test_size=0.20,
    stratify=encoded_y,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

# Chain the feature preprocessing and the classifier into one estimator so
# that fit/score/predict always apply the same transformations.
pipeline_steps = [
    ('preprocessor', feature_engineering_pipeline),
    ('classifier', LogisticRegression(solver="lbfgs")),
]
lr_clf = Pipeline(steps=pipeline_steps)

In [None]:
from azureml.core import Experiment

experiment_name = 'german_credit_data_local'
experiment = Experiment(ws, experiment_name)

# Start an interactive run; metrics logged below are attached to it.
run = experiment.start_logging()

try:
    # Train the model
    lr_clf.fit(X_train, y_train)

    # Capture metrics
    train_acc = lr_clf.score(X_train, y_train)
    test_acc = lr_clf.score(X_test, y_test)
    print("Training accuracy: %.3f" % train_acc)
    print("Test data accuracy: %.3f" % test_acc)

    # Log to Azure ML
    run.log('Train accuracy', train_acc)
    run.log('Test accuracy', test_acc)
except Exception as exc:
    # Close the run as failed instead of leaving it open in the workspace
    # when training or logging raises, then surface the original error.
    run.fail(error_details=str(exc))
    raise
else:
    run.complete()

In [None]:
# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed
# in 0.23; joblib is a standalone package that scikit-learn depends on.
# Fall back to the old location only for very old scikit-learn installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Serialise the whole fitted pipeline (preprocessing + classifier) locally.
joblib.dump(value=lr_clf, filename='model.pkl')

# Upload our model to our experiment
run.upload_file(name='outputs/model.pkl', path_or_stream='./model.pkl')

In [None]:
# Register the uploaded pickle from this run as a named model, recording
# the dataset it was trained on and a tag identifying where it came from.
registration_kwargs = {
    'model_name': 'german-credit-local-model',
    'model_path': 'outputs/model.pkl',
    'datasets': [['training-dataset', dataset]],
    'tags': {"source": "local_training_demo"},
}
model = run.register_model(**registration_kwargs)

## Calculate Model Explanation

In [None]:
# Explain model
from azureml.core.run import Run
from azureml.interpret import ExplanationClient
from interpret.ext.blackbox import TabularExplainer

# Client used to attach the explanation to the training run.
client = ExplanationClient.from_run(run)

# The explainer wraps only the final classifier step; the preprocessing is
# passed separately via `transformations` so importances are reported in
# terms of the raw input columns.
#
# Class names must follow the order of the model's outputs. The labels were
# integer-encoded with `le`, so take the names from `le.classes_` instead of
# hard-coding them: LabelEncoder sorts its classes, and a hand-written list
# in a different order would attach the wrong name to each class.
explainer = TabularExplainer(lr_clf.steps[-1][1],
                             initialization_examples=X_train,
                             features=X_raw.columns,
                             classes=list(le.classes_),
                             transformations=feature_engineering_pipeline)

# explain overall model predictions (global explanation)
global_explanation = explainer.explain_global(X_test)

# Sorted SHAP values
print('ranked global importance values: {}'.format(global_explanation.get_ranked_global_values()))
# Corresponding feature names
print('ranked global importance names: {}'.format(global_explanation.get_ranked_global_names()))
# Feature ranks (based on original order of features)
print('global importance rank: {}'.format(global_explanation.global_importance_rank))

# Persist the explanation alongside the run so it can be viewed in Azure ML.
client.upload_model_explanation(global_explanation, comment='Global Explanation: All Features')
