# ローカル環境でのモデル学習
ローカル環境でモデル学習をし、メトリックやモデルファイルをAzure Machine Learning serviceへ記録、共有します。

## Python SDKのバージョン確認

In [14]:
# Report the installed Azure ML SDK version so the environment can be verified.
import azureml.core

sdk_version = azureml.core.VERSION
print("SDK version:", sdk_version)

SDK version: 1.0.15


## Workspace情報の取得

In [15]:
from azureml.core import Workspace, Experiment

# Load the workspace from the local configuration file.
ws = Workspace.from_config()

# Summarize the workspace, one attribute per output line.
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]
print(*workspace_summary, sep='\n')

# Experiment in this workspace that the notebook's run is created from.
exp = Experiment(workspace=ws, name="devops-notebook")

Found the config file in: /Users/konabuta/Project/DevOpsLab/code/aml_config/config.json
Workspace name: azureml
Azure region: southeastasia
Subscription id: 9c0f91b8-eb2f-484c-979c-15848c098a6b
Resource group: amlservice


## メトリック記録開始

In [16]:
# Start an interactive Azure ML service run on the experiment; the returned
# `run` object is the handle used for logging in the rest of the notebook.
run = exp.start_logging()

## データ準備

In [17]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

# Load the scikit-learn diabetes sample data as a (features, target) pair.
X, y = load_diabetes(return_X_y=True)
# Presumably labels for the feature columns of X; not used in this cell.
columns = ['age', 'gender', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Bundle both splits in one dictionary keyed by split name.
data = dict(
    train=dict(X=X_train, y=y_train),
    test=dict(X=X_test, y=y_test),
)

## ハイパーパラメータの準備

In [19]:
import numpy as np

# Candidate hyperparameter values: 0.0 up to (but excluding) 1.0 in steps
# of 0.05.
alphas = np.arange(0.0, 1.0, 0.05)

# Pick one candidate uniformly at random. Sampling the array directly is
# equivalent to drawing a single random index and looking it up, without the
# intermediate one-element array.
alpha = np.random.choice(alphas)
print(alpha)

0.65


## モデル学習

In [20]:
from sklearn.linear_model import Ridge

# Fit a ridge regressor on the training split using the selected alpha.
train_split = data['train']
reg = Ridge(alpha=alpha)
# Kept as the final expression so the notebook displays the fitted estimator.
reg.fit(train_split['X'], train_split['y'])

Ridge(alpha=0.65, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)

## テストデータに対する予測値出力と精度確認

In [21]:
from sklearn.metrics import mean_squared_error

# Predict on the held-out test split and score the predictions.
preds = reg.predict(data['test']['X'])
# scikit-learn metrics take (y_true, y_pred) in that order. MSE itself is
# symmetric, so the value is unchanged, but the documented order keeps this
# cell correct if the metric is ever swapped for an asymmetric one.
mse = mean_squared_error(data['test']['y'], preds)
print("平均二乗誤差:", mse)

平均二乗誤差: 3315.3568399622563


## Azure Machine Learning serviceにハイパーパラメータ、モデル精度を記録

In [22]:
# Record the chosen hyperparameter and the resulting test error on the run.
for metric_name, metric_value in (('alpha', alpha), ('mse', mse)):
    run.log(metric_name, metric_value)

## モデルファイルのアップロード

In [23]:
import os
import json

# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed
# in 0.23. Try the original import first so existing environments behave
# exactly as before, and fall back to the standalone `joblib` package on
# newer scikit-learn releases.
try:
    from sklearn.externals import joblib
except ImportError:
    import joblib


# Save the trained model file under the local `outputs` directory.
model_name = "diabetes-notebook.pkl"
os.makedirs('outputs', exist_ok=True)
# NOTE(review): the dump happens inside the run's context manager; leaving the
# block presumably finalizes the run -- confirm against the Run documentation.
with run:
    joblib.dump(value=reg, filename='outputs/' + model_name)

In [24]:
# Mark the run as completed.
run.complete()

## 結果の確認

In [25]:
# Bare expression: the notebook renders the run as the cell result.
run

Experiment,Id,Type,Status,Details Page,Docs Page
devops-notebook,22e9a225-0894-4600-94b1-2e0661a9acce,,Completed,Link to Azure Portal,Link to Documentation


## モデルの登録

In [12]:
from azureml.core.model import Model

# Tags attached to the registered model, including the id of the originating
# run.
model_tags = {'area': "diabetes try modeling", 'type': "regression", 'run_id': run.id}

# Register the model file from the run's outputs under the name
# 'diabetes-notebook'.
model = run.register_model(
    model_name='diabetes-notebook',
    model_path='outputs/' + model_name,
    tags=model_tags,
)

In [13]:
# Show the registered model's name, version, and tags.
print(model.name, model.version, model.tags)

diabetes-notebook 4 {'area': 'diabetes try modeling', 'type': 'regression', 'run_id': 'b3931f9c-b57e-4453-8a12-4c94dedae423'}
