# タクシー乗客数予測モデル構築（ローカルトレーニング編）
#### 参考１：https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training/train-on-amlcompute/train-on-amlcompute.ipynb

In [1]:
#ローカル環境のスペック確認
!cat /proc/cpuinfo

'cat' は、内部コマンドまたは外部コマンド、
操作可能なプログラムまたはバッチ ファイルとして認識されていません。


## １．Azure ML Service用の準備

In [2]:
import azureml
from azureml.core import Workspace, Run

# check core SDK version number
print("Azure ML SDK Version: ", azureml.core.VERSION)

Azure ML SDK Version:  1.0.8


In [3]:
# load workspace configuration from the config.json file in the current folder.
ws = Workspace.from_config()
print(ws.name, ws.location, ws.resource_group, ws.location, sep = '\t')

Found the config file in: c:\usr\dev\notebook\config.json
hiouchiyamldemo	eastus	AzureMLserviceRG	eastus


In [4]:
experiment_name = 'ml-regression-local-taxi'

from azureml.core import Experiment
exp = Experiment(workspace=ws, name=experiment_name)

import pandas as pd

project_folder = './sample_projects/ml-regression-local-taxi'
output = {}
output['SDK version'] = azureml.core.VERSION
output['Subscription ID'] = ws.subscription_id
output['Workspace Name'] = ws.name
output['Resource Group'] = ws.resource_group
output['Location'] = ws.location
output['Project Directory'] = project_folder
output['Experiment Name'] = exp.name
pd.set_option('display.max_colwidth', -1)
outputDf = pd.DataFrame(data = output, index = [''])
outputDf.T

Unnamed: 0,Unnamed: 1
SDK version,1.0.8
Subscription ID,2c30e7ba-539b-4017-a5bc-5028d8e170ec
Workspace Name,hiouchiyamldemo
Resource Group,AzureMLserviceRG
Location,eastus
Project Directory,./sample_projects/ml-regression-local-taxi
Experiment Name,ml-regression-local-taxi


## ２．データの取得（BLOB Storageからダウンロード）
#### ※Azure Databricksによって加工済みのもの

In [5]:
import numpy as np
from azure.storage.blob import BlockBlobService
import pandas as pd
import os.path
    
if not os.path.exists('./inputdata.csv') :
    account_name='cognitiveservicerg978'
    account_key='IQndkgEB3mLChKaaq3VW1B5Hp97tnH1EXg4XMJRujWOkVpJpsC52H6O29I7GT33K7LFIvU0V7iCFcD9XPZFlyQ=='
    container_name='taxi-demo'
    blob_name='processed_data/part-00000-tid-6391339617973718694-497cca92-7d12-4677-8747-31da860d090f-20026-c000.csv'
        
    service = BlockBlobService(account_name=account_name, account_key=account_key)
    service.get_blob_to_path(container_name,blob_name,'inputdata.csv')
    
df = pd.read_csv('./inputdata.csv')
X_train = df.drop(columns=["pickup_year","pickup_month","pickup_day","count","avg_trip_distance","avg_trip_time_in_secs"],axis=1)
y_train = df["count"].values

## ３．トレーニング実施（Automated MLを使ってローカル環境）

In [6]:
#Automated MLの設定
import logging

automl_settings = {
    "iteration_timeout_minutes" : 30,
    "iterations" : 3,
    "primary_metric" : 'spearman_correlation',
    "preprocess" : True,
    "verbosity" : logging.INFO,
    "n_cross_validations": 5
}

from azureml.train.automl import AutoMLConfig

# AML コンピュート
automated_ml_config = AutoMLConfig(task = 'regression',
                             debug_log = 'automated_ml_errors.log',
                             X = X_train,
                             y = y_train,
                             **automl_settings)

In [None]:
#トレーニング実行
local_run = exp.submit(automated_ml_config, show_output = False)
local_run

In [None]:
#トレーニングの進捗確認
from azureml.widgets import RunDetails
RunDetails(local_run).show()

In [None]:
#トレーニング完了まで待機
%%time
# Shows output of the run on stdout.
local_run.wait_for_completion(show_output=True)

In [None]:
children = list(local_run.get_children())
metricslist = {}
for run in children:
    properties = run.get_properties()
    metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}
    metricslist[int(properties['iteration'])] = metrics

rundata = pd.DataFrame(metricslist).sort_index(1)
rundata

In [None]:
best_run, fitted_model = local_run.get_output()
print(best_run)
print(fitted_model)

In [None]:
description = 'Automated Machine Learning Model'
tags = None
local_run.register_model(description=description, tags=tags)
print(local_run.model_id) # Use this id to deploy the model as a web service in Azure

## 以降バックアップ

In [None]:
# register model 
model = run_persistent.register_model(model_name='mymodel', model_path='outputs/keras_cifar10_trained_model.h5')
print(model.name, model.id, model.version, sep = '\t')