In [1]:
from azureml.core import Workspace
ws = Workspace.from_config()
import tensorflow as tf
import joblib

In [2]:
import azureml.core
print(azureml.core.VERSION)

1.38.0


In [3]:
from azureml.core import Dataset
import azureml.core
import azureml.contrib.dataset
from azureml.core import Dataset, Workspace , Datastore




In [4]:
import matplotlib.pyplot as plt
import pandas as pd 
import numpy as np



In [5]:
import pandas as pd


dataset = Dataset.get_by_name(ws, name='weather')
df = dataset.to_pandas_dataframe()


In [6]:
from azureml.core import Dataset

default_ds = ws.get_default_datastore()

if 'weather' not in ws.datasets:
    default_ds.upload_files(files=['TrainData.csv'], # Upload the diabetes csv files in /data
                        target_path='weather/', # Put it in a folder path in the datastore
                        overwrite=True, # Replace existing files of the same name
                        show_progress=True)

    #Create a tabular dataset from the path on the datastore (this may take a short while)
    tab_data_set = Dataset.Tabular.from_delimited_files(path=(default_ds, '.csv'))

    # Register the tabular dataset
    try:
        tab_data_set = tab_data_set.register(workspace=ws, 
                                name='weather dataset',
                                description='weather data',
                                tags = {'format':'CSV'},
                                create_new_version=True)
        print('Dataset registered.')
    except Exception as ex:
        print(ex)
else:
    print('Dataset already registered.')

Dataset already registered.


In [7]:
import os

experiment_folder = 'weather_pipeline'
os.makedirs(experiment_folder ,exist_ok = True)

print(experiment_folder)

weather_pipeline


In [8]:
%%writefile  $experiment_folder/prep_data.py
import os
import argparse
import pandas as pd
from azureml.core import Run
from sklearn.preprocessing import MinMaxScaler

#get params
parser = argparse.ArgumentParser()
parser.add_argument('--input-data'  , type=str  ,dest = 'raw_dataset_id' , help = 'raw dataset')
parser.add_argument('--prepped-data' , type=str , dest = 'prepped_data' , default = 'prepped_data' )
args = parser.parse_args()
save_folder = args.prepped_data

run = Run.get_context()

print("Loading data...")
weather = run.input_datasets['raw_data'].to_pandas_dataframe()

row_count =(len(weather))
run.log('raw_rows' , row_count)

scaler = MinMaxScaler(feature_range = (-1,1))
num_cols = [' _tempm']

weather[num_cols] = scaler.fit_transform(weather[num_cols])

row_count = len(weather)
run.log('processed_rows'  , row_count)


print('saving data ...')
os.makedirs(save_folder, exist_ok=True)
save_path = os.path.join(save_folder,'data.csv')
weather.to_csv(save_path , index = False , header = True)

run.complete()



Overwriting weather_pipeline/prep_data.py


In [9]:
%%writefile $experiment_folder/train_weather.py
from azureml.core import Run , Model
import argparse
import pandas as pd
import numpy as np
import joblib   
import pickle
import os 
from tensorflow.keras.layers import Dense , RepeatVector , LSTM , Dropout , Flatten , Conv1D, MaxPooling1D , Bidirectional 
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import plot_model
from tensorflow import keras
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.python.framework.ops import enable_eager_execution
enable_eager_execution()




parser = argparse.ArgumentParser()
parser.add_argument("--training-folder", type=str, dest='training_folder', help='training data folder')
args = parser.parse_args()
training_folder = args.training_folder



run = Run.get_context()

print('loading data ...')
file_path = os.path.join(os.getcwd(),training_folder , 'data.csv')
weather = pd.read_csv(file_path)



data_scaled = weather[' _tempm']

steps = 24
inp = []
out = []
for i in range(len(data_scaled)- (steps)):
    inp.append(data_scaled[i:i+steps])
    out.append(data_scaled[i+steps])

inp=np.asanyarray(inp)
input1 = inp.reshape(len(inp) , steps, 1)
out=np.asanyarray(out)


x_train = input1[:500]
x_test = input1[500:]
y_train = out[:500]
y_test = inp[500:]


def create_model():
    model = tf.keras.models.Sequential([
            keras.layers.Conv1D(256 , kernel_size = 2 , activation = 'relu' , input_shape = (24,1)),              
            keras.layers.Conv1D(128 , kernel_size = 2 , activation = 'relu' ),    
            keras.layers.MaxPooling1D(pool_size = 2),
            keras.layers.Flatten(),
            keras.layers.RepeatVector(24),
            keras.layers.LSTM(units  = 100 , return_sequences = True  , activation = 'relu' ),
            keras.layers.Dropout(0.2),
            keras.layers.LSTM(units  = 100 , return_sequences = True  , activation = 'relu' ),
            keras.layers.Dropout(0.2),
            keras.layers.LSTM(units  = 100 , return_sequences = True  , activation = 'relu' ),
            keras.layers.MaxPooling1D(pool_size = 2),
            keras.layers.LSTM(units  = 100 , return_sequences = True  , activation = 'relu' ),
            keras.layers.Bidirectional(LSTM(128 , activation = 'relu')),
            keras.layers.Dense(100 , activation = 'relu'),
            keras.layers.Dense(1)

    ])
    model.compile(loss='mse', optimizer='adam' ,metrics=['accuracy' ,'mse', 'mae', 'mape', 'cosine'])

   
    return model
    
model = create_model()
history = model.fit(x_train , y_train , epochs = 1 )


plt.title('Loss / Mean Squared Error')
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['mean_squared_error'])
plt.plot(history.history['mean_absolute_error'])
plt.plot(history.history['mean_absolute_percentage_error'])
plt.plot(history.history['cosine_proximity'])
plt.legend()
plt.show()

print('Saving model ...')
# os.makedirs('outputs', exist_ok=True)
# model_file = os.path.join('outputs', 'weather_forecasting.h5')
# pickle.dump(model, open(model_file, 'wb'))
model_file = 'weather_forecasting.h5'
with open('weather_forecasting.h5' , 'wb') as handle:
    pickle.dump(model_file , handle ,protocol= pickle.HIGHEST_PROTOCOL)
print('Registering model...')

Model.register(workspace = run.experiment.workspace , 
                model_path = model_file,
                model_name = 'weather_forecasting' ,
                tags = {'training context' : 'pipeline'},
                )
run.complete()


Overwriting weather_pipeline/train_weather.py


In [10]:
from azureml.core.compute import ComputeTarget  , AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = 'compute-cluster'

try:
    pipeline_cluster = ComputeTarget(workspace= ws,name = cluster_name)
    print(' Found existing cluster , use it')
except ComputeTargetException:
    try:
        compute_config = AmlCompute.provisioning_configuration(
            vm_size = 'Standard_DS11_v2',
            max_nodes=4)
        pipeline_cluster = ComputeTarget.create(ws, cluster_name , compute_config)
        pipeline_cluster.wait_for_completion(show_output=True)
    except Exception as ex:
        print(ex)

    

 Found existing cluster , use it


In [11]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core import Workspace, Experiment, ScriptRunConfig, RunConfiguration

weather_env = Environment('weather-pipeline-env')

weather_packages = CondaDependencies.create(conda_packages=['scikit-learn' , 'ipykernel' , 'matplotlib' , 'pandas' , 'pip' ],
                                                        pip_packages = ['azureml-defaults' , 'azureml-dataprep[pandas]' , 'pyarrow'])
weather_packages.add_tensorflow_conda_package(core_type='cpu', version=None)
weather_packages.add_tensorflow_pip_package(core_type='cpu', version=None)
weather_env.python.conda_dependencies = weather_packages

weather_env.register(workspace=ws)
registered_env = Environment.get(ws,'weather-pipeline-env')
pipeline_run_config = RunConfiguration()
pipeline_run_config.target = pipeline_cluster

pipeline_run_config.environment = registered_env

print('run configuration created')



run configuration created


In [12]:
from azureml.pipeline.core import PipelineData
from azureml.pipeline.steps import PythonScriptStep


weather_ds = ws.datasets.get("weather1")


# Create a PipelineData (temporary Data Reference) for the model folder
prepped_data_folder = PipelineData("prepped_data_folder", datastore=ws.get_default_datastore())

# Step 1, Run the data prep script
train_step = PythonScriptStep(name = "Prepare Data",
                                source_directory = experiment_folder,
                                script_name = "prep_data.py",
                                arguments = ['--input-data', weather_ds.as_named_input('raw_data'),
                                             '--prepped-data', prepped_data_folder],
                                outputs=[prepped_data_folder],
                                compute_target = pipeline_cluster,
                                runconfig = pipeline_run_config,
                                allow_reuse = True)

# Step 2, run the training script
register_step = PythonScriptStep(name = "Train and Register Model",
                                source_directory = experiment_folder,
                                script_name = "train_weather.py",
                                arguments = ['--training-folder', prepped_data_folder],
                                inputs=[prepped_data_folder],
                                compute_target = pipeline_cluster,
                                runconfig = pipeline_run_config,
                                allow_reuse = True)

print("Pipeline steps defined")

Pipeline steps defined


In [13]:
from azureml.core import Experiment
from azureml.pipeline.core import Pipeline
from azureml.widgets import RunDetails

# Construct the pipeline
pipeline_steps = [train_step, register_step]
pipeline = Pipeline(workspace=ws, steps=pipeline_steps)
print("Pipeline is built.")

# Create an experiment and run the pipeline
experiment = Experiment(workspace=ws, name = 'weather-pipeline')
pipeline_run = experiment.submit(pipeline, regenerate_outputs=True)
print("Pipeline submitted for execution.")
RunDetails(pipeline_run).show()
pipeline_run.wait_for_completion(show_output=True)

Pipeline is built.
Created step Prepare Data [4d2e4190][5c1c53cc-7166-43ee-bb29-cf2d8485546e], (This step will run and generate new outputs)
Created step Train and Register Model [71b236de][293ecd82-ca9a-4081-bbfa-b9bf985cb84a], (This step will run and generate new outputs)
Submitted PipelineRun 7695e374-f9b6-47be-b11d-3b1134a6fa91
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/7695e374-f9b6-47be-b11d-3b1134a6fa91?wsid=/subscriptions/09b3bdbf-5f62-40eb-8cdc-3e3652cd7c78/resourcegroups/new_resourcegroup/workspaces/xcuongbatch34&tid=3324bb2d-9b60-42f9-a25b-5913ffea01fe
Pipeline submitted for execution.


_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

PipelineRunId: 7695e374-f9b6-47be-b11d-3b1134a6fa91
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/7695e374-f9b6-47be-b11d-3b1134a6fa91?wsid=/subscriptions/09b3bdbf-5f62-40eb-8cdc-3e3652cd7c78/resourcegroups/new_resourcegroup/workspaces/xcuongbatch34&tid=3324bb2d-9b60-42f9-a25b-5913ffea01fe
PipelineRun Status: NotStarted
PipelineRun Status: Running


StepRunId: 78464cfc-717f-4552-86b1-0e672ba65ada
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/78464cfc-717f-4552-86b1-0e672ba65ada?wsid=/subscriptions/09b3bdbf-5f62-40eb-8cdc-3e3652cd7c78/resourcegroups/new_resourcegroup/workspaces/xcuongbatch34&tid=3324bb2d-9b60-42f9-a25b-5913ffea01fe
StepRun( Prepare Data ) Status: Running

StepRun(Prepare Data) Execution Summary
StepRun( Prepare Data ) Status: Finished
{'runId': '78464cfc-717f-4552-86b1-0e672ba65ada', 'target': 'compute-cluster', 'status': 'Completed', 'startTimeUtc': '2022-03-06T02:47:18.813732Z', 'endTimeUtc': '2022-03-06T02:47:40.14284Z', 'service

'Finished'

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [14]:
for run in pipeline_run.get_children():
    print(run.name, ':')
    metrics = run.get_metrics()
    for metric_name in metrics:
        print('\t' ,metric_name , ":" , metrics[metric_name]) 

Train and Register Model :
Prepare Data :
	 raw_rows : 872
	 processed_rows : 872


In [15]:
from azureml.core import Model
for model in Model.list(ws):
    print(model.name , 'version: ' , model.version )
    for tag_name in model.tags:
        tag = model.tags[tag_name]
        print('\t' , tag_name , ':' , tag)

weather_forecasting version:  2
	 training context : pipeline
weather_forecasting version:  1
	 training context : pipeline
diabetes_model version:  3
	 Training context : Pipeline
diabetes_model version:  2
	 Training context : Pipeline
diabetes_model version:  1
	 Training context : Pipeline
testing version:  8
testing version:  7
testing version:  6
testing version:  5
testing version:  4
testing version:  3
testing version:  2
testing version:  1
test version:  3
test version:  2
test version:  1
1 version:  2
1 version:  1
c version:  2
c version:  1
weather_forecast version:  14
weather_forecast version:  13
weather_forecast version:  12
weather_forecast version:  11
weather_forecast version:  10
weather_forecast version:  9
weather_forecast version:  8
weather_forecast version:  7
weather_forecast version:  6
weather_forecast version:  5
weather_forecast version:  4
weather_forecast version:  3
weather_forecast version:  2
weather_forecast version:  1
LSTMver1 version:  17
	 are

In [17]:
published_pipeline = pipeline_run.publish_pipeline(
    name= "LSTM-training-pipeline"  , description= "trains LSTM model" , version= "1.0"
)
published_pipeline

Name,Id,Status,Endpoint
LSTM-training-pipeline,63169b8a-4671-4e13-a020-8da2cd70562a,Active,REST Endpoint


In [18]:
rest_endpoint = published_pipeline.endpoint
print(rest_endpoint)

https://eastus2.api.azureml.ms/pipelines/v1.0/subscriptions/09b3bdbf-5f62-40eb-8cdc-3e3652cd7c78/resourceGroups/new_resourcegroup/providers/Microsoft.MachineLearningServices/workspaces/xcuongbatch34/PipelineRuns/PipelineSubmit/63169b8a-4671-4e13-a020-8da2cd70562a


In [19]:
from azureml.core.authentication import InteractiveLoginAuthentication
interactive_auth = InteractiveLoginAuthentication()
auth_header = interactive_auth.get_authentication_header()
print("authentication header ready.")

authentication header ready.


In [20]:
import requests
experiment_name ='weather-pipeline'

rest_endpoint = published_pipeline.endpoint

response = requests.post(rest_endpoint ,
                        headers= auth_header,
                        json ={"experimentName ": experiment_name}
)
run_id = response.json()['Id']
run_id

'eb9ce07f-f715-46da-8b4e-5d2a6de1ed9b'

In [21]:
from azureml.pipeline.core import ScheduleRecurrence , Schedule

recurrence = ScheduleRecurrence(frequency= "Week" , interval= 1 , week_days = ['Monday'], time_of_day= "00:00")
weekly_schedule = Schedule.create(ws, name = "weekly-lstm-training" ,description= "based on time ", pipeline_id = published_pipeline.id, experiment_name= 'weather-pipeline' , recurrence=recurrence)
print("pipeline scheduled")

pipeline scheduled


In [22]:
Schedules = Schedule.list(ws)
Schedule

azureml.pipeline.core.schedule.Schedule

In [23]:
pipeline_experiment = ws.experiments.get('weather-pipeline')
lastest_run = list(pipeline_experiment.get_runs())[0]
lastest_run.get_details()

{'runId': '179e6c58-6f44-48af-9e05-a3fe09a0cf8d',
 'status': 'Completed',
 'startTimeUtc': '2022-03-06T03:29:14.528075Z',
 'endTimeUtc': '2022-03-06T03:29:16.533959Z',
 'services': {},
 'properties': {'azureml.runsource': 'azureml.PipelineRun',
  'runSource': 'Unavailable',
  'runType': 'Schedule',
  'azureml.parameters': '{}',
  'azureml.continue_on_step_failure': 'False',
  'azureml.pipelineComponent': 'pipelinerun',
  'azureml.pipelineid': '63169b8a-4671-4e13-a020-8da2cd70562a'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'logs/azureml/executionlogs.txt': 'https://xcuongbatch349302342871.blob.core.windows.net/azureml/ExperimentRun/dcid.179e6c58-6f44-48af-9e05-a3fe09a0cf8d/logs/azureml/executionlogs.txt?sv=2019-07-07&sr=b&sig=vrSoNhiMusLKR4qt7bg%2BksDyaln9%2F1JMifvPp9YoVXM%3D&skoid=37fa96a2-7eb5-4dc7-b5cb-3691270e2fe6&sktid=3324bb2d-9b60-42f9-a25b-5913ffea01fe&skt=2022-03-06T01%3A51%3A06Z&ske=2022-03-07T10%3A01%3A06Z&sks=b&skv=2019-07-07&st=2022-03-06T03%3A20%3A44Z&se