In [None]:
import azureml.core
from azureml.core import Workspace, Datastore

# Workspace handle shared by the later cells: compute-target lookup/creation,
# pipeline construction and experiment submission all take `ws`.
# NOTE(review): from_config() presumably reads a local workspace config file
# (see the Azure ML SDK docs) -- confirm one is available where this runs.
ws = Workspace.from_config()

In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute

# Name of the compute target the scraper step runs on.
compute_name = "scraper-engine"

if compute_name in ws.compute_targets:
    # A target with this name is already registered in the workspace: reuse it.
    compute_target = ws.compute_targets[compute_name]
    if isinstance(compute_target, AmlCompute):
        print('Found compute target: ' + compute_name)
    else:
        # The target is still used as-is (same as before), but say so explicitly
        # instead of continuing silently with an unexpected compute type.
        print('WARNING: compute target ' + compute_name + ' exists but is of type '
              + type(compute_target).__name__ + ', not AmlCompute; using it as-is.')
else:
    print('Creating a new compute target...')
    # min_nodes=0 / max_nodes=4 are the node-count bounds requested for the cluster.
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size='Standard_DS3_v2',
        min_nodes=0,
        max_nodes=4)
    compute_target = ComputeTarget.create(
        ws, compute_name, provisioning_config)

    # Block until provisioning finishes (or the 20-minute timeout elapses).
    compute_target.wait_for_completion(
        show_output=True, min_node_count=None, timeout_in_minutes=20)

    # Show the final provisioning status of the freshly created target.
    print(compute_target.status.serialize())

In [None]:
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core import Environment 

# pip packages that the scraper step's environment is built with.
scraper_dependencies = CondaDependencies.create(
    pip_packages=[
        'azureml-sdk',
        'azureml-dataset-runtime[fuse,pandas]',
        'facebook-scraper',
    ],
    pin_sdk_version=False,
)

# Run configuration handed to the pipeline step: it targets the compute
# resolved above and uses the dependency spec declared here.
aml_run_config = RunConfiguration()
aml_run_config.target = compute_target

python_section = aml_run_config.environment.python
python_section.user_managed_dependencies = False
python_section.conda_dependencies = scraper_dependencies

In [None]:
from azureml.pipeline.steps import PythonScriptStep

# Pipeline step that runs ./scraper.py from the current directory on the
# compute target, using the run configuration defined earlier.
# allow_reuse=False is passed so the step is not configured for output reuse.
scrap_data = PythonScriptStep(
    name="Facebook-scraper",
    source_directory='./',
    script_name="./scraper.py",
    compute_target=compute_target,
    runconfig=aml_run_config,
    allow_reuse=False,
)

In [None]:
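# TODO: complete the data movement step. The draft below is not runnable yet:
# `data_factory_compute` is never defined in this notebook, and the same data
# reference is used as both the source and the destination of the transfer.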
# from azureml.pipeline.steps import DataTransferStep
# from azureml.data.data_reference import DataReference

# # https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-data-transfer.ipynb
# adlsgen2_datastore = Datastore.get(ws, 'workspaceblobstore')

# adlsgen2_data_ref = DataReference(
#     datastore=adlsgen2_datastore,
#     data_reference_name='adls',
#     path_on_datastore='fb_data')

# transfer_data = DataTransferStep(
#     name='transfer_data_to_adls',
#     source_data_reference=adlsgen2_data_ref,
#     destination_data_reference=adlsgen2_data_ref,
#     compute_target=data_factory_compute)


In [None]:
from azureml.pipeline.core import Pipeline

# The pipeline currently consists of the scraping step only.
pipeline_steps = [scrap_data]
main_pipeline = Pipeline(workspace=ws, steps=pipeline_steps)

In [None]:
from azureml.core import Experiment

# Submit the pipeline under the 'Facebook-scraper-MS' experiment, then block
# until the run has finished.
scraper_experiment = Experiment(workspace=ws, name='Facebook-scraper-MS')
pipeline_run = scraper_experiment.submit(main_pipeline)
pipeline_run.wait_for_completion()