In [None]:
import azureml.core
from azureml.core import Workspace, Datastore
from azureml.data import OutputFileDatasetConfig

# Workspace handle obtained via Workspace.from_config(); the later cells use
# `ws` to look up the datastore and compute target and to create the
# experiment and pipeline.
ws = Workspace.from_config()

In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute

In [None]:
# Name of the compute target the pipeline steps run on; it is looked up in
# ws.compute_targets further down.
compute_name = "vkontogCompute"

In [None]:
# Datastore used as the destination of the pipeline output. The file store is
# the one in use; the blob store alternative is kept commented out.
#datastore= Datastore(ws, "workspaceblobstore")
datastore= Datastore(ws, "workspacefilestore")

In [None]:
# Resolve the compute target by name.
# Fail fast when it does not exist: every later cell needs `compute_target`,
# and leaving it undefined would only surface as a NameError further down.
available_compute_targets = ws.compute_targets  # read once, reused for the lookup
if compute_name not in available_compute_targets:
    raise ValueError(
        "Compute target '" + compute_name + "' was not found in the workspace. "
        "Please set up a proper compute"
    )

compute_target = available_compute_targets[compute_name]
if isinstance(compute_target, AmlCompute):
    print('Found compute target: ' + compute_name)
else:
    # The target exists but is not an AmlCompute cluster. Keep going (the
    # notebook previously continued silently here) but make it visible.
    print('Warning: compute target ' + compute_name + ' is of type '
          + type(compute_target).__name__ + ', not AmlCompute')


In [None]:
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core import Environment 


# Run configuration that is passed as `runconfig` to the pipeline steps.
aml_run_config = RunConfiguration()
# Use the compute target resolved from `compute_name` in the earlier cell
aml_run_config.target = compute_target

In [None]:
# Python environment for the steps that use `aml_run_config`: conda packages
# plus pip packages, with pin_sdk_version=False as before.
aml_run_config.environment.python.conda_dependencies = CondaDependencies.create(
    conda_packages=['pandas', 'scikit-learn'],
    pip_packages=[
        'azureml-sdk',
        'azureml-dataset-runtime[fuse,pandas]',
        'pyarrow',  # stray leading space removed from the package name
        'openpyxl',
        'xlrd',
        'spacy',
    ],
    pin_sdk_version=False)



In [None]:
# Step output is written to the 'UI/NLP/tokens' path on `datastore`.
tokens_destination = (datastore, 'UI/NLP/tokens')
output_data = OutputFileDatasetConfig(destination=tokens_destination)

# The result of register_on_complete (dataset name 'exported_tokens') is kept
# under its own name, separate from `output_data`.
output_data_dataset = output_data.register_on_complete(name='exported_tokens')

In [None]:
from azureml.data import OutputFileDatasetConfig
from azureml.pipeline.steps import PythonScriptStep

# First pipeline step: runs pipTest.py on the shared compute target and run
# configuration. It takes no arguments and allows reuse of earlier results.
dataprep_step0 = PythonScriptStep(
    script_name="./Text_Mining_withSpacy/pipTest.py",
    name="Spacy_NLP_v0",
    runconfig=aml_run_config,
    compute_target=compute_target,
    allow_reuse=True,
)

# Second pipeline step: runs the spaCy script and hands it the output location
# through the --output argument.
# Pass `output_data_dataset`, the object returned by register_on_complete,
# rather than the bare `output_data`: register_on_complete returns a new
# config carrying the registration, so passing the original would leave the
# 'exported_tokens' dataset unregistered when the run completes.
dataprep_step1 = PythonScriptStep(
    name="Spacy_NLP_v1",
    script_name="./Text_Mining_withSpacy/Spacy_NLP_on_LinkComments_v5.py",
    compute_target=compute_target,
    runconfig=aml_run_config,
    allow_reuse=True,
    arguments=["--output", output_data_dataset],
)

In [None]:
from azureml.pipeline.core import Pipeline
from azureml.core import Experiment

# Snapshot size limit (in bytes) applied to the SDK's snapshot client below.
SNAPSHOT_LIMIT_BYTES = 10 ** 9

# Assemble both steps into one pipeline and open the experiment it runs under.
pipeline_steps = [dataprep_step0, dataprep_step1]
pipeline = Pipeline(workspace=ws, steps=pipeline_steps)

experiment = Experiment(ws, 'Link_Comments')

# Override the SDK's module-level snapshot size limit before submitting.
azureml._restclient.snapshots_client.SNAPSHOT_MAX_SIZE_BYTES = SNAPSHOT_LIMIT_BYTES

# Submit the pipeline and wait for the run to complete.
run = experiment.submit(pipeline, show_output=True)
run.wait_for_completion()