# In [None]:
from azureml.core import Workspace, Experiment, Datastore, Dataset
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import PythonScriptStep
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.environment import Environment
from azureml.core.runconfig import RunConfiguration
from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.core.conda_dependencies import CondaDependencies

import os

# Names of the Azure ML resources used by this pipeline.
experiment_name = "dummy-text-multilabel-classification"
env_name = "usecaseTestEnv"
compute_name = "simpleTest"

# Obtain the workspace handle through Workspace.from_config().
ws = Workspace.from_config()

# Select the compute target by indexing the workspace's compute_targets
# collection with the configured name.
compute_target = ws.compute_targets[compute_name]

# Set up environment: start from a default CondaDependencies object, request
# Python 3.8 as a conda package, then add the pip packages one by one.
conda_dep = CondaDependencies()
conda_dep.add_conda_package("python=3.8")
for _pip_package in (
    "pandas",
    "scikit-learn",
    "numpy",
    "azureml-core",
    "azureml-dataset-runtime",
):
    conda_dep.add_pip_package(_pip_package)

# Attach the dependencies to a named environment.
env = Environment(name=env_name)
env.python.conda_dependencies = conda_dep

# Run configuration passed to the pipeline steps via `runconfig=`.
run_config = RunConfiguration()
run_config.environment = env

# Both input files are read from the workspace's default datastore.
datastore = ws.get_default_datastore()


def _load_delimited(relative_path):
    """Return a tabular dataset for a delimited file at *relative_path* in `datastore`."""
    return Dataset.Tabular.from_delimited_files(path=(datastore, relative_path))


dataset = _load_delimited("labelTwitterSmall.csv")
dataset2 = _load_delimited("UI/2024-12-02_011832_UTC/topics_terms.csv")

# Optional (currently disabled): register the datasets in the workspace.
# dataset = dataset.register(workspace=ws, name="labelTwitterSmall", create_new_version=True)
# dataset2 = dataset2.register(workspace=ws, name="topics_terms", create_new_version=True)


# Output paths: one PipelineData placeholder per step, both on `datastore`.
# preprocess_output is produced by step 1 and consumed by step 2;
# train_output is produced by step 2.
preprocess_output, train_output = (
    PipelineData(output_name, datastore=datastore)
    for output_name in ("preprocess_output", "train_output")
)

# Step 1: Data Preprocessing
# Runs preprocess.py from the current directory with both datasets attached
# as named inputs ("input_data" and "input_data_2"); the script receives the
# output location through --output_dir.
_preprocess_inputs = [
    dataset.as_named_input("input_data"),
    dataset2.as_named_input("input_data_2"),
]
preprocess_step = PythonScriptStep(
    # What to run
    name="Data Preprocessing",
    script_name="preprocess.py",
    source_directory=".",
    arguments=["--output_dir", preprocess_output],
    # Data wiring
    inputs=_preprocess_inputs,
    outputs=[preprocess_output],
    # Where and how to run
    compute_target=compute_target,
    runconfig=run_config,
    allow_reuse=False,  # reuse of earlier step results is switched off
)

# Step 2: Model Training && Deployment
# Runs model.py from the current directory. It takes step 1's output as its
# input directory, which is what orders this step after preprocessing.
_train_arguments = [
    "--input_dir", preprocess_output,
    "--output_dir", train_output,
    "--num_labels", 20,
]
train_step = PythonScriptStep(
    # What to run
    name="Model Training and Deploying",
    script_name="model.py",
    source_directory=".",
    arguments=_train_arguments,
    # Data wiring
    inputs=[preprocess_output],
    outputs=[train_output],
    # Where and how to run
    compute_target=compute_target,
    runconfig=run_config,
    allow_reuse=False,  # reuse of earlier step results is switched off
)

# Define pipeline: preprocessing first, then training.
_pipeline_steps = [preprocess_step, train_step]
pipeline = Pipeline(workspace=ws, steps=_pipeline_steps)

# Submit the pipeline under the configured experiment name and wait for the
# run to complete, with output shown.
experiment = Experiment(workspace=ws, name=experiment_name)
pipeline_run = experiment.submit(pipeline)
pipeline_run.wait_for_completion(show_output=True)

