In [None]:
from azure.ai.ml import MLClient
from azure.identity import (
    DefaultAzureCredential,
    InteractiveBrowserCredential,
)
import time

from azure.ai.ml.dsl import pipeline
from azure.ai.ml import Input

import ast

In [None]:
def fetch_data(model_name):
    """Download a labelled CSV and write stratified train/val/test JSONL splits.

    The CSV is fetched over HTTP and split 60/20/20 into train, validation
    and test sets, stratified on the ``label_string`` column with a fixed
    seed. Each split is written to the current working directory as
    ``train.jsonl``, ``val.jsonl`` and ``test.jsonl`` (one JSON record per
    line).

    Args:
        model_name: Not used by the body; kept so existing callers keep
            working.

    Returns:
        None. The results are the three files written to disk.

    Raises:
        requests.exceptions.RequestException: If the download fails, times
            out, or the server answers with an HTTP error status.
        KeyError: If the CSV has no ``label_string`` column.
    """
    import requests
    import pandas as pd
    from io import StringIO
    from sklearn.model_selection import train_test_split

    seed = 613
    # Without a timeout, requests waits forever on a stalled server and the
    # notebook cell never returns.
    request_timeout_s = 30

    # NOTE(review): the URL is empty in this copy of the notebook and must be
    # filled in before anything can be downloaded. The f-string prefix and the
    # unused `model_name` argument suggest it was meant to be interpolated --
    # confirm against the original source.
    url = f""
    response = requests.get(url, timeout=request_timeout_s)
    response.raise_for_status()

    df = pd.read_csv(StringIO(response.text))

    test_size, val_size = .2, .2
    holdout_size = test_size + val_size

    # First cut: train vs. everything held out (validation + test).
    train_df, temp_df = train_test_split(
        df,
        test_size=holdout_size,
        random_state=seed,
        stratify=df['label_string'],
    )

    # Second cut: divide the held-out rows between validation and test in
    # proportion to the requested sizes.
    val_size_adj = val_size / holdout_size
    val_df, test_df = train_test_split(
        temp_df,
        test_size=1 - val_size_adj,
        random_state=seed,
        stratify=temp_df['label_string'],
    )

    train_df.to_json("train.jsonl", orient='records', lines=True)
    val_df.to_json("val.jsonl", orient='records', lines=True)
    test_df.to_json("test.jsonl", orient='records', lines=True)


In [None]:
# Authenticate: prefer the default credential chain and fall back to an
# interactive browser login when no token can be obtained from it.
try:
    credential = DefaultAzureCredential()
    # Request a token up front so a broken credential chain is detected here
    # instead of on the first workspace call.
    credential.get_token("https://management.azure.com/.default")
except Exception:
    credential = InteractiveBrowserCredential()

# Only used by the fallback below, when MLClient.from_config fails.
subscription_id = ''
resource_group_name = ''
workspace_name = ''


try:
    workspace_ml_client = MLClient.from_config(credential=credential)
except Exception:
    # Deliberately `except Exception` rather than a bare `except:` so that
    # KeyboardInterrupt / SystemExit still propagate and the cell can be
    # interrupted while the client is being created.
    workspace_ml_client = MLClient(
        credential,
        subscription_id=subscription_id,
        resource_group_name=resource_group_name,
        workspace_name=workspace_name,
    )

# The models, fine-tuning pipelines and environments are available in the
# AzureML system registry, "azureml".
registry_ml_client = MLClient(credential, registry_name="azureml")

In [None]:
# Define the pipeline job.
#
# create_pipeline() makes a single call to `pipeline_component_func` and
# exposes that call's `mlflow_model_folder` output under the key
# "trained_model".
#
# The names `pipeline_component_func`, `foundation_model`, `compute_name`,
# `gpus_per_node`, `training_parameters` and `optimization_parameters` are not
# defined in this cell; they must already exist in the kernel when it runs.
#
# NOTE(review): documentation is kept in `#` comments on purpose. The
# `@pipeline()` decorator inspects this function, and a docstring or a renamed
# local variable may change the generated job's description / node name --
# confirm against the azure-ai-ml DSL docs before restructuring.
@pipeline()
def create_pipeline():
    text_classification_pipeline = pipeline_component_func(
        # ID of the foundation model from the azureml system registry (the one identified in step #3)
        mlflow_model_path=foundation_model.id,
        # huggingface_id = 'bert-base-uncased', # to use a huggingface model, uncomment this line and comment the above line
        # the same compute target is used for every stage of the pipeline
        compute_model_import=compute_name,
        compute_preprocess=compute_name,
        compute_finetune=compute_name,
        compute_model_evaluation=compute_name,
        # map the dataset splits to parameters; the paths are relative and use the
        # same file names that fetch_data() writes (train.jsonl / val.jsonl / test.jsonl)
        train_file_path=Input(
            type="uri_file", path= "./train.jsonl"
        ),
        validation_file_path=Input(
            type="uri_file", path= "val.jsonl"
        ),
        test_file_path=Input(
            type="uri_file", path= "./test.jsonl"
        ),
        # evaluation settings are read from a JSON file expected next to the notebook
        evaluation_config=Input(
            type="uri_file", path="./text-classification-config.json"
        ),
        # The following parameters map to the dataset fields
        sentence1_key="text",
        label_key="label_string",
        # Training settings
        number_of_gpu_to_use_finetuning=gpus_per_node,  # set to the number of GPUs available in the compute
        # remaining settings are forwarded as keyword arguments; a key that appears in
        # both dicts (or repeats an argument above) raises TypeError at call time
        **training_parameters,
        **optimization_parameters
    )
    return {
        # map the output of the fine-tuning job to the output of the pipeline job so that the fine-tuned model can be registered easily
        # registering the model is required to deploy the model to an online or batch endpoint
        "trained_model": text_classification_pipeline.outputs.mlflow_model_folder
    }