In [None]:
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from azure.ai.ml import MLClient

def _resolve_credential():
    """Return a credential for Azure, preferring the default chain.

    The default credential is probed by requesting a token for the Azure
    management scope; if constructing it or requesting the token raises,
    an interactive browser credential is returned instead.
    """
    try:
        candidate = DefaultAzureCredential()
        # Probe: only keep this credential if a token can really be issued.
        candidate.get_token("https://management.azure.com/.default")
        return candidate
    except Exception:
        # Default chain unusable here; fall back to interactive sign-in.
        return InteractiveBrowserCredential()


credential = _resolve_credential()

# Get a handle to the workspace via MLClient.from_config.
ml_client = MLClient.from_config(credential=credential)

In [None]:
import os

# Directory intended to hold the component script files.
script_folder = 'src'

# exist_ok=True keeps this cell safe to re-run when the folder already exists.
os.makedirs(name=script_folder, exist_ok=True)
print(script_folder, 'folder created')

In [None]:
%%writefile tokenization.py
import argparse

import torch
from transformers import AutoTokenizer

def tokenize_dialogue(dialogue, model_name='google/flan-t5-base', output_path="tokenized_text.pt"):
    """Tokenize ``dialogue`` and serialize the tokenizer output to disk.

    Args:
        dialogue: Text handed to the tokenizer.
        model_name: Identifier passed to ``AutoTokenizer.from_pretrained``.
        output_path: File the tokenizer output is saved to with
            ``torch.save``. Defaults to ``tokenized_text.pt`` in the current
            working directory, which is where this function has always
            written.

    Returns:
        The path the tokenizer output was written to.
    """
    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # return_tensors='pt' requests PyTorch tensors from the tokenizer.
    inputs = tokenizer(dialogue, return_tensors='pt')

    # Persist the encoded inputs for the next component's consumption.
    with open(output_path, "wb") as f:
        torch.save(inputs, f)

    return output_path

def main():
    """Command-line entry point: read ``--dialogue`` and tokenize it."""
    cli = argparse.ArgumentParser(description="Tokenize input dialogue")
    cli.add_argument(
        "--dialogue",
        type=str,
        required=True,
        help="Input dialogue for tokenization",
    )
    options = cli.parse_args()

    # Hand the parsed dialogue straight to the tokenization routine.
    tokenize_dialogue(options.dialogue)


if __name__ == "__main__":
    main()


In [None]:
%%writefile sentiment_prediction.py
import argparse
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

def predict_sentiment(tokenized_input_path, model_name='google/flan-t5-base'):
    """Generate text from saved tokenizer output, print it, and return it.

    Args:
        tokenized_input_path: Path to a file readable by ``torch.load`` that
            holds a mapping with at least an ``"input_ids"`` entry.
        model_name: Identifier passed to
            ``AutoModelForSeq2SeqLM.from_pretrained`` and
            ``AutoTokenizer.from_pretrained``.

    Returns:
        The decoded text of the first generated sequence (also printed to
        stdout).
    """
    # Load model
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

    # SECURITY: torch.load unpickles the file's contents, which can execute
    # arbitrary code. Only point this at files produced by a trusted step.
    with open(tokenized_input_path, "rb") as f:
        inputs = torch.load(f)

    generate_kwargs = {"max_new_tokens": 50}
    # Forward the attention mask when the saved inputs include one, so that
    # generation does not have to infer it from the token ids.
    if "attention_mask" in inputs:
        generate_kwargs["attention_mask"] = inputs["attention_mask"]

    # Generate prediction
    output = model.generate(inputs["input_ids"], **generate_kwargs)

    # Load tokenizer to decode output
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)

    print(decoded_output)
    # Returned as well as printed so callers can consume the result directly.
    return decoded_output

def main():
    """Command-line entry point: read ``--tokenized_input_path`` and run prediction."""
    cli = argparse.ArgumentParser(description="Predict sentiment from tokenized input")
    cli.add_argument(
        "--tokenized_input_path",
        type=str,
        required=True,
        help="Path to tokenized input file",
    )
    options = cli.parse_args()

    # Hand the parsed path straight to the prediction routine.
    predict_sentiment(options.tokenized_input_path)


if __name__ == "__main__":
    main()


In [None]:
%%writefile tokenization.yml
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
# Command component that runs tokenization.py on a dialogue string and
# publishes the serialized tokenizer output as a single file.
name: tokenization
display_name: Tokenization of Dialogue
version: 1
type: command
inputs:
  dialogue:
    type: string
outputs:
  tokenized_text_path:
    type: uri_file
code: ./
# NOTE(review): tokenization.py imports `transformers`; confirm this curated
# environment provides it, otherwise a custom environment is required.
environment: azureml:AzureML-pytorch-1.13-ubuntu20.04-py38-cpu@latest
# tokenization.py writes tokenized_text.pt into the working directory. Copy it
# to the declared output location so the file is actually captured as this
# component's output and can be consumed by the next step.
# NOTE(review): ${{inputs.dialogue}} is passed unquoted; confirm how values
# containing spaces reach argparse.
command: >-
  python tokenization.py
  --dialogue ${{inputs.dialogue}}
  && cp tokenized_text.pt ${{outputs.tokenized_text_path}}


In [None]:
%%writefile sentiment_prediction.yml
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
# Command component that runs sentiment_prediction.py on a tokenized input
# file and publishes the text it prints as a single output file.
name: sentiment_prediction
display_name: Sentiment Prediction
version: 1
type: command
inputs:
  tokenized_input_path:
    type: uri_file
outputs:
  # Command component outputs are data assets (e.g. uri_file / uri_folder);
  # the prediction text is therefore delivered as a file rather than `string`.
  sentiment_output:
    type: uri_file
code: ./
# NOTE(review): sentiment_prediction.py imports `transformers`; confirm this
# curated environment provides it, otherwise a custom environment is required.
environment: azureml:AzureML-pytorch-1.13-ubuntu20.04-py38-cpu@latest
# The script reports its result by printing to stdout, so redirect stdout into
# the declared output file to make the prediction available downstream.
command: >-
  python sentiment_prediction.py
  --tokenized_input_path ${{inputs.tokenized_input_path}}
  > ${{outputs.sentiment_output}}
