In [None]:
!pip install -U azure-ai-ml

In [None]:
!pip install --upgrade openai

In [None]:
#Read Training Data

import pandas as pd
import os
import openai
import subprocess
# Convert the training CSV into JSON Lines (one record per line)
raw_csv_path = "./data/newsolar.csv"
jsonl_path = "newsolar.jsonl"

df = pd.read_csv(raw_csv_path)
df.to_json(jsonl_path, lines=True, orient="records")

In [None]:
# Run the OpenAI CLI data-preparation tool on the JSONL file written by the previous cell.
# NOTE(review): presumably this is what produces newsolar_prepared_train.jsonl and
# newsolar_prepared_valid.jsonl, which the pipeline cell reads — confirm.
# NOTE(review): -q appears to auto-accept the tool's suggestions — confirm against the CLI help.
!openai tools fine_tunes.prepare_data -f newsolar.jsonl -q

In [None]:
# Import required libraries
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential

from azure.ai.ml import MLClient, Input, load_component
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml.dsl import pipeline

In [None]:
# Prefer the non-interactive credential chain. Requesting a management-plane
# token up front proves the credential is actually usable in this environment.
try:
    credential = DefaultAzureCredential()
    credential.get_token("https://management.azure.com/.default")
except Exception:
    # DefaultAzureCredential could not obtain a token here, so switch to
    # InteractiveBrowserCredential instead.
    credential = InteractiveBrowserCredential()

In [None]:
# AzureML workspace coordinates.
# Each value is read from an environment variable when set, so the identifiers
# do not have to be typed into (and saved with) the notebook. When the variable
# is absent the value is the empty string, exactly as before; replace the
# default by hand in that case. `os` is imported in the first code cell.
subscription_id = os.environ.get("AZURE_SUBSCRIPTION_ID", "")
resource_group = os.environ.get("AZURE_RESOURCE_GROUP", "")
workspace_name = os.environ.get("AZURE_ML_WORKSPACE_NAME", "")

In [None]:
# Client bound to the "azureml" registry (not to a workspace)
ml_client = MLClient(
    credential=credential,
    registry_name="azureml",
    registry_location="swedencentral",
)

# Fetch the fine-tuning pipeline component from that registry by name and version
component_ref = {
    "name": "openai_completions_finetune_pipeline",
    "version": "0.1.2",
}
finetune_pipeline = load_component(client=ml_client, **component_ref)

In [None]:
# Show a confirmation line followed by the loaded component's spec
print(
    "Pipeline component loaded successfully. Component spec:",
    finetune_pipeline,
    sep="\n",
)

In [None]:
TASK_TYPE = "completion"  # task type forwarded to the pipeline as `task_type`; this notebook uses a completion dataset


# Construct pipeline
#
# Pipeline definition that calls the registry fine-tune component loaded earlier
# (`finetune_pipeline`) once and returns the result of that call.
#
# Inputs:
#   train_dataset, validation_dataset - passed through unchanged to the component
#   training_max_epochs               - forwarded as the component's `n_epochs`
#   model, task_type, registered_model_name,
#   learning_rate_multiplier, batch_size
#                                     - forwarded under the same names
#
# NOTE(review): documented with `#` comments rather than a docstring on purpose;
# the DSL may surface a docstring as job metadata — confirm before adding one.
# NOTE(review): the DSL may also derive the step name from the local variable
# assigned below — confirm before renaming it.
# NOTE(review): batch_size=-1 presumably lets the service pick the batch size — confirm.
@pipeline()
def pipeline_with_registered_component(
    train_dataset,
    validation_dataset,
    training_max_epochs=45,
    model="babbage-002",
    registered_model_name="sdk-train-babbage-polysilicon-m01-1",
    learning_rate_multiplier=1,
    batch_size=-1,
    task_type="completion",
):
    # Single step: invoke the fine-tune component with the pipeline's inputs.
    openai_completions_finetune_component_results = finetune_pipeline(
        train_dataset=train_dataset,
        validation_dataset=validation_dataset,
        n_epochs=training_max_epochs,  # note the rename: epochs -> n_epochs
        model=model,
        task_type=task_type,
        registered_model_name=registered_model_name,
        learning_rate_multiplier=learning_rate_multiplier,
        batch_size=batch_size,
    )

    # The component call result is returned as the pipeline's result.
    return openai_completions_finetune_component_results


# Local prepared train/validation files, declared as single-file inputs
train_input = Input(path="./newsolar_prepared_train.jsonl", type=AssetTypes.URI_FILE)
valid_input = Input(path="./newsolar_prepared_valid.jsonl", type=AssetTypes.URI_FILE)

# Arguments for this particular fine-tuning run
job_arguments = dict(
    train_dataset=train_input,
    validation_dataset=valid_input,
    training_max_epochs=45,
    model="babbage-002",  # Select any model from ["babbage-002", "davinci-002", "gpt-35-turbo"]
    task_type=TASK_TYPE,
    registered_model_name="sdk-train-babbage-polysilicon-m01-1",
    learning_rate_multiplier=1,
    batch_size=-1,
)
pipeline_job = pipeline_with_registered_component(**job_arguments)

# Pipeline-level default compute
pipeline_job.settings.default_compute = "serverless"

In [None]:
# Client bound to the AzureML workspace configured above (used to submit the job)
workspace_ml_client = MLClient(
    credential=credential,
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace_name,
)

In [None]:
# Send the pipeline job to the workspace; the name is rebound to the object
# returned by the service, which is then displayed as the cell output.
pipeline_job = workspace_ml_client.jobs.create_or_update(
    job=pipeline_job,
    experiment_name="sdk-train-babbage-polysilicon-m01-1",
)
pipeline_job

In [None]:
import requests
import json
import subprocess

# 1. Azure OpenAI account that will host the deployment
# NOTE(review): `resource_group` is the same name used for the AzureML workspace
# earlier in the notebook, so running this cell overwrites that value — confirm
# both resources really live in the same resource group.
subscription = ""
resource_group = ""
resource_name = ""  # openai resource name
model_deployment_name = "sdk-train-babbage-polysilicon-m01-1"

# 2. Fine-tuned model as registered in AzureML (see the `Models` tab of the
# workspace): its name, its version, and the ARM path of the workspace holding it.
registered_model_name = "sdk-train-babbage-polysilicon-m01-1"
registered_model_version = "1"
# NOTE(review): the path is built from the OpenAI account's subscription and
# resource group set just above, plus `workspace_name` from the workspace cell.
workspace_path = (
    f"/subscriptions/{subscription}"
    f"/resourcegroups/{resource_group}"
    "/providers/Microsoft.MachineLearningServices"
    f"/workspaces/{workspace_name}"
)


In [None]:
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential

# Scope of the Azure management plane; the token is used for the REST calls
# that create and delete the model deployment.
management_scope = "https://management.azure.com/.default"

try:
    credential = DefaultAzureCredential()
    # Check if given credential can get token successfully.
    access_token = credential.get_token(management_scope)
except Exception:
    # Fall back to InteractiveBrowserCredential in case DefaultAzureCredential does not work.
    # This opens a browser page for interactive sign-in. The token has to be
    # requested on this path as well; otherwise `token` would stay undefined
    # (or stale from an earlier run) for the request cells.
    credential = InteractiveBrowserCredential()
    access_token = credential.get_token(management_scope)

# Raw bearer token string placed in the Authorization header of the requests
token = access_token.token

In [None]:
deploy_params = {"api-version": "2023-05-01"}
deploy_headers = {
    "Authorization": f"Bearer {token}",
    "Content-Type": "application/json",
}

# 4. Set model deployment configuration. Here capacity refers to support for `1K Tokens Per Minute (TPM)` for your deployment.
deploy_data = {
    "sku": {"name": "Standard", "capacity": 1},
    "properties": {
        "model": {
            "format": "OpenAI",
            "name": f"{registered_model_name}",
            "version": f"{registered_model_version}",
            "source": f"{workspace_path}",
        }
    },
}

deploy_data = json.dumps(deploy_data)

In [None]:
deploy_params = {"api-version": "2023-05-01"}
deploy_headers = {
    "Authorization": f"Bearer {token}",
    "Content-Type": "application/json",
}

# 4. Set model deployment configuration. Here capacity refers to support for `1K Tokens Per Minute (TPM)` for your deployment.
deploy_data = {
    "sku": {"name": "Standard", "capacity": 50},
    "properties": {
        "model": {
            "format": "OpenAI",
            "name": f"{registered_model_name}",
            "version": f"{registered_model_version}",
            "source": f"{workspace_path}",
        }
    },
}

deploy_data = json.dumps(deploy_data)

In [None]:
# 5. Send PUT request to Azure cognitive services to create model deployment
# The URL is assembled from the Azure OpenAI account details configured earlier
# (`subscription`, `resource_group`, `resource_name`, `model_deployment_name`),
# so the deployment is created under the same name the inference cell targets.
request_url = (
    "https://management.azure.com"
    f"/subscriptions/{subscription}"
    f"/resourcegroups/{resource_group}"
    "/providers/Microsoft.CognitiveServices"
    f"/accounts/{resource_name}"
    f"/deployments/{model_deployment_name}"
)

r = requests.put(
    request_url,
    params=deploy_params,
    headers=deploy_headers,
    data=deploy_data,
)

try:
    print(r.json())
except ValueError:
    # The body was not JSON (for example an empty error response);
    # show the status and raw text instead of failing the cell.
    print(r.status_code, r.text)


In [None]:
#SDK Batch size -1 Epoch 45

import os

import openai

openai.api_type = "azure"
# Endpoint and key are read from the environment when set, so the secret never
# has to be pasted into the notebook; both default to the empty string.
openai.api_base = os.environ.get("AZURE_OPENAI_ENDPOINT", "")
openai.api_version = "2023-09-15-preview"
openai.api_key = os.environ.get("AZURE_OPENAI_API_KEY", "")

# NOTE(review): the prompt ends with the characters backslash-n (`\\n`), not with
# real newlines — confirm this matches the separator in the prepared training data.
# NOTE(review): the stop sequence contains a literal backslash (`<\/s>`); it is
# written as a raw string to keep that exact value without an invalid-escape
# warning — confirm `</s>` was not intended.
response = openai.Completion.create(
    engine="sdk-train-babbage-polysilicon-m01-1",
    prompt="""CNBM to set up PV cell packaging materials facility in Tongcheng Economic 
      Development Zone: China National Building Material Group Co., Ltd. (CNBM) has announced that it has signed a cooperation agreement with Tongcheng Economic Development Zone Construction Investment Group. According to the agreement, CNBM will invest about RMB 2.5 billion ($349 million) to build a photovoltaic (PV) cell packaging material facility in the zone. The facility will consist of 2 production lines with a daily output of 1,200 tons each. The company plans to start construction on the 1st production line in March 2024 and expects to start production in March 2025. The 2nd production line is expected to become operational by October 2025. At full capacity, the facility is expected to generate RMB 3 billion 
      ($418.9 million) in annual revenues for the company.\\n\\n###\\n\\n""",
    temperature=0.1,
    max_tokens=10,
    top_p=0,
    frequency_penalty=0,
    presence_penalty=0,
    stop=[r"<\/s>"],
)

print(response['choices'][0]['text'])

In [None]:
# 6. Send DELETE request to Azure cognitive services to delete model deployment
# The URL is assembled from the Azure OpenAI account details configured earlier
# (`subscription`, `resource_group`, `resource_name`, `model_deployment_name`).
request_url = (
    "https://management.azure.com"
    f"/subscriptions/{subscription}"
    f"/resourcegroups/{resource_group}"
    "/providers/Microsoft.CognitiveServices"
    f"/accounts/{resource_name}"
    f"/deployments/{model_deployment_name}"
)

r = requests.delete(
    request_url,
    params=deploy_params,
    headers=deploy_headers,
)
print(r)