### Example running fine-tuning on Azure Open AI's SQL query generation problem.
We use the same base dataset sql_examples.jsonl 

#### Data preparation   

In [27]:
import pandas as pd
from sklearn.model_selection import train_test_split
#load raw data
sql_raw = pd.read_json("../llama2/data/sql_examples.jsonl", lines=True).to_dict(orient="records") # load raw data from jsonl file
template= '{{"messages": [{{"role": "system", "content": "You are querying the sales database, what is the SQL query for the following input question?"}}, {{"role": "user", "content": "{question}"}}, {{"role": "assistant", "content": "{sql_query}"}}]}}'
#apply the template to the raw data
sql_data = [template.format(question=d["question"], sql_query=d["sql_query"]) for d in sql_raw]
#save the data to a jsonl file
sql_data_train, sql_data_test = train_test_split(sql_data, test_size=0.2, random_state=42) 
with open("../data/sql_examples_training.jsonl", "w") as f:
    for line in sql_data_train:
        f.write(line + "\n")
with open("../data/sql_examples_validation.jsonl", "w") as f:
    for line in sql_data_test:
        f.write(line + "\n")


In [29]:
os.environ.get("AZURE_OPENAI_API_KEY")

In [1]:
# Upload fine-tuning files

import openai
import os
import openai
import os
from pathlib import Path  
import json
import re
from dotenv import load_dotenv
env_path = Path('../utils') / 'secrets.env'
load_dotenv(dotenv_path=env_path)
openai.api_key =  os.environ.get("AZURE_OPENAI_API_KEY")
openai.api_base =  os.environ.get("AZURE_OPENAI_ENDPOINT")
openai.api_type = "azure"
openai.api_version = "2023-07-01-preview"


training_file_name = '../data/sql_examples_training.jsonl'
validation_file_name = '../data/sql_examples_validation.jsonl'

# Upload the training and validation dataset files to Azure OpenAI with the SDK.

training_response = openai.File.create(
    file=open(training_file_name, "rb"), purpose="fine-tune", user_provided_filename="sql_examples_training.jsonl"
)
training_file_id = training_response["id"]

validation_response = openai.File.create(
    file=open(validation_file_name, "rb"), purpose="fine-tune", user_provided_filename="sql_examples_validation.jsonl"
)
validation_file_id = validation_response["id"]

print("Training file ID:", training_file_id)
print("Validation file ID:", validation_file_id)

Training file ID: file-2436f03af32c437ab609a1f79a9598de
Validation file ID: file-fbf685b8f1184d989fd5d42e897a6f49


In [3]:
response = openai.FineTuningJob.create(
    training_file=training_file_id,
    validation_file=validation_file_id,
    model="gpt-35-turbo",
)

job_id = response["id"]

# You can use the job ID to monitor the status of the fine-tuning job.
# The fine-tuning job will take some time to start and complete.

print("Job ID:", response["id"])
print("Status:", response["status"])
print(response)

InvalidRequestError: Resource not found