In [None]:
import os
from openai import OpenAI

# Read the API key from the environment and stop immediately when it is
# missing or empty, before any client is constructed.
api_key = os.environ.get("OPENAI_API_KEY")
if api_key is None or api_key == "":
    raise EnvironmentError("OPENAI_API_KEY environment variable not set.")

# Client object used by the upload / fine-tuning cells of this notebook.
client = OpenAI(api_key=api_key)

### CVQA MCQ SFT

#### CVQA-MCQ-Overlapped

In [None]:
import json

path = "../data/cvqa_sft/cvqa_sft_gpt4o.jsonl"

# Peek at the first JSONL line only, to check the record layout before
# the file is uploaded.
with open(path, mode="r", encoding="utf-8") as jsonl_file:
    first_line = jsonl_file.readline()
    record = json.loads(first_line)

# ensure_ascii=False leaves non-ASCII characters unescaped in the printout.
print(json.dumps(record, indent=2, ensure_ascii=False))


In [None]:
# Upload the CVQA-MCQ (overlapped) SFT dataset and start a supervised
# fine-tuning job on it.
#
# The file is opened in a context manager so the handle is closed as soon as
# the upload call returns, instead of lingering for the life of the kernel.
with open("../data/cvqa_sft/cvqa_sft_gpt4o.jsonl", "rb") as training_fh:
    training = client.files.create(
        file=training_fh,
        purpose="fine-tune",
    )

print("file id =", training.id)

# Supervised fine-tuning on the uploaded file for a single epoch.
job = client.fine_tuning.jobs.create(
    model="gpt-4o-2024-08-06",
    training_file=training.id,
    method={
        "type": "supervised",
        "supervised": {
            "hyperparameters": {"n_epochs": 1},
        },
    },
)
print("fine-tune job id =", job.id)

#### CVQA-MCQ-Exclusive

In [None]:
import json

path = "../data/cvqa_sft/cvqa_sft_gpt4o_exclusive.jsonl"

# Peek at the first JSONL line only, to check the record layout before
# the file is uploaded.
with open(path, mode="r", encoding="utf-8") as jsonl_file:
    first_line = jsonl_file.readline()
    record = json.loads(first_line)

# ensure_ascii=False leaves non-ASCII characters unescaped in the printout.
print(json.dumps(record, indent=2, ensure_ascii=False))


In [None]:
# Upload the CVQA-MCQ (exclusive) SFT dataset and start a supervised
# fine-tuning job on it.
#
# The file is opened in a context manager so the handle is closed as soon as
# the upload call returns, instead of lingering for the life of the kernel.
with open("../data/cvqa_sft/cvqa_sft_gpt4o_exclusive.jsonl", "rb") as training_fh:
    training = client.files.create(
        file=training_fh,
        purpose="fine-tune",
    )

print("file id =", training.id)

# Supervised fine-tuning on the uploaded file for a single epoch.
job = client.fine_tuning.jobs.create(
    model="gpt-4o-2024-08-06",
    training_file=training.id,
    method={
        "type": "supervised",
        "supervised": {
            "hyperparameters": {"n_epochs": 1},
        },
    },
)
print("fine-tune job id =", job.id)


### Safety SFT

#### Safety-SFT-Overlapped

In [None]:
import json

path = "../data/safety_sft/safety_sft_gpt4o.jsonl"

# Peek at the first JSONL line only, to check the record layout before
# the file is uploaded.
with open(path, mode="r", encoding="utf-8") as jsonl_file:
    first_line = jsonl_file.readline()
    record = json.loads(first_line)

# ensure_ascii=False leaves non-ASCII characters unescaped in the printout.
print(json.dumps(record, indent=2, ensure_ascii=False))


In [None]:
# Upload the Safety SFT (overlapped) dataset and start a supervised
# fine-tuning job on it.
#
# The file is opened in a context manager so the handle is closed as soon as
# the upload call returns, instead of lingering for the life of the kernel.
# The path is a plain string: it has no placeholders, so no f-prefix is needed.
with open("../data/safety_sft/safety_sft_gpt4o.jsonl", "rb") as training_fh:
    training = client.files.create(
        file=training_fh,
        purpose="fine-tune",
    )

print("file id =", training.id)

# Supervised fine-tuning on the uploaded file for a single epoch.
job = client.fine_tuning.jobs.create(
    model="gpt-4o-2024-08-06",
    training_file=training.id,
    method={
        "type": "supervised",
        "supervised": {
            "hyperparameters": {"n_epochs": 1},
        },
    },
)
print("fine-tune job id =", job.id)

#### Safety-SFT-Exclusive

In [None]:
import json

path = "../data/safety_sft/safety_sft_gpt4o_exclusive.jsonl"

# Peek at the first JSONL line only, to check the record layout before
# the file is uploaded.
with open(path, mode="r", encoding="utf-8") as jsonl_file:
    first_line = jsonl_file.readline()
    record = json.loads(first_line)

# ensure_ascii=False leaves non-ASCII characters unescaped in the printout.
print(json.dumps(record, indent=2, ensure_ascii=False))


In [None]:
# Upload the Safety SFT (exclusive) dataset and start a supervised
# fine-tuning job on it.
#
# The file is opened in a context manager so the handle is closed as soon as
# the upload call returns, instead of lingering for the life of the kernel.
# The path is a plain string: it has no placeholders, so no f-prefix is needed.
with open("../data/safety_sft/safety_sft_gpt4o_exclusive.jsonl", "rb") as training_fh:
    training = client.files.create(
        file=training_fh,
        purpose="fine-tune",
    )

print("file id =", training.id)

# Supervised fine-tuning on the uploaded file for a single epoch.
job = client.fine_tuning.jobs.create(
    model="gpt-4o-2024-08-06",
    training_file=training.id,
    method={
        "type": "supervised",
        "supervised": {
            "hyperparameters": {"n_epochs": 1},
        },
    },
)
print("fine-tune job id =", job.id)

### Safety DPO

#### Safety-DPO-Overlapped

In [None]:
import json

path = "../data/safety_dpo/safety_dpo_gpt4o.jsonl"

# Peek at the first JSONL line only, to check the record layout before
# the file is uploaded.
with open(path, mode="r", encoding="utf-8") as jsonl_file:
    first_line = jsonl_file.readline()
    record = json.loads(first_line)

# ensure_ascii=False leaves non-ASCII characters unescaped in the printout.
print(json.dumps(record, indent=2, ensure_ascii=False))


In [None]:
# Upload the Safety DPO (overlapped) dataset and start a DPO fine-tuning
# job on it.
#
# The file is opened in a context manager so the handle is closed as soon as
# the upload call returns, instead of lingering for the life of the kernel.
with open("../data/safety_dpo/safety_dpo_gpt4o.jsonl", "rb") as training_fh:
    training = client.files.create(
        file=training_fh,
        purpose="fine-tune",
    )

print("file id =", training.id)

# DPO job on the uploaded file: beta 0.1, a single epoch, batch size 8.
job = client.fine_tuning.jobs.create(
    training_file=training.id,
    model="gpt-4o-2024-08-06",
    method={
        "type": "dpo",
        "dpo": {
            "hyperparameters": {"beta": 0.1, "n_epochs": 1, "batch_size": 8},
        },
    },
)

print("fine-tune job id =", job.id)

#### Safety-DPO-Exclusive

In [None]:
import json

path = "../data/safety_dpo/safety_dpo_gpt4o_exclusive.jsonl"

# Peek at the first JSONL line only, to check the record layout before
# the file is uploaded.
with open(path, mode="r", encoding="utf-8") as jsonl_file:
    first_line = jsonl_file.readline()
    record = json.loads(first_line)

# ensure_ascii=False leaves non-ASCII characters unescaped in the printout.
print(json.dumps(record, indent=2, ensure_ascii=False))


In [None]:
# Upload the Safety DPO (exclusive) dataset and start a DPO fine-tuning
# job on it.
#
# The file is opened in a context manager so the handle is closed as soon as
# the upload call returns, instead of lingering for the life of the kernel.
with open("../data/safety_dpo/safety_dpo_gpt4o_exclusive.jsonl", "rb") as training_fh:
    training = client.files.create(
        file=training_fh,
        purpose="fine-tune",
    )

print("file id =", training.id)

# DPO job on the uploaded file: beta 0.1, a single epoch, batch size 8.
job = client.fine_tuning.jobs.create(
    training_file=training.id,
    model="gpt-4o-2024-08-06",
    method={
        "type": "dpo",
        "dpo": {
            "hyperparameters": {"beta": 0.1, "n_epochs": 1, "batch_size": 8},
        },
    },
)

print("fine-tune job id =", job.id)