# Notebook 2: Model Training

**Goal:** Load the `jsonl` dataset and submit a fine-tuning job.

## 1. Setup and Imports

In [None]:
import json
import os

from dotenv import load_dotenv
from openai import OpenAI

# Read the variables declared in the local .env file into the environment
load_dotenv()

# Build the OpenAI client from the key exposed as OPENAI_API_KEY
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Locations of the training dataset and of the fine-tuning job log
TRAINING_FILE_PATH = "outputs/datasets/training_data.jsonl"
MODEL_OUTPUT_DIR = "outputs/model_weights/"
MODEL_LOG_FILE = os.path.join(MODEL_OUTPUT_DIR, "finetune_job_log.json")

print(f"Training file: {TRAINING_FILE_PATH}")

## 2. Load and Validate Training Data

Quickly inspect the file to make sure it's valid before uploading.

In [None]:
# Load the JSONL training file and check that every record parses as JSON.
#
# `lines` is reset up front and only populated once validation succeeds, so
# the upload cell (which proceeds only when `lines` is non-empty) never acts
# on unvalidated data or on stale data left over from an earlier run.
lines = []
try:
    # Read as UTF-8 explicitly so decoding does not depend on the platform's
    # default encoding.
    with open(TRAINING_FILE_PATH, 'r', encoding='utf-8') as f:
        loaded_lines = f.readlines()

    print(f"Loaded {len(loaded_lines)} lines from training file.")

    # Parse every non-blank line, not just the first one, and remember the
    # first record so it can be shown as an example.
    first_record = None
    for line_number, raw_line in enumerate(loaded_lines, start=1):
        if not raw_line.strip():
            continue
        try:
            record = json.loads(raw_line)
        except json.JSONDecodeError as e:
            raise ValueError(f"Line {line_number} is not valid JSON: {e}") from e
        if first_record is None:
            first_record = record

    # An empty (or whitespace-only) file gets a clear message instead of an
    # IndexError from indexing an empty list.
    if first_record is None:
        raise ValueError("Training file contains no records.")

    print("\n--- First Line Example ---")
    print(json.dumps(first_record, indent=2))

    # Validation passed: expose the data to the following cells.
    lines = loaded_lines
except FileNotFoundError:
    print(f"ERROR: Training file not found at {TRAINING_FILE_PATH}")
    print("Please run '01_Data_Generation.ipynb' first.")
except Exception as e:
    print(f"An error occurred loading the file: {e}")

## 3. Upload File to OpenAI

In [None]:
# Upload the training file to OpenAI, but only if the previous cell loaded
# at least one line of training data.
if 'lines' in locals() and len(lines) > 0:
    try:
        print("Uploading file to OpenAI...")
        # Open the file in a context manager so the handle is closed once the
        # upload call returns or raises, instead of being leaked.
        with open(TRAINING_FILE_PATH, "rb") as training_file:
            training_file_object = client.files.create(
                file=training_file,
                purpose="fine-tune"
            )
        print(f"File uploaded successfully. File ID: {training_file_object.id}")
    except Exception as e:
        print(f"File upload failed: {e}")
else:
    print("Skipping upload, no valid training data loaded.")

## 4. Create Fine-Tuning Job

This starts the training job. The job ID and details are then saved to `outputs/model_weights/finetune_job_log.json`.

In [None]:
# Create the fine-tuning job for the uploaded file and record its details in
# MODEL_LOG_FILE. Runs only if the upload cell produced `training_file_object`.
if 'training_file_object' in locals():
    try:
        job = client.fine_tuning.jobs.create(
            training_file=training_file_object.id,
            model="gpt-3.5-turbo", # Or gpt-4, etc.
            suffix="adam-risk-assessor-v1"
        )
    except Exception as e:
        print(f"Failed to create fine-tuning job: {e}")
    else:
        print(f"Fine-tuning job created successfully! Job ID: {job.id}")
        print(f"Job status: {job.status}")

        # Save job details. This is handled separately from job creation so
        # that a failure to write the log is not reported as a failure to
        # create the job, which already exists at this point.
        try:
            # The output directory may not exist yet on a fresh checkout.
            os.makedirs(MODEL_OUTPUT_DIR, exist_ok=True)
            # job.to_json() already returns a JSON string; dumping that string
            # directly would write one escaped string literal, not an object.
            # Parse it back first so the log file holds a proper JSON object.
            job_details = json.loads(job.to_json())
            with open(MODEL_LOG_FILE, 'w', encoding='utf-8') as f:
                json.dump(job_details, f, indent=4)
            print(f"Job details saved to {MODEL_LOG_FILE}")
        except Exception as e:
            print(f"WARNING: Job was created, but saving its details failed: {e}")

        print("\n--- To monitor job status, run: ---")
        print(f"client.fine_tuning.jobs.retrieve('{job.id}')")
else:
    print("Skipping fine-tune job, file was not uploaded.")