In [None]:
!pip install openai plotly nbformat

In [None]:
# Read the OpenAI API key.
# Lookup order: Colab secret 'OPENAI_KEY' -> env var OPENAI_KEY -> env var
# OPENAI_API_KEY (already configured). The result is exported as
# OPENAI_API_KEY for the rest of the notebook.
import os

try:
    from google.colab import userdata
    OPENAI_KEY = userdata.get('OPENAI_KEY')
except Exception:
    # Not running in Colab, or the secret could not be read there:
    # fall back to the process environment (best effort, as before).
    OPENAI_KEY = os.getenv('OPENAI_KEY') or os.getenv('OPENAI_API_KEY')

if not OPENAI_KEY:
    # Fail here with an actionable message instead of the opaque TypeError
    # that assigning None to os.environ would raise.
    raise RuntimeError(
        "No OpenAI API key found. Add a Colab secret named 'OPENAI_KEY' or "
        "set the OPENAI_KEY environment variable before running this notebook."
    )

os.environ['OPENAI_API_KEY'] = OPENAI_KEY


# Fine-Tune OpenAI Models

Pricing: https://openai.com/pricing  
OpenAI-Docs: https://platform.openai.com/docs/guides/fine-tuning  
OpenAI-Cookbook: https://cookbook.openai.com/examples/chat_finetuning_data_prep


In [None]:
from pathlib import Path
from openai import OpenAI

In [None]:
# Shared API client used by every cell below. No key is passed explicitly;
# presumably the client picks up OPENAI_API_KEY from the environment
# (exported in the key-loading cell) -- confirm against the SDK docs.
client = OpenAI()

## Trainingsdaten vorbereiten und hochladen

Beispiel von: https://learn.microsoft.com/en-us/azure/ai-services/openai/tutorials/fine-tune

In [None]:
# Download the example training and validation sets (train.jsonl / valid.jsonl)
# into the current working directory; the upload cell reads them from there.
!wget https://raw.githubusercontent.com/floleuerer/elvtr/main/session_09/train.jsonl
!wget https://raw.githubusercontent.com/floleuerer/elvtr/main/session_09/valid.jsonl

In [None]:
# Upload the training and validation sets with purpose "fine-tune".
# The returned file objects carry the ids that the fine-tuning job refers to.
# `with` guarantees the local file handles are closed even if an upload fails.
with open("train.jsonl", "rb") as train_fh:
    train_file = client.files.create(file=train_fh, purpose="fine-tune")

with open("valid.jsonl", "rb") as valid_fh:
    valid_file = client.files.create(file=valid_fh, purpose="fine-tune")

## Training-Job erstellen

In [None]:
# Start a fine-tuning job on the base model, using the uploaded training file
# and the uploaded validation file, for a single epoch.
job = client.fine_tuning.jobs.create(
    model="gpt-3.5-turbo",
    training_file=train_file.id,
    validation_file=valid_file.id,
    hyperparameters={"n_epochs": 1},
)

### Job-Status überprüfen

In [None]:
# Fetch the job object for the job created above. This is a snapshot taken at
# call time: re-run this cell to see progress. The bare expression on the last
# line renders the object as the cell output.
job_status = client.fine_tuning.jobs.retrieve(job.id)
job_status

In [None]:
# List fine-tuning jobs, requesting at most 10 (limit=10), and display them
# as the cell output.
jobs = client.fine_tuning.jobs.list(limit=10)
jobs

### Trainingsmetriken speichern

In [None]:
# Refresh the job first: a snapshot taken right after creation has no result
# files yet, and indexing [0] on it would fail with a bare IndexError.
job_status = client.fine_tuning.jobs.retrieve(job.id)
if not job_status.result_files:
    raise RuntimeError(
        f"Fine-tuning job {job.id} has no result files yet "
        f"(status: {job_status.status}). Re-run this cell once the job has finished."
    )

# Download the first result file and store it locally for the plotting cells.
# NOTE(review): the content is written exactly as returned; confirm it is
# plain CSV text (not an encoded payload) for the API version in use.
result_file = job_status.result_files[0]
result_content = client.files.content(result_file)
Path('result.csv').write_text(result_content.text)

### Metriken plotten

In [None]:
import plotly.graph_objects as go
import pandas as pd

In [None]:
# Load the training metrics from result.csv into a DataFrame; the plotting
# cells below read the columns step, train_loss, valid_loss, train_accuracy
# and valid_mean_token_accuracy from it.
data = pd.read_csv('result.csv')

In [None]:

# Loss curves: one trace each for the training and the validation loss,
# both plotted against the training step.
trace_train_loss = go.Scatter(
    x=data['step'],
    y=data['train_loss'],
    mode='lines+markers',
    name='Train Loss'
)

trace_valid_loss = go.Scatter(
    x=data['step'],
    y=data['valid_loss'],
    mode='lines+markers',
    name='Validation Loss'
)

# Both traces share one figure and a single y-axis.
fig = go.Figure()
fig.add_trace(trace_train_loss)
fig.add_trace(trace_valid_loss)

# Title and axis labels describe what is actually plotted here (loss only).
fig.update_layout(
    title='Training and Validation Loss over Time',
    xaxis_title='Step',
    yaxis_title='Loss',
)

# Show the figure in the Jupyter notebook
fig.show()

In [None]:

# Accuracy curves: one trace each for the training and the validation
# accuracy, both plotted against the training step.
# The traces use the figure's default y-axis: this figure contains nothing
# else, so binding them to a secondary axis ('y2') served no purpose.
trace_train_accuracy = go.Scatter(
    x=data['step'],
    y=data['train_accuracy'],
    mode='lines+markers',
    name='Train Accuracy'
)

trace_valid_accuracy = go.Scatter(
    x=data['step'],
    y=data['valid_mean_token_accuracy'],
    mode='lines+markers',
    name='Validation Accuracy'
)

fig = go.Figure()

# Add traces to the figure
fig.add_trace(trace_train_accuracy)
fig.add_trace(trace_valid_accuracy)

# Label the figure so it can be read on its own.
fig.update_layout(
    title='Training and Validation Accuracy over Time',
    xaxis_title='Step',
    yaxis_title='Accuracy',
)

# Show the figure in the Jupyter notebook
fig.show()

## Trainiertes Modell verwenden

In [None]:
# Refresh the job before reading the model name: a snapshot taken while the
# job was still running carries no fine_tuned_model, and the request below
# would then be sent without a usable model id.
job_status = client.fine_tuning.jobs.retrieve(job.id)
if job_status.fine_tuned_model is None:
    raise RuntimeError(
        f"Fine-tuning job {job.id} has not produced a model yet "
        f"(status: {job_status.status}). Re-run this cell once the job has succeeded."
    )

# Ask the fine-tuned model; the system prompt matches the persona used in the
# comparison cell so both answers can be compared directly.
completion = client.chat.completions.create(
  model=job_status.fine_tuned_model,
  messages=[
    {"role": "system", "content": "Clippy is a factual chatbot that is also sarcastic."},
    {"role": "user", "content": "Do you know a apple pie recipe?"}
  ]
)
print(completion.choices[0].message.content)

#### Vergleich: Basismodell gpt-3.5-turbo

In [None]:
# Baseline for comparison: send the same system and user prompt to the stock
# base model and print the text of its first choice.
baseline_messages = [
    {"role": "system", "content": "Clippy is a factual chatbot that is also sarcastic."},
    {"role": "user", "content": "Do you know a apple pie recipe?"},
]
completion = client.chat.completions.create(
    messages=baseline_messages,
    model="gpt-3.5-turbo",
)
first_choice = completion.choices[0]
print(first_choice.message.content)