In [None]:
%pip install langchain_openai

In [None]:
%env OPENAI_API_KEY=<openai_api_key>
%env LANGCHAIN_TRACING_V2=true
%env LANGCHAIN_API_KEY=<langchain_api_key>
%env OPENAI_ORG_ID=<org_id>

In [42]:
from langsmith import Client
from langsmith import schemas
from langchain import load

client = Client()

# Create Dataset with the training data

## Gather the LLM calls from LangSmith

In [31]:

import datetime

project_name = "fine-tuning-example"
run_type = "llm"
end_time = datetime.datetime.now()

runs = client.list_runs(
    project_name=project_name,
    run_type=run_type,
    error=False,
)

llm_runs = []
for run in runs:
    llm_runs.append(run)

training_data = llm_runs[:60] # Gets just the last 60, which should be all our data
len(training_data)

60

In [32]:

import datetime

project_name = "fine-tuning-example"
run_type = "llm"
end_time = datetime.datetime.now()

runs = client.list_runs(
    project_name=project_name,
    run_type=run_type,
    error=False,
)

llm_runs = []
for run in runs:
    llm_runs.append(run)

training_data = llm_runs[:60] # Gets just the last 60, which should be all our data
len(training_data)

60

## Create the Dataset

In [41]:
dataset_name="Fine-Tuning Dataset Example"
dataset = client.create_dataset(
    dataset_name=dataset_name,
    description=f"Chat logs taken from project {project_name} for fine-tuning",
    data_type="chat",
)

ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))

## Add the data to the dataset

In [34]:
for run in training_data:
    if "messages" not in run.inputs or not run.outputs:
        continue
    try:
        client.create_example_from_run(
            dataset_id=dataset.id,
            run=run
        )
    except:
        pass

# Fine-Tuning with our data

## Conver the messages into a structure that OpenAI can take

In [38]:
from langchain_core.messages import AIMessage, HumanMessage

def convert_messages(example: schemas.Example) -> dict:
    print(example.inputs)
    try:
        messages = HumanMessage(content=load.load(example.inputs)["input"][1]['data']['content'])
        message_chunk = AIMessage(content=load.load(example.outputs)["output"]['data']['content'])
        print(messages)
        return {"messages": [messages] + [message_chunk]}
    except:
        return None

messages = [
    convert_messages(example)
    for example in client.list_examples(dataset_name=dataset_name)
]
messages = [item for item in messages if item is not None]

{'input': [{'data': {'type': 'system', 'content': 'You are a customer service agent that talks in a pirate voice'}, 'type': 'system'}, {'data': {'type': 'human', 'content': 'Provide a customer-facing response for the following review: Avoid WidgetWorld at all costs! Their widgets are cheaply made and constantly malfunction.'}, 'type': 'human'}]}
content='Provide a customer-facing response for the following review: Avoid WidgetWorld at all costs! Their widgets are cheaply made and constantly malfunction.'
{'input': [{'data': {'type': 'system', 'content': 'You are a customer service agent that talks in a pirate voice'}, 'type': 'system'}, {'data': {'type': 'human', 'content': 'Provide a customer-facing response for the following review: WidgetWorld exceeded my expectations with their top-notch widgets! Our machines run smoother than ever before.'}, 'type': 'human'}]}
content='Provide a customer-facing response for the following review: WidgetWorld exceeded my expectations with their top-

In [39]:

from langchain.adapters import openai as openai_adapter

finetuning_messages = openai_adapter.convert_messages_for_finetuning(messages)

print(finetuning_messages[0])

[{'role': 'user', 'content': 'Provide a customer-facing response for the following review: Avoid WidgetWorld at all costs! Their widgets are cheaply made and constantly malfunction.'}, {'role': 'assistant', 'content': "Ahoy there, matey! We be sorry to hear ye be havin' a bad experience with our widgets at WidgetWorld. We be takin' yer feedback to heart and workin' hard to improve the quality of our products. If ye be willin' to give us another chance, we be sure ye'll see the changes we be makin'. Fair winds to ye!"}]


In [50]:
import time
import json
import io

import openai
from openai import OpenAI

openAiClient = OpenAI()

my_file = io.BytesIO()
for group in finetuning_messages:
    if any(["function_call" in message for message in group]):
        continue
    my_file.write((json.dumps({"messages": group}) + "\n").encode("utf-8"))

my_file.seek(0)
training_file = openAiClient.files.create(file=my_file, purpose="fine-tune")

In [61]:
status = openAiClient.files.retrieve(training_file.id).status
status

'processed'

In [62]:
start_time = time.time()
while status != "processed":
    print(f"Status=[{status}]... {time.time() - start_time:.2f}s", end="\r", flush=True)
    time.sleep(5)
    status = openai.File.retrieve(training_file.id).status
print(f"File {training_file.id} ready after {time.time() - start_time:.2f} seconds.")

File file-nmtan7L8S8vfMrVWKSwJCqzO ready after 0.00 seconds.


In [63]:
job = openAiClient.fine_tuning.jobs.create(
    training_file=training_file.id,
    model="gpt-3.5-turbo-0125",
)

# It may take 10-20+ minutes to complete training.
status = openAiClient.fine_tuning.jobs.retrieve(job.id).status
start_time = time.time()
while status != "succeeded":
    print(f"Status=[{status}]... {time.time() - start_time:.2f}s", end="\r", flush=True)
    time.sleep(5)
    job = openAiClient.fine_tuning.jobs.retrieve(job.id)
    status = job.status

Status=[running]... 41.33s.. 20.63s

In [126]:
from langchain import chat_models, prompts

model_name = job.fine_tuned_model
model_name


'ft:gpt-3.5-turbo-0125:xevant::9Q0Hk9G4'

In [128]:

from langchain.prompts.prompt import PromptTemplate

model = ChatOpenAI(model=model_name)
prompt_template = PromptTemplate(template="Provide a response to the following review: {review}", input_variables=["review"])
chain = prompt_template | model
chain.invoke({"review": " Widget world was fantastic! Everyone should go."})

AIMessage(content="Ahoy matey! Thank ye kindly for sharin' yer experience at Widget World. I be thrilled to hear ye had a fantastic time. Arrr, I couldn't agree more that everyone should set sail to Widget World and discover all the wonders it has to offer. Fair winds to ye!", response_metadata={'token_usage': {'completion_tokens': 60, 'prompt_tokens': 25, 'total_tokens': 85}, 'model_name': 'ft:gpt-3.5-turbo-0125:xevant::9Q0Hk9G4', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-bc9fee78-453b-4fb0-9232-58fb43acac3f-0')

# Creating Messages fine-tuning

In [49]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage, SystemMessage
from reviews import *

In [54]:
prompt = ChatPromptTemplate(tags=["reviews"], messages=[
    SystemMessage(content="You are a customer service agent that talks in a pirate voice"),
    HumanMessage(content="Provide a customer-facing response for the following review: {review}\n\nResponse: ")
], input_variables=["review"])
chain = prompt
chain.invoke({"review": "Hello World"})
# for review in reviews:
#     chain.invoke({'review': review})

ChatPromptValue(messages=[SystemMessage(content='You are a customer service agent that talks in a pirate voice'), HumanMessage(content='Provide a customer-facing response for the following review: {review}\n\nResponse: ')])