# Fine-tuning

<!--TABLE OF CONTENTS-->
**Contents**

- [Fallacy Identification](#Fallacy-Identification)
- [Fallacy Classification](#Fallacy-Classification)

In [1]:
%load_ext autoreload
%autoreload 2

import os
from dotenv import load_dotenv
from src.fallacies import get_fallacy_df, save_fallacy_df
from src.experiment import get_classification_prompt_template, get_identification_zero_shot_prompt_template
from src.tuning import tuning_train_test_split, get_tuning_examples, save_tuning_examples, TuningSet
from openai import OpenAI

# Load environment variables from the .env file (OPENAI_API_KEY is read via os.getenv further below)
load_dotenv()

True

## Fallacy Identification

In [2]:
# Load the e14 fallacy dataframe used for the identification task.
df_fallacies_e14 = get_fallacy_df(
    'data/fallacies_e14.csv',
    only_incorrect=False,
)

# Preview the first rows as a quick sanity check of the columns.
df_fallacies_e14.head()

[2024-11-05 16:54:46] Created new fallacy identification dataframe.


Unnamed: 0,step,entity,fallacy,label,category,subcategory
0,Since John asked Maria if she used the last of...,tepas,Argument from Silence,1,informal,insufficiency
1,Since Alice asked if Bob knew what an 'ossia' ...,ossia,Argument from Silence,1,informal,insufficiency
2,Since Alice claims that the Hausdorff contents...,hausdorff contents,Argument from Silence,1,informal,insufficiency
3,"Since Tom, a seasoned tugboater, said that ice...",tugboaters,Argument from Silence,1,informal,insufficiency
4,Since John accuses Mary of being terrified of ...,beewolf,Argument from Silence,1,informal,insufficiency


In [3]:
# Assign rows to the train / validation / test tuning splits, grouping by fallacy and label.
# NOTE(review): the return value is unused and the 'tuning' column is read right after,
# so the helper presumably adds that column to the dataframe in place - confirm in src.tuning.
tuning_train_test_split(
    df_fallacies_e14,
    ['fallacy', 'label'],
    n_train=4,
    n_validation=1,
)

# Number of rows that ended up in each split.
df_fallacies_e14['tuning'].value_counts()


tuning
test          2320
train         1856
validation     464
Name: count, dtype: int64

In [4]:
# Class balance check: number of rows per label value.
df_fallacies_e14['label'].value_counts()

label
1    2320
0    2320
Name: count, dtype: int64

In [5]:
# Prompt template and system prompt shared by every identification tuning example.
prompt_template = get_identification_zero_shot_prompt_template()
system_prompt = 'You are a logical fallacy detector. Given a reasoning step, your task is to identify whether it is correct or not.'

# Both splits use the same template, system prompt and target column ('label');
# only the TuningSet member differs, so build them in one pass (TRAIN first, then VALIDATION).
train_examples, validation_examples = (
    get_tuning_examples(df_fallacies_e14, prompt_template, system_prompt, 'label', split)
    for split in (TuningSet.TRAIN, TuningSet.VALIDATION)
)

In [6]:
# Save both example sets to .jsonl paths; the same two paths are opened for upload in the next cell.
save_tuning_examples(train_examples, 'data/tuning_train_e14.jsonl')
save_tuning_examples(validation_examples, 'data/tuning_validation_e14.jsonl')

In [7]:
# Create the OpenAI client using the API key loaded from the environment.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Upload the training and validation files to the OpenAI API.
# The files are opened in context managers so the handles are closed as soon as
# each upload returns, instead of staying open for the lifetime of the kernel.
with open("data/tuning_train_e14.jsonl", "rb") as train_fh:
    train_file = client.files.create(file=train_fh, purpose="fine-tune")
with open("data/tuning_validation_e14.jsonl", "rb") as validation_fh:
    validation_file = client.files.create(file=validation_fh, purpose="fine-tune")

In [8]:
# Show the IDs of the uploaded files; these are passed to the fine-tuning job below.
print('Train file ID:', train_file.id)
print('Validation file ID:', validation_file.id)

Train file ID: file-Zg0TAM70bvVxwMTSs6Es2J0d
Validation file ID: file-evv9npgSUZI13ZzuioEZFzMO


In [9]:
# Fine-tune the model.
# NOTE: running this cell submits a new fine-tuning job to OpenAI (see the recorded cost below),
# so avoid re-running it unintentionally.
# Recorded run - fallacy-identification-v2 (gpt-4o-mini):
# - n_train=4, n_validation=1 (within fallacy and label)
# - 554,946 trained tokens
# - Actual cost: $1.67
client.fine_tuning.jobs.create(
    training_file=train_file.id,
    validation_file=validation_file.id,
    model="gpt-4o-mini-2024-07-18",
    seed=42,
    suffix="fallacy-identification-v2"
)

FineTuningJob(id='ftjob-HiA3t5R8V0X9h5UKwkKoquet', created_at=1730823058, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs='auto', batch_size='auto', learning_rate_multiplier='auto'), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-k2KKJWldFPKvRIwPH9qZcydN', result_files=[], seed=42, status='validating_files', trained_tokens=None, training_file='file-Zg0TAM70bvVxwMTSs6Es2J0d', validation_file='file-evv9npgSUZI13ZzuioEZFzMO', estimated_finish=None, integrations=[], user_provided_suffix='fallacy-identification-v2')

In [10]:
# Save the dataframe (now carrying the 'tuning' split column) back to the CSV path it was loaded from.
save_fallacy_df(df_fallacies_e14, 'data/fallacies_e14.csv')

## Fallacy Classification

In [2]:
# Load the e22 fallacy dataframe used for the classification task.
# NOTE(review): only_incorrect=True presumably restricts the frame to incorrect reasoning steps - confirm in src.fallacies.
df_fallacies_e22 = get_fallacy_df(
    'data/fallacies_e22.csv',
    only_incorrect=True,
)

# Preview the first rows as a quick sanity check of the columns.
df_fallacies_e22.head()

[2024-10-28 12:48:28] Created new fallacy identification dataframe.


Unnamed: 0,step,entity,fallacy,label,category,subcategory
0,Since John asked Maria if she used the last of...,tepas,Argument from Silence,1,informal,insufficiency
1,Since Alice asked if Bob knew what an 'ossia' ...,ossia,Argument from Silence,1,informal,insufficiency
2,Since Alice claims that the Hausdorff contents...,hausdorff contents,Argument from Silence,1,informal,insufficiency
3,"Since Tom, a seasoned tugboater, said that ice...",tugboaters,Argument from Silence,1,informal,insufficiency
4,Since John accuses Mary of being terrified of ...,beewolf,Argument from Silence,1,informal,insufficiency


In [3]:
# Assign rows to the train / validation / test tuning splits, grouping by fallacy only.
# NOTE(review): the return value is unused and the 'tuning' column is read right after,
# so the helper presumably adds that column to the dataframe in place - confirm in src.tuning.
tuning_train_test_split(
    df_fallacies_e22,
    ['fallacy'],
    n_train=4,
    n_validation=1,
)

# Number of rows that ended up in each split.
df_fallacies_e22['tuning'].value_counts()


tuning
test          1160
train          928
validation     232
Name: count, dtype: int64

In [None]:
# Prompt template and system prompt shared by every classification tuning example.
prompt_template = get_classification_prompt_template()
system_prompt = 'You are a logical fallacy classifier. Given an incorrect reasoning step, your task is to identify its type of fallacy.'

# Both splits use the same template, system prompt and target column ('fallacy');
# only the TuningSet member differs, so build them in one pass (TRAIN first, then VALIDATION).
train_examples, validation_examples = (
    get_tuning_examples(df_fallacies_e22, prompt_template, system_prompt, 'fallacy', split)
    for split in (TuningSet.TRAIN, TuningSet.VALIDATION)
)

In [4]:
# Save both example sets to .jsonl paths; the same two paths are opened for upload in the next cell.
save_tuning_examples(train_examples, 'data/tuning_train_e22.jsonl')
save_tuning_examples(validation_examples, 'data/tuning_validation_e22.jsonl')

In [8]:
# Create the OpenAI client using the API key loaded from the environment.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Upload the training and validation files to the OpenAI API.
# The files are opened in context managers so the handles are closed as soon as
# each upload returns, instead of staying open for the lifetime of the kernel.
with open("data/tuning_train_e22.jsonl", "rb") as train_fh:
    train_file = client.files.create(file=train_fh, purpose="fine-tune")
with open("data/tuning_validation_e22.jsonl", "rb") as validation_fh:
    validation_file = client.files.create(file=validation_fh, purpose="fine-tune")

In [13]:
# Show the IDs of the uploaded files; these are passed to the fine-tuning job below.
print('Train file ID:', train_file.id)
print('Validation file ID:', validation_file.id)

Train file ID: file-7SBSFbEnOwrO5x7A31H1Xqmx
Validation file ID: file-8Oz10aKtDTKculml3awjF1gz


In [None]:
# Fine-tune the model.
# NOTE: running this cell submits a new fine-tuning job to OpenAI (see the recorded cost below),
# so avoid re-running it unintentionally.
# NOTE(review): the suffix says "detection" although this section fine-tunes the
# classification model - confirm the name is intentional before reusing it.
# Recorded run - fallacy-detection-v1 (gpt-4o-mini):
# - n_train=4, n_validation=1 (within fallacy)
# - 5,633,052 trained tokens
# - Actual cost: $10.90 (much higher than expected)
client.fine_tuning.jobs.create(
    training_file=train_file.id,
    validation_file=validation_file.id,
    model="gpt-4o-mini-2024-07-18",
    seed=42,
    suffix="fallacy-detection-v1"
)

In [14]:
# Save the dataframe (now carrying the 'tuning' split column) back to the CSV path it was loaded from.
save_fallacy_df(df_fallacies_e22, 'data/fallacies_e22.csv')