In [None]:
import os
import time
from ast import literal_eval

from dotenv import load_dotenv
import openai
import polars as pl


# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# Hand the key to the OpenAI client; this is None when the variable is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [None]:
def save_fold_csv(path: str, folds: list, output_filename: str) -> None:
    """Export the selected folds of a dataset as a prompt/completion csv.

    Rows whose 'fold' value is in ``folds`` are kept. The 'review' column is
    written as 'prompt'; the 'explicit aspects' column is written as
    'completion' after being parsed with ``literal_eval`` and joined with
    newline characters.

    Args:
        path (str): path to the input csv file.
        folds (list): fold values to keep.
        output_filename (str): path of the csv file to write.

    Returns:
        None
    """
    # Source column -> output column. Insertion order drives the select below.
    column_mapping = {
        'review': 'prompt',
        'explicit aspects': 'completion',
    }

    fold_rows = pl.read_csv(path).filter(pl.col('fold').is_in(folds))
    renamed = fold_rows.select(*column_mapping).rename(column_mapping)

    # Assumes every 'explicit aspects' cell holds the text of a Python list
    # of strings -- TODO confirm against the dataset.
    parsed = renamed.with_columns(
        pl.col('completion').apply(lambda raw: literal_eval(raw))
    )
    # One aspect per line in the final completion text.
    flattened = parsed.with_columns(
        pl.col('completion').apply(lambda aspects: '\n'.join(aspects))
    )

    flattened.write_csv(output_filename)


def upload_file(path: str) -> dict:
    """Upload a file to OpenAI with purpose 'fine-tune'.

    The file is opened in binary mode and passed as-is to
    ``openai.File.create``; no preparation of its content happens here.

    Args:
        path (str): path to the file to upload.

    Returns:
        response (dict): response from the OpenAI API.
    """
    # The context manager closes the handle as soon as the upload call
    # returns or raises, instead of leaking it for the kernel's lifetime.
    with open(path, 'rb') as training_file:
        response = openai.File.create(
            file=training_file,
            purpose='fine-tune'
        )
    return response


def create_fine_tune(file_id: str, params: dict) -> dict:
    """Start a fine-tuning job for an uploaded training file.

    Args:
        file_id (str): id of the uploaded file to train on.
        params (dict): extra keyword arguments forwarded unchanged to
            ``openai.FineTune.create``.

    Returns:
        response (dict): response from the OpenAI API.
    """
    # Keyword expansion is kept on purpose: a 'training_file' key inside
    # params raises TypeError rather than silently overriding file_id.
    return openai.FineTune.create(training_file=file_id, **params)


def monitor_fine_tune(model_id: str, poll_interval: float = 60) -> None:
    """Poll a fine-tuning job until it reaches a terminal status.

    The job status is printed on every poll. Polling stops once the status is
    'succeeded', 'failed' or 'cancelled'; the elapsed time in minutes is then
    printed.

    Args:
        model_id (str): id of the fine-tuning job.
        poll_interval (float): seconds to wait between two polls. Defaults to
            60, the interval that used to be hard-coded.

    Returns:
        None
    """
    # 'cancelled' must be treated as terminal too: a cancelled job never
    # becomes 'succeeded' or 'failed', so without it the loop never ends.
    terminal_statuses = ('succeeded', 'failed', 'cancelled')

    start = time.time()
    while True:
        fine_tuning_status = openai.FineTune.retrieve(model_id)
        status = fine_tuning_status["status"]
        print(f"Fine-tuning job status: {status}")

        if status in terminal_statuses:
            break

        time.sleep(poll_interval)

    print(f"Time elapsed: {(time.time() - start)/60} minutes.")

In [None]:
# Save the csv files holding the prepared data.
path = '../datasets/stratified/tv.csv'

# Folds 1 through 9 go into the training file.
save_fold_csv(path, folds=[*range(1, 10)], output_filename='train.csv')
# Disabled: export of fold 10 to 'test.csv'.
# save_fold_csv(path, folds=[10], output_filename='test.csv')

In [None]:
# Upload the file to OpenAI and keep its id for the fine-tuning job.
# NOTE(review): '...' looks like a placeholder for the real file path --
# confirm before running.
file = upload_file('...')
file_id = file['id']
# Bare expression so the notebook displays the id.
file_id

In [None]:
# Create the fine-tuning job for the uploaded file.
# NOTE(review): '...' is presumably a placeholder for the base model name --
# confirm before running.
params = dict(model='...')
model = create_fine_tune(file_id, params)

In [None]:
# Keep the id found in the response of the fine-tuning job creation.
model_id = model['id']

In [None]:
# Poll the job, printing its status, until it reaches a terminal status.
monitor_fine_tune(model_id=model_id)

In [None]:
# Fetch the job once more to read the fine-tuned model name and the
# hyperparameters stored in the job record.
model_retrieve = openai.FineTune.retrieve(id=model_id)
fine_tuned_model = model_retrieve['fine_tuned_model']
hyperparams = model_retrieve['hyperparams']
# Bare tuple so the notebook displays both values.
fine_tuned_model, hyperparams