### Global variables

In [None]:
import os
from pathlib import Path

# File locations: every artefact lives in a "files" folder located under the
# parent of the current working directory.
FILE_FOLDER = Path.cwd().parent.joinpath("files")
CLEAN_FILE = FILE_FOLDER.joinpath("clean_dataset.json")
FINE_TUNING_FILE = FILE_FOLDER.joinpath("fine_tuning.jsonl")


### Save the clean data to a file

In [None]:
import json

from pysqlx_engine import PySQLXEngineSync

# SQL query that extracts the training rows.
# The `cto` CTE keeps only titles that occur at least 10 times among products
# with a non-empty title and a content longer than 10 characters; the outer
# query then returns the distinct (title, description) pairs for those titles,
# applying the same filters and ordering by title.
SELECT_SQL = """--sql
with cto as (
    select
        title,
        count(title) as total
    from product
    where length(content) > 10 and length(title) > 0 and trim(content) != ''
    group by title
    having count(title) >= 10
)
select distinct title, content as description from product as p
where p.title in (select cto.title from cto) and length(content) > 10 and length(title) > 0 and trim(content) != ''
order by title;
"""

# Connection URI for the PostgreSQL database. It can be overridden through the
# DATABASE_URL environment variable so real credentials never have to be
# written into the notebook; the fallback is the original local test database.
DATABASE_URL = os.environ.get(
    "DATABASE_URL", "postgresql://test:test@localhost:5432/test"
)

# Open the database connection.
conn = PySQLXEngineSync(uri=DATABASE_URL)
conn.connect()

try:
    # Run the query and dump every row as one JSON document per line.
    result = conn.query_as_dict(SELECT_SQL)

    # Make sure the output folder exists before writing into it.
    CLEAN_FILE.parent.mkdir(parents=True, exist_ok=True)
    with open(CLEAN_FILE, "w", encoding="utf-8") as f:
        for row in result:
            f.write(f"{json.dumps(row)}\n")
finally:
    # Always release the connection, even if the query or the write failed.
    conn.close()


### Upload fine tuning file and train the model

In [1]:
from openai import OpenAI

# OpenAI API client. It is built without arguments, so credentials are not set
# here (presumably they come from the environment - confirm for your setup).
client = OpenAI()

# System message placed at the start of every conversation, both in the
# training examples and in the test prompts.
system = dict(
    role="system",
    content=(
        "You are an Amazon Product Assistant, when the user asks you about a title, "
        "you must provide the product's description."
    ),
)

In [None]:

# Skeletons for the user and assistant chat messages; their "content" starts
# out empty (None).
user = dict(role="user", content=None)
assistant = dict(role="assistant", content=None)


def generate_train_file(clean_file=None, fine_tuning_file=None, system_message=None):
    """
    Convert the clean dataset into the chat format used for fine-tuning.

    Every non-blank line of the input is parsed as a JSON object with the keys
    "title" and "description" and written to the output as one JSON line of
    the form {"messages": [system, user, assistant]}, where the user message
    carries the title and the assistant message carries the description.

    The output file is opened once and truncated, so running this function
    again regenerates the file instead of appending duplicate examples.

    Args:
        clean_file: Path of the JSON-lines input. Defaults to CLEAN_FILE.
        fine_tuning_file: Path of the JSON-lines output. Defaults to
            FINE_TUNING_FILE.
        system_message: System message dict placed first in every example.
            Defaults to the module-level `system`.

    Returns:
        int: Number of training examples written.

    Raises:
        json.JSONDecodeError: If a non-blank input line is not valid JSON.
        KeyError: If a row lacks the "title" or "description" key.
    """
    # Defaults are resolved at call time so the module-level names only need
    # to exist when they are actually used.
    source_path = CLEAN_FILE if clean_file is None else clean_file
    target_path = FINE_TUNING_FILE if fine_tuning_file is None else fine_tuning_file
    system_turn = system if system_message is None else system_message

    written = 0
    with open(source_path, "r", encoding="utf-8") as source:
        # "w" (not "a"): a re-run must not duplicate the training examples.
        with open(target_path, "w", encoding="utf-8") as target:
            for line in source:
                if not line.strip():
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                data = json.loads(line)
                # Fresh dicts per example; no shared module state is mutated.
                example = {
                    "messages": [
                        system_turn,
                        {"role": "user", "content": data["title"]},
                        {"role": "assistant", "content": data["description"]},
                    ]
                }
                target.write(json.dumps(example) + "\n")
                written += 1
    return written


def upload_file():
    """
    Upload FINE_TUNING_FILE to OpenAI with the purpose "fine-tune".

    The API response is pretty-printed as JSON before it is returned.

    Returns:
        The object returned by `client.files.create`; `main` reads its `id`
        attribute to start the training job.
    """
    with FINE_TUNING_FILE.open("rb") as training_fh:
        uploaded = client.files.create(file=training_fh, purpose="fine-tune")

    print(uploaded.model_dump_json(indent=2))
    return uploaded


def train(file_id: str, model: str = "gpt-4o-2024-08-06"):
    """
    Start an OpenAI fine-tuning job on a previously uploaded training file.

    The created job is pretty-printed as JSON before it is returned.

    Args:
        file_id (str): Identifier of the uploaded training file.
        model (str): Name of the base model to fine-tune. Defaults to
            "gpt-4o-2024-08-06", the value that used to be hard-coded.

    Returns:
        The job object returned by `client.fine_tuning.jobs.create`.
    """
    job = client.fine_tuning.jobs.create(training_file=file_id, model=model)
    print(job.model_dump_json(indent=2))
    return job


def main():
    """
    Run the whole fine-tuning pipeline end to end:
    1. Build the training file from the clean dataset.
    2. Upload that file to OpenAI.
    3. Start the fine-tuning job on the uploaded file.
    """
    generate_train_file()
    train(file_id=upload_file().id)


main()

### Fine tuning tests

In [3]:
# Identifier of the fine-tuned model queried by the test cells in this section.
FINETUNED_MODEL = "ft:gpt-4o-2024-08-06:personal::BE5ejb5G"

In [None]:
# Send a product title to the fine-tuned model and print its answer with a
# line break inserted after every period.
messages = [system]
messages.append(
    {
        "role": "user",
        "content": "The Gift Wrap Company Wrapping Paper, 37.5 Square Feet Flat Wrap, Cerise Krafty Color 100% Recycled Wrap",
    }
)
completion = client.chat.completions.create(
    model=FINETUNED_MODEL,
    messages=messages,
    temperature=0.1,
    max_tokens=500,
)
print(completion.choices[0].message.content.replace(".", ".\n"))

# Reference description for this title (presumably the original dataset text), for comparison with the model output:
# Founded in 1903, The Gift Wrap Company is the oldest established gift wrap manufacturer in the USA.
# Over the past 100+ years, The Gift Wrap Company has grown and expanded to include modern artists and classic styles.
# Our gift wrap is 50lb to 60lb weight, the thick paper that you can keep to re-use and re-wrap the next present or even line your drawers.
# Look for coordinating gift bags, ribbons, stationery, and tags. Once you've shopped and selected the gift, be sure to wrap it with equal care.
# When selecting our holiday wrap you can rest assured that the little ones in your house won't be able to peek through!
# Keep on hand for parties; never rush to the store again.

The Gift Wrap Company is your one-stop shop for wrapping paper, gift bags, ribbons and bows, and everything else you need to make your gifts as special on the outside as they are on the inside.



In [None]:
# Send a product title to the fine-tuned model (deterministic temperature,
# answer capped at 100 tokens) and print it with a line break after every period.
messages = [system]
messages.append(
    {
        "role": "user",
        "content": "White Sierra Women's Sierra Point Convertible Pant (29-Inch Inseam)",
    }
)
completion = client.chat.completions.create(
    model=FINETUNED_MODEL,
    messages=messages,
    temperature=0.0,
    max_tokens=100,
)
print(completion.choices[0].message.content.replace(".", ".\n"))

# Reference description for this title (presumably the original dataset text), for comparison with the model output:
# From mountain trails to sunny river banks to scorching deserts,
# the Sierra Point Convertible Pant swiftly converts from pants to shorts when the temps change.
# The lightweight nylon Sierra Cloth woven fabric dries just as quickly.


White Sierra Sierra Point Convertible Pant for WomenThe White Sierra Sierra Point Convertible Pant for Women is a great choice for the woman who loves the outdoors.
 The pants are made from 100% nylon woven faille, which makes them durable and long-lasting.
 The fabric is water-repellent, so you can wear them in the rain without getting wet.
 The pants are lightweight and can be packed easily, making them ideal for travel.
 The pants have a zip-off leg, which allows you to convert them


In [4]:
# Send a product title to the fine-tuned model (deterministic temperature)
# and print its answer with a line break inserted after every period.
messages = [system]
messages.append({"role": "user", "content": "Zaggora Women's Hot Top"})
completion = client.chat.completions.create(
    model=FINETUNED_MODEL,
    messages=messages,
    temperature=0.0,
    max_tokens=500,
)
print(completion.choices[0].message.content.replace(".", ".\n"))

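# Reference description for this title (presumably the original dataset text), for comparison with the model output:
# Zaggora Hot Tops - Small-Black - Hot Tops gets you hot in style - CELU-LITE Technology The Zaggora Hot Line of
# products has been extensively tested in both the US and UK for effectiveness.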

Zaggora Hot Tops are specially designed to help you lose more calories during exercise.
 Hot Tops are made with our patent pending Celu-Lite technology, which works in conjunction with your body&#x2019;s natural heat production to help you burn more calories.
 Hot Tops are made of a thin, flexible, and extremely stretchy fabric that moves with your body.

