In [None]:
! pip -q install transformers

In [None]:

from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from datasets import load_dataset
from peft import LoraConfig, get_peft_model


# Single source of truth for the checkpoint, shared by the tokenizer and the model.
MODEL_NAME = "microsoft/DialoGPT-medium"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Reuse the EOS token as the padding token.
tokenizer.pad_token = tokenizer.eos_token


def model_init():
    """Build a fresh copy of the pretrained causal language model.

    This function is handed to ``Trainer(model_init=...)``, so it takes no
    arguments and returns a newly loaded model on every call.

    Returns:
        The model loaded from ``MODEL_NAME``, placed on CUDA when a GPU is
        available and on the CPU otherwise.
    """
    # Local import: the notebook only imports torch at top level in a later cell.
    import torch

    # Fall back to the CPU instead of failing when no GPU is present.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
    return model.to(device)


tokenizer_config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

In [None]:
import pandas as pd
# Load the dataset; the file carries an .xls suffix but is parsed with read_csv.
dataset_path = "/kaggle/input/dadosmodaa/dados_moda.xls"
df = pd.read_csv(dataset_path)



In [None]:
# Preview the first rows to check the loaded columns.
df.head()

Unnamed: 0,respostaH,context,perguntaV,respostaV
0,Wide-leg pants are having a major moment in fa...,The key to styling wide-leg pants is balancing...,What tops work best with wide-leg pants?,"A fitted turtleneck, crop top, or tucked-in bl..."
1,Oversized vests are currently dominating stree...,A vest can add structure to an outfit while ke...,Can I wear a vest casually?,"Yes, pair a vest with jeans and a t-shirt for ..."
2,Pearl accessories have made a contemporary com...,Pearls add elegance and sophistication to any ...,How can I make pearls look modern?,"Mix pearl jewelry with other metals, layer dif..."
3,"The midi length is especially popular now, see...","A midi dress hits between the knee and ankle, ...",What shoes complement a midi dress?,"Depending on the style, strappy sandals, point..."
4,Metallic pieces are making a strong statement ...,Metallic accessories or clothing can add inter...,Can I wear metallics during the day?,"Yes, incorporate small metallic elements like ..."


In [None]:
from datasets import Dataset




In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the rows for validation. The fixed seed keeps the split, and
# therefore every validation loss computed on it, reproducible across runs.
trn_df, val_df = train_test_split(df, test_size=0.1, random_state=42)

In [None]:
# Report the number of rows in each split, one count per line.
print(len(trn_df), len(val_df), sep="\n")

2286
254


In [None]:


# Build the full training sequence for one row: history answer + context, question, answer.
def construct_conv(row, tokenizer):
    """Join one dataset row into a single EOS-delimited string and tokenize it.

    Layout of the text that gets tokenized::

        respostaH + context <EOS> perguntaV <EOS> respostaV <EOS>

    Args:
        row: Mapping-like record providing the fields ``respostaH``,
            ``context``, ``perguntaV`` and ``respostaV``.
        tokenizer: Callable tokenizer. Its ``eos_token`` attribute is used as
            the separator; ``"<|endoftext|>"`` is used when it is missing.

    Returns:
        Whatever ``tokenizer(...)`` returns for the sequence, requested with
        truncation and padding to 256 tokens and ``return_tensors="pt"``.
    """
    eos = getattr(tokenizer, "eos_token", None) or "<|endoftext|>"

    def _text(field):
        """Return the field as a string, mapping None/NaN to an empty string."""
        value = row[field]
        # NaN is the only value that is not equal to itself.
        if value is None or value != value:
            return ""
        return str(value)

    # NOTE(review): respostaH and context are concatenated with no separator,
    # exactly as before -- confirm this is the intended format.
    segments = (
        _text("respostaH") + _text("context"),
        _text("perguntaV"),
        _text("respostaV"),
    )

    # Segments are separated by "<EOS> " as before, but the sequence now ends
    # right at the final EOS. The old trailing space showed up as an extra
    # token between the last EOS and the padding.
    conversation = f"{eos} ".join(segments) + eos

    # Tokenize the concatenated sequence.
    return tokenizer(
        conversation,
        truncation=True,
        padding="max_length",
        max_length=256,
        return_tensors="pt",
    )



# Tokenize every row of the training and validation frames.
tokenized_train = list(
    map(lambda item: construct_conv(item[1], tokenizer), trn_df.iterrows())
)
tokenized_test = list(
    map(lambda item: construct_conv(item[1], tokenizer), val_df.iterrows())
)

# The model is autoregressive, so the sequences are passed as they are, without separate input/label fields.

In [None]:
# Inspect the first tokenized training example.
tokenized_train[0]

{'input_ids': tensor([[29289,   832,   257,   442, 28898, 35685,   290,  7331, 42370,    11,
           751,  6546, 21029,   290, 19847, 14412,    11,   393,  5166,   351,
           257, 18235,   256,  3325, 43163,   290, 10329,   532,  1029, 14412,
            13,  1353,   351,   257,  2042, 25749, 13209,    13, 40078, 42370,
           761,   867, 11685,    13, 50256,   703,   466,  1312,  1394,  5814,
           287, 42370,  1141, 45764,    30, 50256,  7679, 11562,   344,   532,
         16566,  1353,   739,  3218,   256,  2337,    11,  6546, 25749, 42370,
            11,   751,  7331, 14412,    13, 50256,   220, 50256, 50256, 50256,
         50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
         50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
         50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
         50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256,
         50256, 50256, 50256, 50256, 5

In [None]:
from transformers import AdamW
import torch



In [None]:
# import torch
# torch.cuda.empty_cache()

In [None]:
from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling
from torch.optim import AdamW
import optuna

# Padding: fall back to the EOS token when the tokenizer has no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Collator that batches the tokenized examples for training; mlm=False disables
# masked-language-model masking (causal language modeling setup).
# NOTE(review): with pad_token == eos_token the collator presumably ignores every
# EOS position in the labels, including the turn separators -- confirm against
# the DataCollatorForLanguageModeling documentation.
data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

# Hyperparameter search space sampled by Optuna.
def hp_space(trial):
    """Sample one hyperparameter configuration from an Optuna trial.

    Args:
        trial: Optuna trial object providing the ``suggest_*`` methods.

    Returns:
        dict with the sampled ``learning_rate``, ``num_train_epochs``,
        ``per_device_train_batch_size``, ``weight_decay`` and
        ``gradient_accumulation_steps``.
    """
    return {
        # suggest_float(..., log=True) samples on a log scale and replaces the
        # deprecated suggest_loguniform.
        "learning_rate": trial.suggest_float("learning_rate", 6e-5, 3e-4, log=True),
        "num_train_epochs": trial.suggest_int("num_train_epochs", 4, 7),
        "per_device_train_batch_size": trial.suggest_categorical("per_device_train_batch_size", [4, 6, 8]),
        "weight_decay": trial.suggest_float("weight_decay", 1e-5, 1e-3, log=True),
        "gradient_accumulation_steps": trial.suggest_categorical("gradient_accumulation_steps", [2, 4, 6]),
    }


# Build the Trainer used for the search: a fresh model per run via model_init,
# evaluation and logging once per epoch, no checkpoints saved, no external reporting.
trainer = Trainer(
    model_init=model_init,
    args=TrainingArguments(
        output_dir="./moda_telo",
        evaluation_strategy="epoch",
        save_strategy="no",
        logging_strategy="epoch",
        report_to="none",
        greater_is_better=False
    ),
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    data_collator=data_collator,
)

# Search for the best hyperparameters with Optuna.
best_trial = trainer.hyperparameter_search(
    direction="minimize",  # minimize the validation loss
    hp_space=hp_space,
    backend="optuna",
    n_trials=10  # number of configurations to try
)

# Show the winning run (id, objective value and hyperparameters).
print(best_trial)




config.json:   0%|          | 0.00/642 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/863M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

[I 2025-02-12 23:35:09,373] A new study created in memory with name: no-name-a458d9ef-ad8d-4c09-87dd-9e7464e0816c
  "learning_rate": trial.suggest_loguniform("learning_rate", 6e-5, 2e-4),
  "weight_decay": trial.suggest_loguniform("weight_decay", 1e-5, 1e-3),


Epoch,Training Loss,Validation Loss
0,3.1266,2.216038
1,2.1617,1.914201
2,1.8855,1.768643
3,1.7318,1.693733
4,1.6288,1.649274
5,1.5617,1.640623


[I 2025-02-13 00:04:15,859] Trial 0 finished with value: 1.640622854232788 and parameters: {'learning_rate': 7.083787970162043e-05, 'num_train_epochs': 6, 'per_device_train_batch_size': 6, 'weight_decay': 1.951701941758718e-05, 'gradient_accumulation_steps': 4}. Best is trial 0 with value: 1.640622854232788.


Epoch,Training Loss,Validation Loss
1,2.6746,1.969728
2,1.883,1.713442
3,1.6221,1.603994
4,1.4712,1.554089
5,1.3794,1.537945


[I 2025-02-13 00:32:00,224] Trial 1 finished with value: 1.5379447937011719 and parameters: {'learning_rate': 6.17124786231849e-05, 'num_train_epochs': 5, 'per_device_train_batch_size': 4, 'weight_decay': 0.00010347979950698736, 'gradient_accumulation_steps': 2}. Best is trial 1 with value: 1.5379447937011719.


Epoch,Training Loss,Validation Loss
0,2.9773,2.101829
1,2.0274,1.805093
2,1.7377,1.667322
3,1.5723,1.597446
4,1.4566,1.554194
5,1.3828,1.549774


[I 2025-02-13 01:01:14,239] Trial 2 finished with value: 1.5497742891311646 and parameters: {'learning_rate': 9.380717206957897e-05, 'num_train_epochs': 6, 'per_device_train_batch_size': 6, 'weight_decay': 4.132795959850116e-05, 'gradient_accumulation_steps': 4}. Best is trial 1 with value: 1.5379447937011719.


Epoch,Training Loss,Validation Loss
1,2.7024,1.983652
2,1.8964,1.738356
3,1.5349,1.610291


[I 2025-02-13 01:21:04,891] Trial 3 finished with value: 1.610290765762329 and parameters: {'learning_rate': 7.732219652412872e-05, 'num_train_epochs': 4, 'per_device_train_batch_size': 6, 'weight_decay': 0.00029020053186847326, 'gradient_accumulation_steps': 2}. Best is trial 1 with value: 1.5379447937011719.


Epoch,Training Loss,Validation Loss
0,2.9543,2.053633
1,1.9434,1.725103
2,1.6007,1.575919
3,1.4055,1.507954
4,1.2665,1.474683
5,1.1792,1.46998


[I 2025-02-13 01:49:07,367] Trial 4 finished with value: 1.4699796438217163 and parameters: {'learning_rate': 0.00015383670181318683, 'num_train_epochs': 6, 'per_device_train_batch_size': 8, 'weight_decay': 0.0009721525976886408, 'gradient_accumulation_steps': 4}. Best is trial 4 with value: 1.4699796438217163.


Epoch,Training Loss,Validation Loss
0,2.8872,2.032518
1,1.9391,1.732657
2,1.6268,1.593424
3,1.4352,1.515823
4,1.2935,1.46999
5,1.2053,1.456344
6,1.1386,1.455511


[I 2025-02-13 02:23:13,965] Trial 5 finished with value: 1.4555113315582275 and parameters: {'learning_rate': 0.00011273744271310599, 'num_train_epochs': 7, 'per_device_train_batch_size': 6, 'weight_decay': 0.0001241882895534497, 'gradient_accumulation_steps': 4}. Best is trial 5 with value: 1.4555113315582275.


Epoch,Training Loss,Validation Loss
0,3.3309,2.346953


[I 2025-02-13 02:27:56,797] Trial 6 pruned. 


Epoch,Training Loss,Validation Loss
1,2.8601,2.082105
2,1.9926,1.812601
3,1.7388,1.69312
4,1.5326,1.619664


[I 2025-02-13 02:54:38,221] Trial 7 finished with value: 1.619663953781128 and parameters: {'learning_rate': 7.186835192144214e-05, 'num_train_epochs': 5, 'per_device_train_batch_size': 4, 'weight_decay': 1.4172166420353927e-05, 'gradient_accumulation_steps': 4}. Best is trial 5 with value: 1.4555113315582275.


Epoch,Training Loss,Validation Loss
1,2.4104,1.744947
2,1.5517,1.494598
3,1.0488,1.397421


[I 2025-02-13 03:14:28,338] Trial 8 finished with value: 1.3974213600158691 and parameters: {'learning_rate': 0.00017168124793893604, 'num_train_epochs': 4, 'per_device_train_batch_size': 6, 'weight_decay': 0.0005161684608654698, 'gradient_accumulation_steps': 2}. Best is trial 8 with value: 1.3974213600158691.


Epoch,Training Loss,Validation Loss
1,2.6866,1.961842
2,1.8403,1.697874
3,1.4559,1.569638


[I 2025-02-13 03:33:25,118] Trial 9 finished with value: 1.5696381330490112 and parameters: {'learning_rate': 0.00010597869220445967, 'num_train_epochs': 4, 'per_device_train_batch_size': 8, 'weight_decay': 6.754614987928229e-05, 'gradient_accumulation_steps': 2}. Best is trial 8 with value: 1.3974213600158691.


BestRun(run_id='8', objective=1.3974213600158691, hyperparameters={'learning_rate': 0.00017168124793893604, 'num_train_epochs': 4, 'per_device_train_batch_size': 6, 'weight_decay': 0.0005161684608654698, 'gradient_accumulation_steps': 2}, run_summary=None)


In [None]:
# After hyperparameter_search() the trainer still holds the model from the LAST
# trial, which is not necessarily the best one. Apply the winning configuration
# and train once more so the model that is evaluated and saved is the best one.
for hp_name, hp_value in best_trial.hyperparameters.items():
    setattr(trainer.args, hp_name, hp_value)
trainer.train()

model = trainer.model  # model trained with the best hyperparameters found

In [None]:
# Evaluate on the validation set.
eval_results = trainer.evaluate()

# Perplexity is the exponential of the mean loss returned by the Trainer.
if "eval_loss" in eval_results:
    eval_loss = eval_results["eval_loss"]
    perplexity = torch.tensor(eval_loss).exp()
    eval_results["perplexity"] = perplexity.item()

print(eval_results)

{'eval_loss': 1.5696381330490112, 'eval_runtime': 11.9293, 'eval_samples_per_second': 21.292, 'eval_steps_per_second': 1.341, 'epoch': 3.951048951048951, 'perplexity': 4.8049092292785645}


In [None]:
# Persist the trained model and the tokenizer side by side in one directory.
final_dir = "./moda_modelo/final"
trainer.save_model(final_dir)
tokenizer.save_pretrained(final_dir)


('./moda_modelo/final/tokenizer_config.json',
 './moda_modelo/final/special_tokens_map.json',
 './moda_modelo/final/vocab.json',
 './moda_modelo/final/merges.txt',
 './moda_modelo/final/added_tokens.json',
 './moda_modelo/final/tokenizer.json')

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Relative path, matching the directory the notebook saves the model to, so the
# reload does not depend on the working directory being /kaggle/working.
model_path = "./moda_modelo/final"

# Load the fine-tuned model and its tokenizer back from disk.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

def generate_response(prompt, model):
    """Generate a sampled reply to ``prompt`` with the given model.

    Encoding and decoding use the notebook-level ``tokenizer``.

    Args:
        prompt: Text to respond to; the EOS token is appended before encoding.
        model: Causal language model exposing ``generate``.

    Returns:
        The decoded continuation only (the prompt tokens are sliced off),
        with special tokens removed.
    """
    # Encode the prompt followed by the EOS token.
    bot_input_ids = tokenizer.encode(prompt + tokenizer.eos_token, return_tensors='pt')

    # Keep the inputs on the same device as the model when the model reports one.
    model_device = getattr(model, "device", None)
    if model_device is not None:
        bot_input_ids = bot_input_ids.to(model_device)

    # Attention mask: the input is a single, unpadded sequence, so every token
    # must be attended to. Deriving the mask from the pad id would hide the EOS
    # that was just appended, because the pad token is the EOS token.
    attention_mask = bot_input_ids.new_ones(bot_input_ids.shape)

    # Sample the reply.
    chat_history_ids = model.generate(
        bot_input_ids,
        attention_mask=attention_mask,
        max_length=256,
        pad_token_id=tokenizer.eos_token_id,
        no_repeat_ngram_size=2,
        do_sample=True,
        top_k=50,
        top_p=0.9,
        temperature=0.7
    )

    # Decode only the newly generated tokens (everything after the prompt).
    response = tokenizer.decode(chat_history_ids[:, bot_input_ids.shape[-1]:][0], skip_special_tokens=True)
    return response


# Prompt in English (the dataset is in English): a context sentence
# immediately followed by the question.
prompt = (
    "The trendy goth style in today's fashion blends velvet dresses, fitted corsets, and leather jackets with modern touches like chunky platform boots and silver accessories."
    "Can you recomend me a goth outfit where a dress will stand out!"
)
response = generate_response(prompt, model)
print("Response: ", response)

Response:   Velvet dresses are perfect for daytime goths, while fitted coats, leather gloves, or a chic monochrome outfit creates a sophisticated, elegant look.Gothic style focuses on rich, vibrant colors and layered designs. Choosing sleek fabrics and interesting accessories can make an outfit standout. Look for rich metallic or dark-purple patterns to elevate a classic gothy look, especially when leather or metallic accessories that enhance the dramatic effect.An example is the rusted crossbody suit of a velvet dress featuring intricate details like a metallic paintjob or an elaborate velvet clutch.Velvet dresses can also capture a dramatic aesthetic when styled in unique and stylish ways. They create an eye - catching contrast to modern ghoulish, dramatic contrast.Creating a glamorous, stylish effect that enhances the outfit.Understanding gourmet gaunt style. Goths strive to evoke a mysterious, mysterious elegance in their aesthetic. Investing a timeless elegance and refined and ele