# peft qlora chatglm with different dataset

```
第三周作业一: 
1、使用 GPTQ 量化 OPT-6.7B 模型。课程代码（ https://github.com/DjangoPeng/LLM-quickstart/blob/main/quantization/AutoGPTQ_opt-2.7b.ipynb ）
2、使用 AWQ 量化 Facebook OPT-6.7B 模型。Facebook OPT 模型地址： https://huggingface.co/facebook?search_models=opt
课程代码： https://github.com/DjangoPeng/LLM-quickstart/blob/main/quantization/AWQ_opt-2.7b.ipynb
https://github.com/DjangoPeng/LLM-quickstart/blob/main/quantization/AWQ-opt-125m.ipynb 

第三周作业二： 根据硬件资源情况，在 AdvertiseGen 数据集上使用 QLoRA 微调 ChatGLM3-6B 至少 10K examples，观察 Loss 变化情况，并对比微调前后模型输出结果。------> this notebook is for this task.
课程代码： 
https://github.com/DjangoPeng/LLM-quickstart/blob/main/peft/peft_qlora_chatglm.ipynb
https://github.com/DjangoPeng/LLM-quickstart/blob/main/peft/peft_chatglm_inference.ipynb


```

In [1]:
# This cell trains the model on the dataset HasturOfficial/adgen; each dataset gets its own cell so GPU memory can be freed in between.
from datasets import load_dataset, ClassLabel, Sequence
from transformers import AutoTokenizer, TrainingArguments, Trainer
import torch
from peft import TaskType, LoraConfig, get_peft_model, prepare_model_for_kbit_training
from peft.utils import TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING
from transformers import AutoModel, BitsAndBytesConfig
import random
import pandas as pd
from IPython.display import display, HTML

# ======================
# 1. Parameter setup
# ======================

# Hugging Face id of the base model; the load calls in this cell pin
# revision 'b098244' for both the tokenizer and the weights.
model_name_or_path = 'THUDM/chatglm3-6b'
seed = 8  # intended random seed for the run
# Token budgets used by tokenize_func: the prompt is cut to
# max_input_length - 2 tokens and the answer to max_output_length - 1 tokens.
# 512 + 1536 = 2048, which equals DataCollatorForChatGLM's default max_length.
max_input_length = 512
max_output_length = 1536
# LoRA hyper-parameters, forwarded to LoraConfig as r / lora_alpha / lora_dropout.
lora_rank = 4
lora_alpha = 32
lora_dropout = 0.05
resume_from_checkpoint = None  # intended checkpoint to resume from; None means start fresh
prompt_text = ''  # prefix that tokenize_func prepends to every example's 'content'
# Key into _compute_dtype_map (4-bit compute dtype); also decides which of the
# fp16 / bf16 flags is switched on in TrainingArguments.
compute_dtype = 'bf16'  # fp32 / fp16 / bf16

# Datasets to train on, one training run per entry.
dataset_names = [
    "HasturOfficial/adgen",
    #"shibing624/AdvertiseGen",
    # more datasets can be added here
]

# Only a subset is used: the full 10k examples take too long to train.
num_train_samples = 1000
# Evaluate on one tenth as many examples as are used for training.
num_eval_samples = num_train_samples // 10

# Training hyper-parameters, forwarded to TrainingArguments.
# Per-device effective batch is 8 * 2 = 16 sequences per optimizer step.
per_device_train_batch_size = 8
per_device_eval_batch_size = 8
gradient_accumulation_steps = 2
learning_rate = 1e-3
num_train_epochs = 1
# Earlier variant that derived the step intervals from the sample count
# (kept for reference; the fixed values below are the ones in effect):
#eval_steps = num_train_samples // (5 * per_device_train_batch_size * gradient_accumulation_steps)
#save_steps = num_train_samples // (3 * per_device_train_batch_size * gradient_accumulation_steps)
#logging_steps = num_train_samples // (15 * per_device_train_batch_size * gradient_accumulation_steps)
logging_steps = 10
eval_steps = 10
save_steps = 30

# Maps the compute_dtype string to the corresponding torch dtype.
_compute_dtype_map = {
    'fp32': torch.float32,
    'fp16': torch.float16,
    'bf16': torch.bfloat16
}

# the tokenize function
def tokenize_func(example, tokenizer, ignore_label_id=-100):
    """Convert one dataset record into ``input_ids`` and ``labels``.

    The prompt is ``prompt_text + example['content']``, followed by a newline
    and ``example['input']`` when that field is present and not blank. The
    target is ``example['summary']``. Both sides are encoded without special
    tokens and clipped to the module-level budgets before the tokenizer joins
    them.

    Args:
        example: Mapping with ``'content'`` and ``'summary'`` keys and an
            optional ``'input'`` key.
        tokenizer: Object providing ``encode`` and
            ``build_inputs_with_special_tokens``.
        ignore_label_id: Label value written over the prompt positions.

    Returns:
        Dict with ``'input_ids'`` (the joined sequence) and ``'labels'``
        (same sequence with the prompt part replaced by ``ignore_label_id``).
    """
    prompt = prompt_text + example['content']
    extra = example.get('input', None)
    if extra and extra.strip():
        prompt += f'\n{extra}'
    target = example['summary']

    prompt_ids = tokenizer.encode(text=prompt, add_special_tokens=False)
    target_ids = tokenizer.encode(text=target, add_special_tokens=False)

    # Slicing is a no-op when the sequence already fits the budget.
    prompt_ids = prompt_ids[:max_input_length - 2]
    target_ids = target_ids[:max_output_length - 1]

    token_ids = tokenizer.build_inputs_with_special_tokens(prompt_ids, target_ids)

    # The "+ 2" assumes the tokenizer puts exactly two special tokens in front
    # of the prompt -- TODO confirm against the tokenizer implementation.
    masked_len = len(prompt_ids) + 2
    return {
        'input_ids': token_ids,
        'labels': [ignore_label_id] * masked_len + token_ids[masked_len:],
    }

# the DataCollator class
class DataCollatorForChatGLM:
    """Pad tokenized examples to a common length and stack them into tensors.

    Examples are emitted longest-first (ties keep their incoming order).
    ``input_ids`` are padded with ``pad_token_id`` and ``labels`` with
    ``ignore_label_id``. When the longest example exceeds ``max_length``,
    every row is cut to ``max_length``.
    """

    def __init__(self, pad_token_id: int, max_length: int = 2048, ignore_label_id: int = -100):
        self.max_length = max_length
        self.pad_token_id = pad_token_id
        self.ignore_label_id = ignore_label_id

    def __call__(self, batch):
        """Collate a list of ``{'input_ids': [...], 'labels': [...]}`` dicts.

        Returns a dict with two stacked ``LongTensor`` values, ``'input_ids'``
        and ``'labels'``.
        """
        longest = max(len(item['input_ids']) for item in batch)
        must_clip = longest > self.max_length

        # Negated key on a stable sort: descending length, ties in input order.
        ordered = sorted(batch, key=lambda item: -len(item['input_ids']))

        id_rows = []
        label_rows = []
        for item in ordered:
            fill = longest - len(item['input_ids'])
            row_ids = item['input_ids'] + [self.pad_token_id] * fill
            row_labels = item['labels'] + [self.ignore_label_id] * fill
            if must_clip:
                row_ids = row_ids[:self.max_length]
                row_labels = row_labels[:self.max_length]
            id_rows.append(torch.LongTensor(row_ids))
            label_rows.append(torch.LongTensor(row_labels))

        return {
            'input_ids': torch.stack(id_rows),
            'labels': torch.stack(label_rows)
        }
        
# Train/eval once per dataset listed in dataset_names.
import gc

# The tokenizer and collator do not depend on the dataset, so build them once.
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True, revision='b098244')
data_collator = DataCollatorForChatGLM(pad_token_id=tokenizer.pad_token_id)

for dataset_name in dataset_names:
    print("\n" + "="*50)
    print(f"🔍 Train/Eval with dataset: {dataset_name}")
    print("="*50)

    # ======================
    # 0. release the previous iteration's model
    # ======================
    # Without this the next 4-bit load can fail with "Some modules are
    # dispatched on the CPU or the disk" because the old weights still occupy
    # the GPU. Dropping the references here (rather than after training) keeps
    # `trainer` / `model` of the last dataset available once the loop ends.
    model = qlora_model = trainer = None
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # ======================
    # 1. load dataset
    # ======================
    dataset = load_dataset(dataset_name)

    eval_dataset_loaded = None
    if 'validation' in dataset:
        eval_dataset_loaded = dataset['validation']
    else:
        print("⚠️  The dataset doesn't have 'validation' split， not evaluating.")

    # ======================
    # 2. set the used train/eval samples
    # ======================
    # Clamp to the split size so a small dataset does not make select() raise.
    if num_train_samples is not None and num_train_samples > 0:
        print(f"num_train_samples: {num_train_samples}")
        train_count = min(num_train_samples, len(dataset['train']))
        dataset['train'] = dataset['train'].select(range(train_count))
    if eval_dataset_loaded is not None and num_eval_samples is not None and num_eval_samples > 0:
        print(f"num_eval_samples: {num_eval_samples}")
        eval_count = min(num_eval_samples, len(eval_dataset_loaded))
        eval_dataset_loaded = eval_dataset_loaded.select(range(eval_count))

    # ======================
    # 3. Tokenize
    # ======================
    column_names = dataset['train'].column_names
    tokenized_train = dataset['train'].map(lambda x: tokenize_func(x, tokenizer), batched=False, remove_columns=column_names)

    tokenized_eval = None
    if eval_dataset_loaded is not None:
        column_names_eval = eval_dataset_loaded.column_names
        tokenized_eval = eval_dataset_loaded.map(lambda x: tokenize_func(x, tokenizer), batched=False, remove_columns=column_names_eval)

    # ======================
    # 4. init the model, LoRA / QLoRA
    # ======================
    model = AutoModel.from_pretrained(
        model_name_or_path,
        quantization_config=BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type='nf4',
            bnb_4bit_use_double_quant=True,
            bnb_4bit_compute_dtype=_compute_dtype_map[compute_dtype]
        ),
        device_map='auto',
        trust_remote_code=True,
        revision='b098244'
    )
    model = prepare_model_for_kbit_training(model)
    # Fall back to ChatGLM's fused attention projection if PEFT has no mapping.
    target_modules = TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING.get('chatglm', ['query_key_value'])
    lora_config = LoraConfig(
        target_modules=target_modules,
        r=lora_rank,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        bias='none',
        inference_mode=False,
        task_type=TaskType.CAUSAL_LM
    )
    qlora_model = get_peft_model(model, lora_config)
    qlora_model.print_trainable_parameters()

    # ======================
    # 5. TrainingArguments & Trainer
    # ======================
    saved_dir = f"models/finetuned_{dataset_name.replace('/', '_')}"
    print(
        f"training_args num_train_epochs: {num_train_epochs}, "
        f"logging_steps: {logging_steps}, eval_steps: {eval_steps}, save_steps: {save_steps}"
        f", per_device_train_batch_size: {per_device_train_batch_size}, , gradient_accumulation_steps: {gradient_accumulation_steps}"
    )

    training_args = TrainingArguments(
        output_dir=saved_dir,
        per_device_train_batch_size=per_device_train_batch_size,
        per_device_eval_batch_size=per_device_eval_batch_size,
        gradient_accumulation_steps=gradient_accumulation_steps,
        learning_rate=learning_rate,
        num_train_epochs=num_train_epochs,
        # Periodic evaluation only makes sense when a validation split exists.
        evaluation_strategy="steps" if eval_dataset_loaded is not None else "no",
        eval_steps=eval_steps,
        save_strategy="steps",
        save_steps=save_steps,
        logging_steps=logging_steps,
        fp16=(compute_dtype == 'fp16'),
        bf16=(compute_dtype == 'bf16'),
        # Use the configured seed; previously it was defined but never applied.
        seed=seed,
        report_to="none"
    )

    trainer = Trainer(
        model=qlora_model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_eval,
        data_collator=data_collator,
    )

    # ======================
    # 6. train & auto print the train/validation loss
    # ======================
    # Honour the configured checkpoint; None (the default) starts from scratch.
    trainer.train(resume_from_checkpoint=resume_from_checkpoint)

    # ======================
    # 7. save the tuned adapter
    # ======================
    trainer.model.save_pretrained(saved_dir)
    print(f"✅ fine tuned model saved to path: {saved_dir}")


🔍 Train/Eval with dataset: HasturOfficial/adgen
num_train_samples: 1000
num_eval_samples: 100




Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

You are using an old version of the checkpointing format that is deprecated (We will also silently ignore `gradient_checkpointing_kwargs` in case you passed it).Please update to the new format on your modeling file. To use the new format, you need to completely remove the definition of the method `_set_gradient_checkpointing` in your model.


trainable params: 974,848 || all params: 6,244,558,848 || trainable%: 0.01561115883009451
training_args num_train_epochs: 1, logging_steps: 10, eval_steps: 10, save_steps: 30, per_device_train_batch_size: 8, , gradient_accumulation_steps: 2


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss,Validation Loss
10,4.2741,3.904693
20,3.6916,3.761558
30,3.5995,3.688349
40,3.586,3.647342
50,3.5033,3.624843
60,3.5322,3.612717


Checkpoint destination directory models/finetuned_HasturOfficial_adgen/checkpoint-30 already exists and is non-empty.Saving will proceed but saved results may be invalid.


✅ fine tuned model saved to path: models/finetuned_HasturOfficial_adgen

🔍 Train/Eval with dataset: shibing624/AdvertiseGen
num_train_samples: 1000
num_eval_samples: 100




ValueError: 
                        Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit
                        the quantized model. If you want to dispatch the model on the CPU or the disk while keeping
                        these modules in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom
                        `device_map` to `from_pretrained`. Check
                        https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu
                        for more details.
                        

In [1]:
# This cell trains the model on the dataset shibing624/AdvertiseGen; it is a separate cell so GPU memory can be freed first.
# Free GPU memory left over from the previous cell before loading the model again.
def freeup_mem():
    """Clear the notebook namespace, then release cached GPU memory.

    The order matters: objects must become unreachable before the garbage
    collector can reclaim them, and the CUDA caching allocator can only give
    back blocks whose tensors have already been freed. The steps are therefore
    (1) reset the interactive namespace, (2) run the garbage collector,
    (3) empty the CUDA cache.

    Outside IPython (no active shell, or IPython not installed) the namespace
    reset is skipped and only steps 2 and 3 run.
    """
    print("\n🔧 Releasing GPU ...")
    import gc
    import torch

    try:
        from IPython import get_ipython
        shell = get_ipython()
    except ImportError:
        shell = None
    if shell is not None:
        # Plain-Python spelling of the `%reset -f` line magic.
        shell.run_line_magic('reset', '-f')

    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()


# Free memory before anything else in this cell runs.
freeup_mem()

from datasets import load_dataset, ClassLabel, Sequence
from transformers import AutoTokenizer, TrainingArguments, Trainer
import torch
from peft import TaskType, LoraConfig, get_peft_model, prepare_model_for_kbit_training
from peft.utils import TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING
from transformers import AutoModel, BitsAndBytesConfig
import random
import pandas as pd
from IPython.display import display, HTML

# ======================
# 1. Parameter setup
# ======================

# Hugging Face id of the base model; the load calls in this cell pin
# revision 'b098244' for both the tokenizer and the weights.
model_name_or_path = 'THUDM/chatglm3-6b'
seed = 8  # intended random seed for the run
# Token budgets used by tokenize_func: the prompt is cut to
# max_input_length - 2 tokens and the answer to max_output_length - 1 tokens.
# 512 + 1536 = 2048, which equals DataCollatorForChatGLM's default max_length.
max_input_length = 512
max_output_length = 1536
# LoRA hyper-parameters, forwarded to LoraConfig as r / lora_alpha / lora_dropout.
lora_rank = 4
lora_alpha = 32
lora_dropout = 0.05
resume_from_checkpoint = None  # intended checkpoint to resume from; None means start fresh
prompt_text = ''  # prefix that tokenize_func prepends to every example's 'content'
# Key into _compute_dtype_map (4-bit compute dtype); also decides which of the
# fp16 / bf16 flags is switched on in TrainingArguments.
compute_dtype = 'bf16'  # fp32 / fp16 / bf16

# Datasets to train on, one training run per entry.
dataset_names = [
    #"HasturOfficial/adgen",
    "shibing624/AdvertiseGen",
    # more datasets can be added here
]

# Only a subset is used: the full 10k examples take too long to train.
num_train_samples = 1000
# Evaluate on one tenth as many examples as are used for training.
num_eval_samples = num_train_samples // 10

# Training hyper-parameters, forwarded to TrainingArguments.
# Per-device effective batch is 8 * 2 = 16 sequences per optimizer step.
per_device_train_batch_size = 8
per_device_eval_batch_size = 8
gradient_accumulation_steps = 2
learning_rate = 1e-3
num_train_epochs = 1
# Earlier variant that derived the step intervals from the sample count
# (kept for reference; the fixed values below are the ones in effect):
#eval_steps = num_train_samples // (5 * per_device_train_batch_size * gradient_accumulation_steps)
#save_steps = num_train_samples // (3 * per_device_train_batch_size * gradient_accumulation_steps)
#logging_steps = num_train_samples // (15 * per_device_train_batch_size * gradient_accumulation_steps)
logging_steps = 10
eval_steps = 10
save_steps = 30

# Maps the compute_dtype string to the corresponding torch dtype.
_compute_dtype_map = {
    'fp32': torch.float32,
    'fp16': torch.float16,
    'bf16': torch.bfloat16
}

# the tokenize function
def tokenize_func(example, tokenizer, ignore_label_id=-100):
    """Convert one dataset record into ``input_ids`` and ``labels``.

    The prompt is ``prompt_text + example['content']``, followed by a newline
    and ``example['input']`` when that field is present and not blank. The
    target is ``example['summary']``. Both sides are encoded without special
    tokens and clipped to the module-level budgets before the tokenizer joins
    them.

    Args:
        example: Mapping with ``'content'`` and ``'summary'`` keys and an
            optional ``'input'`` key.
        tokenizer: Object providing ``encode`` and
            ``build_inputs_with_special_tokens``.
        ignore_label_id: Label value written over the prompt positions.

    Returns:
        Dict with ``'input_ids'`` (the joined sequence) and ``'labels'``
        (same sequence with the prompt part replaced by ``ignore_label_id``).
    """
    prompt = prompt_text + example['content']
    extra = example.get('input', None)
    if extra and extra.strip():
        prompt += f'\n{extra}'
    target = example['summary']

    prompt_ids = tokenizer.encode(text=prompt, add_special_tokens=False)
    target_ids = tokenizer.encode(text=target, add_special_tokens=False)

    # Slicing is a no-op when the sequence already fits the budget.
    prompt_ids = prompt_ids[:max_input_length - 2]
    target_ids = target_ids[:max_output_length - 1]

    token_ids = tokenizer.build_inputs_with_special_tokens(prompt_ids, target_ids)

    # The "+ 2" assumes the tokenizer puts exactly two special tokens in front
    # of the prompt -- TODO confirm against the tokenizer implementation.
    masked_len = len(prompt_ids) + 2
    return {
        'input_ids': token_ids,
        'labels': [ignore_label_id] * masked_len + token_ids[masked_len:],
    }

# the DataCollator class
class DataCollatorForChatGLM:
    """Pad tokenized examples to a common length and stack them into tensors.

    Examples are emitted longest-first (ties keep their incoming order).
    ``input_ids`` are padded with ``pad_token_id`` and ``labels`` with
    ``ignore_label_id``. When the longest example exceeds ``max_length``,
    every row is cut to ``max_length``.
    """

    def __init__(self, pad_token_id: int, max_length: int = 2048, ignore_label_id: int = -100):
        self.max_length = max_length
        self.pad_token_id = pad_token_id
        self.ignore_label_id = ignore_label_id

    def __call__(self, batch):
        """Collate a list of ``{'input_ids': [...], 'labels': [...]}`` dicts.

        Returns a dict with two stacked ``LongTensor`` values, ``'input_ids'``
        and ``'labels'``.
        """
        longest = max(len(item['input_ids']) for item in batch)
        must_clip = longest > self.max_length

        # Negated key on a stable sort: descending length, ties in input order.
        ordered = sorted(batch, key=lambda item: -len(item['input_ids']))

        id_rows = []
        label_rows = []
        for item in ordered:
            fill = longest - len(item['input_ids'])
            row_ids = item['input_ids'] + [self.pad_token_id] * fill
            row_labels = item['labels'] + [self.ignore_label_id] * fill
            if must_clip:
                row_ids = row_ids[:self.max_length]
                row_labels = row_labels[:self.max_length]
            id_rows.append(torch.LongTensor(row_ids))
            label_rows.append(torch.LongTensor(row_labels))

        return {
            'input_ids': torch.stack(id_rows),
            'labels': torch.stack(label_rows)
        }
        
# Train/eval once per dataset listed in dataset_names.
import gc

# The tokenizer and collator do not depend on the dataset, so build them once.
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True, revision='b098244')
data_collator = DataCollatorForChatGLM(pad_token_id=tokenizer.pad_token_id)

for dataset_name in dataset_names:
    print("\n" + "="*50)
    print(f"🔍 Train/Eval with dataset: {dataset_name}")
    print("="*50)

    # ======================
    # 0. release the previous iteration's model
    # ======================
    # Without this the next 4-bit load can fail with "Some modules are
    # dispatched on the CPU or the disk" because the old weights still occupy
    # the GPU. Dropping the references here (rather than after training) keeps
    # `trainer` / `model` of the last dataset available once the loop ends.
    model = qlora_model = trainer = None
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # ======================
    # 1. load dataset
    # ======================
    dataset = load_dataset(dataset_name)

    eval_dataset_loaded = None
    if 'validation' in dataset:
        eval_dataset_loaded = dataset['validation']
    else:
        print("⚠️  The dataset doesn't have 'validation' split， not evaluating.")

    # ======================
    # 2. set the used train/eval samples
    # ======================
    # Clamp to the split size so a small dataset does not make select() raise.
    if num_train_samples is not None and num_train_samples > 0:
        print(f"num_train_samples: {num_train_samples}")
        train_count = min(num_train_samples, len(dataset['train']))
        dataset['train'] = dataset['train'].select(range(train_count))
    if eval_dataset_loaded is not None and num_eval_samples is not None and num_eval_samples > 0:
        print(f"num_eval_samples: {num_eval_samples}")
        eval_count = min(num_eval_samples, len(eval_dataset_loaded))
        eval_dataset_loaded = eval_dataset_loaded.select(range(eval_count))

    # ======================
    # 3. Tokenize
    # ======================
    column_names = dataset['train'].column_names
    tokenized_train = dataset['train'].map(lambda x: tokenize_func(x, tokenizer), batched=False, remove_columns=column_names)

    tokenized_eval = None
    if eval_dataset_loaded is not None:
        column_names_eval = eval_dataset_loaded.column_names
        tokenized_eval = eval_dataset_loaded.map(lambda x: tokenize_func(x, tokenizer), batched=False, remove_columns=column_names_eval)

    # ======================
    # 4. init the model, LoRA / QLoRA
    # ======================
    model = AutoModel.from_pretrained(
        model_name_or_path,
        quantization_config=BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type='nf4',
            bnb_4bit_use_double_quant=True,
            bnb_4bit_compute_dtype=_compute_dtype_map[compute_dtype]
        ),
        device_map='auto',
        trust_remote_code=True,
        revision='b098244'
    )
    model = prepare_model_for_kbit_training(model)
    # Fall back to ChatGLM's fused attention projection if PEFT has no mapping.
    target_modules = TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING.get('chatglm', ['query_key_value'])
    lora_config = LoraConfig(
        target_modules=target_modules,
        r=lora_rank,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        bias='none',
        inference_mode=False,
        task_type=TaskType.CAUSAL_LM
    )
    qlora_model = get_peft_model(model, lora_config)
    qlora_model.print_trainable_parameters()

    # ======================
    # 5. TrainingArguments & Trainer
    # ======================
    saved_dir = f"models/finetuned_{dataset_name.replace('/', '_')}"
    print(
        f"training_args num_train_epochs: {num_train_epochs}, "
        f"logging_steps: {logging_steps}, eval_steps: {eval_steps}, save_steps: {save_steps}"
        f", per_device_train_batch_size: {per_device_train_batch_size}, , gradient_accumulation_steps: {gradient_accumulation_steps}"
    )

    training_args = TrainingArguments(
        output_dir=saved_dir,
        per_device_train_batch_size=per_device_train_batch_size,
        per_device_eval_batch_size=per_device_eval_batch_size,
        gradient_accumulation_steps=gradient_accumulation_steps,
        learning_rate=learning_rate,
        num_train_epochs=num_train_epochs,
        # Periodic evaluation only makes sense when a validation split exists.
        evaluation_strategy="steps" if eval_dataset_loaded is not None else "no",
        eval_steps=eval_steps,
        save_strategy="steps",
        save_steps=save_steps,
        logging_steps=logging_steps,
        fp16=(compute_dtype == 'fp16'),
        bf16=(compute_dtype == 'bf16'),
        # Use the configured seed; previously it was defined but never applied.
        seed=seed,
        report_to="none"
    )

    trainer = Trainer(
        model=qlora_model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_eval,
        data_collator=data_collator,
    )

    # ======================
    # 6. train & auto print the train/validation loss
    # ======================
    # Honour the configured checkpoint; None (the default) starts from scratch.
    trainer.train(resume_from_checkpoint=resume_from_checkpoint)

    # ======================
    # 7. save the tuned adapter
    # ======================
    trainer.model.save_pretrained(saved_dir)
    print(f"✅ fine tuned model saved to path: {saved_dir}")
    


🔧 Releasing GPU ...

🔍 Train/Eval with dataset: shibing624/AdvertiseGen
num_train_samples: 1000
num_eval_samples: 100




Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

You are using an old version of the checkpointing format that is deprecated (We will also silently ignore `gradient_checkpointing_kwargs` in case you passed it).Please update to the new format on your modeling file. To use the new format, you need to completely remove the definition of the method `_set_gradient_checkpointing` in your model.


trainable params: 974,848 || all params: 6,244,558,848 || trainable%: 0.01561115883009451
training_args num_train_epochs: 1, logging_steps: 10, eval_steps: 10, save_steps: 30, per_device_train_batch_size: 8, , gradient_accumulation_steps: 2


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss,Validation Loss
10,4.2711,3.901426
20,3.6885,3.760577
30,3.6004,3.694746
40,3.5881,3.652654
50,3.5053,3.631722
60,3.5398,3.619895




✅ fine tuned model saved to path: models/finetuned_shibing624_AdvertiseGen


In [2]:
# This cell evaluates the base model and the fine-tuned models on the same input.
def freeup_mem():
    """Clear the notebook namespace, then release cached GPU memory.

    The order matters: objects must become unreachable before the garbage
    collector can reclaim them, and the CUDA caching allocator can only give
    back blocks whose tensors have already been freed. The steps are therefore
    (1) reset the interactive namespace, (2) run the garbage collector,
    (3) empty the CUDA cache.

    Outside IPython (no active shell, or IPython not installed) the namespace
    reset is skipped and only steps 2 and 3 run.
    """
    print("\n🔧 Releasing GPU ...")
    import gc
    import torch

    try:
        from IPython import get_ipython
        shell = get_ipython()
    except ImportError:
        shell = None
    if shell is not None:
        # Plain-Python spelling of the `%reset -f` line magic.
        shell.run_line_magic('reset', '-f')

    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()


# Free memory before anything else in this cell runs.
freeup_mem()

import torch
from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel, PeftConfig
import time

# ======================
# 1. parameters setup
# ======================

# the base model
model_name_or_path = 'THUDM/chatglm3-6b'
# Same revision the training cells pin, so the adapters are evaluated on the
# exact weights and remote modeling code they were trained against.
model_revision = 'b098244'

# Directories of the fine-tuned LoRA adapters to compare, one per dataset.
peft_model_paths = [
    "models/finetuned_HasturOfficial_adgen",
    "models/finetuned_shibing624_AdvertiseGen",
    # more adapters can be added here
]

# 4-bit NF4 quantization with double quantization.
# NOTE(review): the compute dtype here is float32 while training used bf16;
# confirm whether this difference is intentional.
q_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float32
)

# ======================
# 2. load the base model
# ======================

print(f"🔹 Loading the base model: {model_name_or_path} (not fine tuned)...")
base_model = AutoModel.from_pretrained(
    model_name_or_path,
    quantization_config=q_config,
    trust_remote_code=True,
    device_map='auto',
    revision=model_revision
)
# Inference only: no parameter needs gradients.
base_model.requires_grad_(False)

# load tokenizer (same pinned revision as the model)
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True, revision=model_revision)

# ======================
# 3. the test text input
# ======================
input_text = '类型#裙*版型#显瘦*风格#文艺*风格#简约*图案#印花*图案#撞色*裙下摆#压褶*裙长#连衣裙*裙领型#圆领'
print(f"\n📝 The test text input:\n{input_text}")

# ======================
# 4. function to evaluate the model
# ======================
def evaluate_model(model_title, model, tokenizer, input_text):
    """Run one chat turn through ``model`` and print the reply and its latency.

    Args:
        model_title: Label used in the printed banner and in error messages.
        model: Object exposing ``eval()`` and
            ``chat(tokenizer=..., query=...)`` returning ``(response, history)``.
        tokenizer: Tokenizer handed through to ``model.chat``.
        input_text: The query sent to the model.

    Returns:
        The response text, or ``None`` when inference raised. Failures are
        reported on stdout instead of propagating, so one broken model does
        not stop a comparison run.
    """
    print(f"\n{'='*50}")
    print(f"🤖 Evaluating Model:{model_title}")
    print(f"{'='*50}")

    started_at = time.strftime("%Y-%m-%d_%H%M%S", time.localtime())
    print(f"🚀  Start inferencing.... start_time: {started_at}")
    try:
        model.eval()
        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by wall-clock adjustments during a long generation.
        tick = time.perf_counter()
        response, _history = model.chat(tokenizer=tokenizer, query=input_text)
        processing_time_sec = time.perf_counter() - tick
    except Exception as e:
        print(f"❌ Failed to evaluate model {model_title} with error:{e}")
        return None

    print(f"\n⏱️  Processing time (inference): {processing_time_sec:.3f} seconds")
    print(f"📤  Response:\n{response}")
    return response

# ======================
# 5.1. Evaluate base model
# ======================
print("\n" + "="*60)
print(f"🔍 Evaluating base model: {model_name_or_path} (not fine tuned)")
print("="*60)
evaluate_model(f"Base model: {model_name_or_path} (not fine tuned)", base_model, tokenizer, input_text)

# ======================
# 5.2. Evaluating fine tuned models
# ======================
for peft_model_dir in peft_model_paths:
    print("\n" + "="*60)
    print(f"🔍 Loading PEFT model from: {peft_model_dir}")
    print("="*60)

    peft_model = None
    try:
        # Attach the LoRA adapter; PEFT injects its layers into base_model in place.
        peft_model = PeftModel.from_pretrained(base_model, peft_model_dir)
        evaluate_model(f"Fine tuned model ({peft_model_dir})", peft_model, tokenizer, input_text)
    except Exception as e:
        print(f"❌ Failed to load the model: {peft_model_dir} with error: {e}")
    finally:
        if peft_model is not None:
            # Strip the injected LoRA layers again (without merging) so the next
            # adapter is loaded onto a clean base model, not onto this one.
            base_model = peft_model.unload()
    


🔧 Releasing GPU ...
🔹 Loading the base model: THUDM/chatglm3-6b (not fine tuned)...




Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

Setting eos_token is not supported, use the default one.
Setting pad_token is not supported, use the default one.
Setting unk_token is not supported, use the default one.



📝 The test text input:
类型#裙*版型#显瘦*风格#文艺*风格#简约*图案#印花*图案#撞色*裙下摆#压褶*裙长#连衣裙*裙领型#圆领

🔍 Evaluating base model: THUDM/chatglm3-6b (not fine tuned)

🤖 Evaluating Model:Base model: THUDM/chatglm3-6b (not fine tuned)
🚀  Start inferencing.... start_time: 2025-08-08_123442

⏱️  Processing time (inference): 21.851 seconds
📤  Response:
根据您的描述，我为您推荐一款文艺简约的印花图案连衣裙，具有显瘦的版型和优雅的圆领设计。裙下摆采用压褶设计，增加层次感和时尚感。整款连衣裙以撞色图案为主题，展现出别样的个性魅力。这款连衣裙适合各种场合，如约会、聚会或上班等，让您在任何场合都能展现出优雅的气质。

购买这款连衣裙，您可以考虑以下商家和平台：
1. 大型电商平台：如淘宝、京东等，您可以比较多个商家的价格和服务，选择一家信誉良好的商家购买。
2. 官方品牌商城：如品牌官方商城、官方网站等，确保购买到正品，享受售后保障。
3. 时尚博主或网红店铺：参考时尚博主的搭配文章和网红店铺的推荐，获取更多搭配灵感。

在购买时，请注意以下几点：
1. 对比多个商家的价格，选择性价比高的产品。
2. 仔细阅读商品描述和图片，确认商品的品质、图案和版型是否符合您的要求。
3. 参考商家评价和客户评价，了解其他购买者对该商品的评价和建议。
4. 注意商品的尺寸、颜色和款式，确保符合您的需求。

希望您购买到满意的连衣裙，享受时尚的穿着体验！

🔍 Loading PEFT model from: models/finetuned_HasturOfficial_adgen

🤖 Evaluating Model:Fine tuned model (models/finetuned_HasturOfficial_adgen)
🚀  Start inferencing.... start_time: 2025-08-08_123504

⏱️  Processing time (inferen