# Prompt Engineering

In [None]:
!pip install -q torch transformers datasets==3.0.0 accelerate peft trl evaluate rouge_score

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/474.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m471.0/474.3 kB[0m [31m22.7 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m474.3/474.3 kB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/177.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m177.6/177.6 kB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
gcsfs 2025.3.2 requires fsspec==2025.3.2, but you have fsspec 2024.6.1 which is incompatible.[0m[31m
[0m

In [None]:
import os
import random
import warnings
from random import randint

import torch, json
import transformers
import evaluate
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, TaskType
from tqdm.notebook import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import SFTTrainer, SFTConfig

# Keep the notebook output readable: hide Python warnings and lower the
# transformers logger to errors only.
transformers.logging.set_verbosity_error()
warnings.filterwarnings("ignore")

# Report the library versions / hardware this run is using.
print(
    f"PyTorch version: {torch.__version__}",
    f"CUDA available: {torch.cuda.is_available()}",
    f"Transformers version: {transformers.__version__}",
    sep="\n",
)

PyTorch version: 2.6.0+cu124
CUDA available: True
Transformers version: 4.53.2


In [None]:
from huggingface_hub import login

# Never paste an access token into the notebook: it ends up in version control
# and in shared copies. Read it from the HF_TOKEN environment variable instead;
# when the variable is unset, `token=None` makes `login` ask for it interactively.
login(token=os.environ.get("HF_TOKEN"))

## Load Tokenizer and Model

In [None]:
model_name = "Qwen/Qwen2-1.5B"

tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse the end-of-sequence token for padding so `generate` has a pad id.
tokenizer.pad_token = tokenizer.eos_token

# `trust_remote_code` is intentionally not set: it allows code shipped in the
# model repository to run locally, and the Qwen2 architecture is implemented
# in the transformers library itself, so no remote code is needed.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",   # use the dtype stored in the checkpoint
    device_map="auto",    # let accelerate place the weights on the available device(s)
)

## Dataset and Preprocessing

In [None]:
class TweetSummPreprocessor():
    """Convert raw TweetSumm records into prompts and targets for summarisation.

    Calling an instance on a raw record (with ``'log'`` and
    ``'original dialog info'`` fields) returns a dict holding the cleaned
    dialog, one reference summary, and the prompt / completion / full text
    strings built from them.

    Args:
        tokenizer: object exposing ``eos_token``; the token is appended to
            every target so training text ends with end-of-sequence.
        seed: optional seed for the random choices made by this object (which
            reference summary to keep, which one-shot example to show). With
            the default ``None`` the module-level ``randint`` is used, so
            the choices follow the global ``random`` state.
    """

    def __init__(self, tokenizer, seed=None):
        self.tokenizer = tokenizer
        # A private generator is only created when a seed is supplied; otherwise
        # sampling goes through the global generator exactly as before.
        self._rng = random.Random(seed) if seed is not None else None
        self.instruction = "# Instruction\n" \
            "Based on the provided dialog between a customer and agent, " \
            "generate a concise summary of the dialog. " \
            "The summary should include the main idea of the dialog. " \
            "The summary should be exactly two sentences. " \
            "The first sentence of the summary is about the customer, " \
            "and the second sentence of summary is about the agent."

    def __pick_index(self, size, what):
        """Return a random index in ``[0, size)``; raise ValueError if ``size`` is 0."""
        if size <= 0:
            raise ValueError(f"cannot sample from an empty {what}")
        if self._rng is not None:
            return self._rng.randint(0, size - 1)
        return randint(0, size - 1)

    def __build_prompt(self, dialog):
        """Return the instruction followed by ``dialog`` and an open summary header."""
        return self.instruction + "\n# Dialog\n" + dialog + "\n# Summary\n"

    def __is_valid_word(self, word):
        """Reject words that start with one of ``@#*^<`` or that contain ``http``."""
        return not (word[0] in '@#*^<' or 'http' in word)

    def __preprocess_text(self, txt):
        """Drop non-ASCII characters and invalid words; collapse whitespace to single spaces."""
        txt = txt.encode('ascii', 'ignore').decode()
        return " ".join(filter(self.__is_valid_word, txt.split()))

    def __process_dialog(self, dialog):
        """Render the turns as alternating ``customer:`` / ``agent:`` lines."""
        dialog_txts = []

        for turn in dialog:
            user_text = self.__preprocess_text(turn['user utterance'])
            dialog_txts.append(f"customer: {user_text}")
            agent_text = self.__preprocess_text(turn['system response'])
            dialog_txts.append(f"agent: {agent_text}")

        return "\n".join(dialog_txts)

    def __process_summary(self, info):
        """Pick one abstractive summary at random from the JSON ``info`` and clean it.

        Raises:
            ValueError: if the record carries no abstractive summaries.
        """
        summaries = json.loads(info)['summaries']['abstractive_summaries']
        summary = summaries[self.__pick_index(len(summaries), "list of abstractive summaries")]

        return " ".join(self.__preprocess_text(sentence) for sentence in summary)

    def __process_datapoint(self, datapoint):
        """Build the cleaned dialog, reference summary and training strings for one record."""
        dialog = self.__process_dialog(datapoint['log'])
        summary = self.__process_summary(datapoint['original dialog info'])

        prompt = self.__build_prompt(dialog)
        # The tokenizer's own EOS token terminates the target.
        completion = summary + self.tokenizer.eos_token

        return {
            'dialog': dialog,
            'summary': summary,
            'text': prompt + completion,
            'prompt': prompt,
            "completion": completion
        }

    def __call__(self, datapoint):
        """Preprocess one raw record; see the class docstring for the returned keys."""
        return self.__process_datapoint(datapoint)

    def get_zero_shot_prompt(self, datapoint):
        """Return the instruction + dialog prompt for an already preprocessed ``datapoint``."""
        return self.__build_prompt(datapoint['dialog'])

    def get_one_shot_prompt(self, datapoint, datapool):
        """Return a prompt that shows one random solved example before ``datapoint``.

        Args:
            datapoint: preprocessed record whose ``'dialog'`` is to be summarised.
            datapool: indexable, sized collection of preprocessed records with
                ``'dialog'`` and ``'summary'``; the example is drawn from it.

        Raises:
            ValueError: if ``datapool`` is empty.
        """
        example = datapool[self.__pick_index(len(datapool), "datapool")]

        return self.__build_prompt(example['dialog']) + example['summary'] + \
               "\n# Dialog\n" + datapoint['dialog'] + "\n# Summary\n"

In [None]:
tweetSumm = load_dataset('Salesforce/dialogstudio', 'TweetSumm', trust_remote_code=True)
preprocessor = TweetSummPreprocessor(tokenizer)

# The preprocessor keeps one reference summary per dialog, chosen at random.
# Seed the global generator first so the references (and therefore the ROUGE
# scores computed against them) do not change from run to run.
random.seed(42)

# Raw fields that are no longer needed once the preprocessed columns exist.
colums_to_remove = ['original dialog id', 'new dialog id', 'dialog index', 'original dialog info', 'log']

# Same pipeline for every split: shuffle, preprocess, drop the raw columns.
for split in ('train', 'validation', 'test'):
    tweetSumm[split] = (
        tweetSumm[split]
        .shuffle(seed=42)
        .map(preprocessor)
        .remove_columns(colums_to_remove)
    )

# Reference summaries of the test split, in the order used by every evaluation below.
gt_summaries = [datapoint['summary'] for datapoint in tweetSumm['test']]
rouge = evaluate.load('rouge')

### Zero-Shot Evaluation

In [None]:
# One instruction + dialog prompt per test example; show the first as a sanity check.
zero_shot_dataset = list(map(preprocessor.get_zero_shot_prompt, tweetSumm['test']))
print(zero_shot_dataset[0])

# Instruction
Based on the provided dialog between a customer and agent, generate a concise summary of the dialog. The summary should include the main idea of the dialog. The summary should be exactly two sentences. The first sentence of the summary is about the customer, and the second sentence of summary is about the agent.
# Dialog
customer: looking to change my flight Friday, Oct 27. GRMSKV to DL4728 from SLC to ORD. Is that an option and what is the cost? Jess
agent: The difference in fare is $185.30. This would include all airport taxes and fees. The ticket is non-refundable changeable with a fee, and may result in additional fare collection for changes when making a future changes.
customer: I had a first class seat purchased for the original flight, would that be the same with this flight to Chicago?
agent: Hello, Jess. That is the fare difference. You will have to call us at 1 800 221 1212 to make any changes. It is in First class.
customer: thx
agent: Our pleasure.
customer: 

In [None]:
zero_shot_outputs = []

for prompt in tqdm(zero_shot_dataset):

    # Tokenize one prompt at a time and move the tensors to the model's device.
    inputs = tokenizer(prompt, return_tensors='pt').to(model.device)
    prompt_length = inputs['input_ids'].size(1)

    with torch.no_grad():
        tokens = model.generate(
            **inputs,
            max_new_tokens=64,
            pad_token_id=tokenizer.pad_token_id,
        )

    # The returned sequence starts with the prompt; keep only what was generated after it.
    tokens = tokens[0][prompt_length:]
    output = tokenizer.decode(tokens, skip_special_tokens=True)
    zero_shot_outputs.append(output)

  0%|          | 0/110 [00:00<?, ?it/s]

In [None]:
# Score the zero-shot generations against the reference summaries.
results = rouge.compute(references=gt_summaries, predictions=zero_shot_outputs)
print("Zero-Shot ROUGE Scores:", results, sep="\n")

Zero-Shot ROUGE Scores:
{'rouge1': np.float64(0.36726034343988745), 'rouge2': np.float64(0.11481313993461453), 'rougeL': np.float64(0.2814894848571833), 'rougeLsum': np.float64(0.2817368637071258)}


### One-Shot Evaluation

In [None]:
# Each prompt embeds one training example chosen at random; seed the global
# generator so the same examples are drawn on every run.
random.seed(42)

one_shot_dataset = [preprocessor.get_one_shot_prompt(datapoint, tweetSumm['train']) for datapoint in tweetSumm['test']]
print(one_shot_dataset[0])

# Instruction
Based on the provided dialog between a customer and agent, generate a concise summary of the dialog. The summary should include the main idea of the dialog. The summary should be exactly two sentences. The first sentence of the summary is about the customer, and the second sentence of summary is about the agent.
# Dialog
customer: also, how do I import my iTunes smart playlist songs to Spotify?
agent: Hey there! You can try following the steps at Let us know how it goes. On the other hand, we're afraid that local files will not appear under Your Library (Songs, Artists, Albums).
customer: please do. I've worked the past 10 years on creating those playlists in iTunes, and the whole purpose of migrating to Spotify is that it's social. So now I don't have any benefits of being a Spotify subscriber... is there really no workaround for this? I wouldn't mind putting some time into making this work, but Spotify just has so little use for me if this can't be done...
agent: We're 

In [None]:
one_shot_outputs = []

for prompt in tqdm(one_shot_dataset):

    # Encode the one-shot prompt on the same device as the model.
    inputs = tokenizer(prompt, return_tensors='pt').to(model.device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        generation_kwargs = dict(max_new_tokens=64, pad_token_id=tokenizer.pad_token_id)
        tokens = model.generate(**inputs, **generation_kwargs)

    # Skip the prompt tokens so only the newly generated text is decoded.
    tokens = tokens[0][inputs['input_ids'].size(1):]
    output = tokenizer.decode(tokens, skip_special_tokens=True)
    one_shot_outputs.append(output)

  0%|          | 0/110 [00:00<?, ?it/s]

In [None]:
# Score the one-shot generations against the same reference summaries.
results = rouge.compute(references=gt_summaries, predictions=one_shot_outputs)
print("One-Shot ROUGE Scores:", results, sep="\n")

One-Shot ROUGE Scores:
{'rouge1': np.float64(0.36481825871434326), 'rouge2': np.float64(0.128734082932617), 'rougeL': np.float64(0.295246312038686), 'rougeLsum': np.float64(0.2953557537867456)}


## LoRA Fine-Tuning

In [None]:
# LoRA adapter settings: rank-16 adapters on the q/k/v/o projection modules,
# with no bias parameters trained.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    bias="none",
    inference_mode=False,
)

# Supervised fine-tuning settings: three epochs in bf16, a checkpoint after
# every epoch, and no external experiment tracker.
training_args = SFTConfig(
    output_dir="./qwen2-lora-finetuned",
    num_train_epochs=3,
    per_device_train_batch_size=2,
    learning_rate=2e-4,
    bf16=True,
    logging_steps=10,
    save_strategy="epoch",
    report_to="none",
)

# The trainer receives the base model together with the LoRA configuration.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=tweetSumm['train'],
    peft_config=lora_config,
)

print("\nStarting training...")
trainer.train()

In [None]:
fine_tuned_outputs = []

# Training puts the modules in train mode and nothing switches them back, so
# set evaluation mode explicitly before generating (disables dropout and other
# train-only behaviour).
# NOTE(review): this assumes `model` carries the trained LoRA layers because the
# trainer wrapped it in place -- confirm, or generate with `trainer.model` instead.
model.eval()

# Same prompts and decoding settings as the zero-shot run, so the scores are comparable.
for prompt in tqdm(zero_shot_dataset):

    inputs = tokenizer(prompt, return_tensors='pt').to(model.device)

    with torch.no_grad():
        tokens = model.generate(
            **inputs, max_new_tokens=64, pad_token_id=tokenizer.pad_token_id
        )

    # Drop the prompt tokens and decode only the generated continuation.
    tokens = tokens[0][inputs['input_ids'].size(1):]
    output = tokenizer.decode(tokens, skip_special_tokens=True)
    fine_tuned_outputs.append(output)

In [None]:
# Score the fine-tuned model's generations against the reference summaries.
results_16 = rouge.compute(references=gt_summaries, predictions=fine_tuned_outputs)
print("LoRA Fine-Tuned ROUGE Scores:", results_16, sep="\n")