# Mistral 7b run after finetuning
In this notebook we run a previously finetuned version of Mistral 7b
* We created the dataset here: https://www.kaggle.com/code/alessandrodrago1/train-data-generation-for-llm-prompt-recovery
* We finetuned Mistral 7b here: https://www.kaggle.com/code/alessandrodrago1/mistral-7b-train

In [1]:
%%capture all
# https://www.kaggle.com/code/hotchpotch/llm-detect-pip 
!pip install -q -U accelerate --no-index --find-links ../input/llm-detect-pip/
!pip install -q -U bitsandbytes --no-index --find-links ../input/llm-detect-pip/
!pip install -q -U transformers --no-index --find-links ../input/llm-detect-pip/
!pip install -Uq /kaggle/input/sentence-transformers-2-4-0/sentence_transformers-2.4.0-py3-none-any.whl
!pip install peft

In [2]:
import sys
import torch
import random
import numpy as np
import pandas as pd
import gc

from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
sys.path.append("/kaggle/input/peft-main/src")

from peft import PeftModel

#https://github.com/Lightning-AI/lit-gpt/issues/327
torch.backends.cuda.enable_mem_efficient_sdp(False)
torch.backends.cuda.enable_flash_sdp(False)

if (not torch.cuda.is_available()): print("Sorry - GPU required!")

In [3]:
#If Mistral doesn't respond - we use this well-scoring prompt instead...
#thanks to: https://www.kaggle.com/code/rdxsun/lb-0-61
base_line = 'Refine the following passage by emulating the writing style of [insert desired style here], with a focus on enhancing its clarity, elegance, and overall impact. Preserve the essence and original meaning of the text, while meticulously adjusting its tone, vocabulary, and stylistic elements to resonate with the chosen style.Please improve the following text using the writing style of, maintaining the original meaning but altering the tone, diction, and stylistic elements to match the new style.Enhance the clarity, elegance, and impact of the following text by adopting the writing style of , ensuring the core message remains intact while transforming the tone, word choice, and stylistic features to align with the specified style.' 

#what's the longest prompt Mistral should respond with?
max_new_tokens = 35

# Load the model

In [4]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

In [5]:
HF_DATASETS_OFFLINE=1 
TRANSFORMERS_OFFLINE=1

In [6]:
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda" # the device to load the model onto
#base_model = "mistralai/Mistral-7B-Instruct-v0.2"
base_model = "/kaggle/input/mistral-7b-it-v02"

model = AutoModelForCausalLM.from_pretrained(
        base_model,
        quantization_config=bnb_config,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
)

model.config.use_cache = False 
model.config.pretraining_tp = 1
model.gradient_checkpointing_enable()

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

You are calling `save_pretrained` to a 4-bit converted model, but your `bitsandbytes` version doesn't support it. If you want to save 4-bit models, make sure to have `bitsandbytes>=0.41.3` installed.


In [7]:
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.padding_side = 'left'
tokenizer.pad_token = tokenizer.eos_token
tokenizer.add_eos_token = True
tokenizer.add_bos_token, tokenizer.add_eos_token

(True, True)

# Load the finetuned model

In [8]:
model = PeftModel.from_pretrained(model, "/kaggle/input/mistral-7b-train/mistral_prompt_recovery_finetuned/")

In [9]:
def remove_numbered_list(text):
    final_text_paragraphs = [] 
    for line in text.split('\n'):
        # Split each line at the first occurrence of '. '
        parts = line.split('. ', 1)
        # If the line looks like a numbered list item, remove the numbering
        if len(parts) > 1 and parts[0].isdigit():
            final_text_paragraphs.append(parts[1])
        else:
            # If it doesn't look like a numbered list item, include the line as is
            final_text_paragraphs.append(line)

    return '  '.join(final_text_paragraphs)

#trims LLM output to just the response
def trim_to_response(text):
    terminate_string = "[/INST]"
    text = text.replace('</s>', '')
    #just in case it puts things in quotes
    text = text.replace('"', '')
    text = text.replace("'", '')

    last_pos = text.rfind(terminate_string)
    return text[last_pos + len(terminate_string):] if last_pos != -1 else text

#looks for response_start / returns only text that occurs after
def extract_text_after_response_start(full_text):
    parts = full_text.rsplit(response_start, 1)  # Split from the right, ensuring only the last occurrence is considered
    if len(parts) > 1:
        return parts[1].strip()  # Return text after the last occurrence of response_start
    else:
        return full_text  # Return the original text if response_start is not found
    
def trim_to_last_period(text):
    last_period_index = text.rfind('.')
    if last_period_index == -1:
        return text
    else:
        return text[:last_period_index + 1]

# Prompt setup
We prompt the model in the same way done during training

In [10]:
## original text prefix
orig_prefix = "Original Text:"

#mistral "response"
llm_response_for_rewrite = "Provide the modified text and I'll tell you the request that caused the change."

#modified text prefix
rewrite_prefix = "Re-written Text:"

#provided as start of Mistral response (anything after this is used as the prompt)
response_start = "The request was: "

In [11]:
def get_prompt(orig_text, transformed_text):

    #construct the prompt sequence...
    messages = [
        #actual prompt
        {"role": "user", "content": f"{orig_prefix} {orig_text}"},
        {"role": "assistant", "content": llm_response_for_rewrite},
        {"role": "user", "content": f"{rewrite_prefix} {transformed_text}"},
        {"role": "assistant", "content": response_start},
    ]
        
    #give it to Mistral
    model_inputs = tokenizer.apply_chat_template(messages, return_tensors="pt")
    model_inputs = model_inputs.to("cuda") 
    generated_ids = model.generate(model_inputs, max_new_tokens=max_new_tokens, pad_token_id=tokenizer.eos_token_id)

    #decode and trim to actual response
    decoded = tokenizer.batch_decode(generated_ids)
    just_response = trim_to_response(decoded[0])        
    final_text = extract_text_after_response_start(just_response)

    #default to baseline
    if len(final_text) == 0:
        final_text = base_line
    
    #mistral has been replying with numbered lists - clean them up....
    final_text = remove_numbered_list(final_text)
    final_text = trim_to_last_period(final_text)
    
    return final_text

# Evaluate 

In [12]:
test_df = pd.read_csv("/kaggle/input/llm-prompt-recovery/test.csv")

for index, row in test_df.iterrows():
    result = get_prompt(row['original_text'], row['rewritten_text'])
    print(result)
    test_df.at[index, 'rewrite_prompt'] = result
    
test_df = test_df[['id', 'rewrite_prompt']]
test_df

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
2024-08-28 10:03:14.417266: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-28 10:03:14.417369: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-28 10:03:14.542022: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Make the text into a shanty song.


Unnamed: 0,id,rewrite_prompt
0,-1,Make the text into a shanty song.


In [13]:
test = pd.read_csv('/kaggle/input/llm-prompt-recovery-ground-truth-1/test (1).csv')
test = test.head(195)

In [14]:
for index, row in test.iterrows():
    result = get_prompt(row['original_text'], row['rewritten_text'])
    print(result)
    test.at[index, 'pred'] = result

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a gourmet food festival brochure.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


 Describe this as an expertise to be gained.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


 Im unable to complete the request as the text does not contain any information about a painting or artwork to be critiqued.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a detectives case file note.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a financial forecast report for a company.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a letter from a soldier in the trenches.     The text has been modified to include a soldiers perspective and tone.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a wedding vow in a fantasy realm.     The text has been modified to include a wedding vow in a fantasy realm.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a Beatles-inspired song.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a poetic ode to a river.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a digital transformation strategy document.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a series of instructions for a time traveler.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


 The request was:    Make the text into a log entry from a future AI.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a film festival brochure description.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


 The text does not contain any information about a monologue, therefore I cannot complete the request. [/] ## The Evil Dr.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a gourmet food festival brochure.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a formal invitation to a corporate event.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a zombie apocalypse survival guide.    **Step 1: Secure your home.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make this a data analysis presentation.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text more child-friendly.    **Heres the modified text:**    Imagine a big fire in a forest called the Blue Cut wildfire.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Convert this into a corporate ethics statement.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a corporate governance guideline.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a motivational fitness instructors speech.    **Hi everybody, lets get ready to take our fitness to the next level together.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a medieval ballad.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a lullaby.     The text has been modified to include a lullaby-like melody and rhythm.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make this a contract agreement.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a guide to deciphering ancient runes.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make this a risk management plan.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a love letter.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a brochure for a zoo in space.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a nostalgic family story from the 90s.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make this a procurement request form.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a yoga instructors session description.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a late-night infomercial script.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a public service announcement.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


 Turn this into a resolution to be resolved.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into the opening of a dystopian novel.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a fortune cookie message.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a Beatles-inspired protest song about climate change.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make it a noir detective story.    The text does not contain any information about a bike event, therefore I cannot modify the text to include that information.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a fitness apps workout challenge description.     The text has been modified to fit the request.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a tactic to be used.     Ive modified the text to make it into a tactic that can be used.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a menu for a fantasy tavern.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


 ## Challenge:    **Run 5,000 miles (8,000km) around the British coastline, sleeping rough and


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a 1980s hair metal bands song lyrics.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a weather forecast script.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a recipe for a magical potion.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


 Make the text into a vintage radio broadcast script.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a restaurants sensory symphony tasting menu description.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a letter to a friend expressing your thoughts on a recent news story.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a corporate governance guideline.     The text does not contain any information about a corporation, therefore I cannot complete the request.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a motivational speech for a sports team.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make this a charter to be chartered.     I have modified the text to make it a charter to be chartered.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


 I am unable to provide the requested conversion as the text provided does not describe a time machine or any time-related equipment.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a self-help book excerpt.     Ive modified the text to fit the requested format.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


 ## Detective Case File: Ancient Secrets in Snowdonia    **Case:** Mysterious Archaeological Discoveries in Snowd


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a movie theater advertisement script.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


 The text does not contain any information about a patent infringement defense, therefore I cannot provide the requested text.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make this a performance improvement plan.    The text does not contain any information about a performance improvement plan, therefore I cannot complete the request.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make this a research study to be conducted.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a motivational poster quote.     Heres the modified text:    While the land crumbles, the spirit remains.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a Broadway musical number.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


 The text


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a corporate ethics statement.     The text does not contain any information about a company, therefore I cannot complete the request.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a filmmakers pitch for a dream project.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a Broadway musical number.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a letter of appreciation for a teams hard work on a project.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a recipe for a cooking show.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a pirate song.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a late-night infomercial script.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a blues song about lost love.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a confession booth monologue.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a flight path description.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a fantasy tavern menu.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


 ## The Aliens Perspective    As an alien, I sat there, my mouth agape in disbelief at the figure beside me


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a discovery log of a hidden land.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


 The request was:    Make the text into a travelers journal entry.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a cooking show script featuring The Beatles.     The text has been modified to include a cooking show script featuring The Beatles.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


 The text does not contain any information about a dispute resolution procedure for positive tests, therefore I cannot provide the requested text.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a Shakespearean sonnet.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a design concept for a retail store.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a letter from a grandparent to their grandchild.     The text has been modified to fit the request.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


 ## Product Development Timeline    **Q1 2023:**    * **Market Research:** Conduct market research to identify target


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a software developers log.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a stand-up comedy routine.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


 Make the text into a motivational poster slogan.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a book editors query letter feedback.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a modern living listicle.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a gardening show segment outline.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text negative.    The modified text is negative in tone, as it expresses a dislike for the carnival and Caribbean culture.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a chefs secret recipe for a dish that doesnt exist.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a competitive dancers performance routine explanation.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make this a training module on sales techniques for customer service representatives.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


 The request was:    **Rewrite the text as if it were a resume summary.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make it a sitcom script.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a holiday greeting card message.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a vintage radio advertisement script.    The text was:    The request was reportedly sent to the Australian embassy in Washington on Thursday.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a news article title.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a formal invitation to a corporate event.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a TED-Ed lesson script.     **Hi everyone, and welcome to my TED-Ed lesson on this unfortunate incident.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a news report.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a mentors guidance.     The text does not contain any information about a mentor, therefore I cannot complete the request.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a sitcom script.    **Characters:**    * **Senator Bartholomew T.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make this a performance improvement plan.    The text does not contain any information about a performance improvement plan, therefore I cannot complete the request.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Imagine this as a professional critique of a business strategy.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a letter of resignation for a secret agent.     The text has been modified to fit the request.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a social media post for a fictional football club.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a corporate sustainability report.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Rewrite the text as if it were a scandal involving historical figures.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Describe this as if it were an accounting principle.     Since the text does not contain any information related to accounting principles, I cannot complete the request.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make it a sitcom episode script.    **INT. CROWDED CLASSROOM - DAY**    The bell rings.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a mystery solved by the Beatles.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a financial forecast report.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.




A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a motivational speakers workshop agenda.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a detectives case file note.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a motivational speech for a sports team.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a trophy inscription.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a public statement for a space exploration mission.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


 ## Critique    The business strategy for Wigtowns upcoming literary festival is well-structured and promises to be engaging and informative.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a protest song about climate change.     The text has been modified to include a protest song about climate change.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a reality show confession.     The text was modified to include a confession from a reality show contestant.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a team offsite workshop agenda.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a vintage radio broadcast script.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make this a regulation to be regulated.    The text does not contain any information about a regulation to be regulated, therefore I cannot complete the request.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


 The text does not contain any information about time travel, therefore I cannot complete the request.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a culinary competition contestants introduction.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a mindfulness meditation guide.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a passionate opera singers aria.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a pitch for a startup in the tech industry.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a news article about a world record attempt for the fastest time to commit a murder.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a practice exam question.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a retro-futuristic advertisement script.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a viral challenge description.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a press release for a new building in a town.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a Beatles-inspired song.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a film festival brochure description.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a product specification document.    **Product:** Educational Strike Response System    **Version:** 1.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a symposium agenda.    **Answer:**    The text has been modified to create a symposium agenda.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a dating profile message.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make it a recording of a lost civilizations wisdom.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


 ## The Secret Handshake of the Welsh Language Commissioner    **Step 1:**    Gather in a dimly lit room, prefer


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a vaudeville act introduction.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a late-night infomercial script.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make it a poets muse.     The text has been transformed into a poets muse, incorporating poetic language and themes.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a poetic ode to a champions spirit.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a vaudeville act introduction.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a motivational speech.     The text has been modified to include motivational language and a call to action.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make this a letter of termination of a contract.     The text does not contain any information about a contract, therefore I cannot complete the request.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into an audit report summary.    The text does not contain any information about an audit, therefore I cannot complete the request.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a spy thriller dossier.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a proposal for a community support service.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


 Im unable to complete the request as the text does not contain any information about personal development seminars.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a formal business proposal for a restructuring.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a roast battle between two fictional characters.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Imagining this as a symphony, how would you write the movements?     So I re-wrote the text as if it were a symphony


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a discovery to be made.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Rewrite the text as if it were a mentors guidance to an apprentice.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a solemn vow to be taken in a fantasy world.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into an advertisement for a perfume inspired by the scent of a victorious badminton match.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a farewell letter from a dying sailor.     The text was modified to fit the request.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a script for a play.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a dating profile message.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a vintage art gallery opening night invitation.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Rewrite the text as if it were a Shakespearean sonnet.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a fitness equipment infomercial script.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a corporate sustainability report.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text sound like a vintage radio broadcast.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a space mission briefing.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make this a due diligence report.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a 1950s rock and roll song.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a detective agency advertisement.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


 The request was:    Make the text into a personal development seminars key points.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a courtroom verdict.    **Verdict:**    In the case of Mr. X vs.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


 The text does not contain any information about a sailors navigation chart, therefore I cannot complete the request.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a rap song.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Convert the text into a skill to be mastered in a video game.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a motivational quote.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a 1950s science fiction movie script.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a high school teachers lesson plan.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


 ## Scientists Breakthrough    **Discovering the Secrets of the Universe**    **Dr.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Imagining this as an aliens perspective on human society, what would you critique?    Since the text does not provide any information about the perspective of aliens


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a procedure to be followed.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a Shakespearean soliloquy from a villain.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Imagining this as an alien perspective, how would you describe it?     The alien perspective is a unique and intriguing way to view the situation.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a dedication plaque for a new store opening.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a quality assurance checklist.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a screenplay format.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a mission statement for a financial services company.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


 The text does not contain any information about a security upgrade at Aurigny, therefore I cannot complete the request.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make this a business continuity plan.    The text does not contain any information about a business continuity plan, therefore I cannot complete the request.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into an employee satisfaction survey.    **Answer:**    The text has been modified to create an employee satisfaction survey.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a Shakespearean sonnet.


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Make the text into a vintage carnival barkers pitch.
Make the text into a treasure map description.


In [15]:
test

Unnamed: 0,id,original_text,rewrite_prompt,rewritten_text,pred
0,40837062,South Wales Police officers had been pursuing ...,Make the text into a gourmet food festival bro...,## Gourmet Food Festival Brochure\n\n**Indulge...,Make the text into a gourmet food festival bro...
1,36917156,Police were told an Audi's number plates were ...,Describe this as an expertise to be gained.,The expertise gained in this scenario is the a...,Describe this as an expertise to be gained.
2,38155860,"Mr Burke, 19, from Ripon in North Yorkshire, w...",Change the text into an art critique,**Art Critique**\n\nThe text provided does not...,Im unable to complete the request as the text...
3,35189843,Twenty-four-year-old Aldis Minakovskis is alle...,Make the text into a detective's case file note,## Detective's Case File Note - Aldis Minakovs...,Make the text into a detectives case file note.
4,36670719,The 29-year-old has signed a four-year contrac...,Convert this into a financial forecast report.,**Financial Forecast Report**\n\n**Company:** ...,Make the text into a financial forecast report...
...,...,...,...,...,...
190,32399004,"Andrew Picton, 66, of Eastcroft Close, Blagdon...",Craft this into a business continuity plan.,## Business Continuity Plan for Company XYZ\n\...,Make this a business continuity plan. The t...
191,40964641,17 August 2017 Last updated at 15:29 BST\nA so...,Transform this into an employee satisfaction s...,**Employee Satisfaction Survey**\n\n**Please a...,Make the text into an employee satisfaction su...
192,34703934,The group will play at the Etihad Stadium in t...,Transform the message into a Shakespearean sonnet,"O sweet band, thy music doth evoke,\nA tale of...",Make the text into a Shakespearean sonnet.
193,38423051,Fans protesting at City's owners Sisu held the...,Turn the content into an old-timey carnival an...,"Gather 'round, folks, and hear what I have to ...",Make the text into a vintage carnival barkers ...


In [16]:
from tqdm import tqdm
tqdm.pandas()

import warnings 
warnings.filterwarnings('ignore')

from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

In [17]:
def CVScore(test):
    
    scs = lambda row: abs((cosine_similarity(row["actual_embeddings"], row["pred_embeddings"])) ** 3)
    
    model = SentenceTransformer('/kaggle/input/sentence-t5-base-hf/sentence-t5-base')
    
    print(test["rewrite_prompt"])

    test["actual_embeddings"] = test["rewrite_prompt"].progress_apply(lambda x: model.encode(x, normalize_embeddings=True, show_progress_bar=False).reshape(1, -1))
    print(test["actual_embeddings"])
    test["pred_embeddings"] = test["pred"].progress_apply(lambda x: model.encode(x, normalize_embeddings=True, show_progress_bar=False).reshape(1, -1))
    
    test["score"] = test.apply(scs, axis=1)
    
    return np.mean(test['score'])[0][0]
    
print(f"CV Score: {CVScore(test)}")

0      Make the text into a gourmet food festival bro...
1            Describe this as an expertise to be gained.
2                   Change the text into an art critique
3        Make the text into a detective's case file note
4         Convert this into a financial forecast report.
                             ...                        
190          Craft this into a business continuity plan.
191    Transform this into an employee satisfaction s...
192    Transform the message into a Shakespearean sonnet
193    Turn the content into an old-timey carnival an...
194             Translate this into an adventurer's map.
Name: rewrite_prompt, Length: 195, dtype: object


100%|██████████| 195/195 [00:02<00:00, 73.93it/s]


0      [[-0.020476142, -0.017723111, -0.0105812475, 0...
1      [[-0.018977212, -0.0053801383, 0.016442953, 0....
2      [[-0.039775047, -0.002232923, 0.009845024, 0.0...
3      [[-0.021092517, -0.034007005, 0.0015480592, 0....
4      [[-0.0048643555, -0.013603131, -0.031679187, 0...
                             ...                        
190    [[0.020877542, -0.0077270623, -0.0050881705, 0...
191    [[-0.01017829, -0.01296109, 0.0070637036, 0.03...
192    [[-0.035690114, -0.003417209, 0.023152454, 0.0...
193    [[-0.022775475, -0.03535786, -0.03330925, 0.04...
194    [[-0.019695086, 0.011401464, 0.03006828, 0.051...
Name: actual_embeddings, Length: 195, dtype: object


100%|██████████| 195/195 [00:02<00:00, 76.74it/s]


CV Score: 0.7554425597190857


# Submit

In [18]:
test_df.to_csv('submission.csv', index=False)