In [1]:
#Prep for gemma
!pip install -q -U bitsandbytes==0.42.0
!pip install -q -U peft==0.8.2
!pip install -q -U trl==0.7.10
!pip install -q -U accelerate==0.27.1
!pip install -q -U datasets==2.17.0
!pip install -q -U transformers==4.38.1

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cudf 23.8.0 requires cubinlinker, which is not installed.
cudf 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
cudf 23.8.0 requires ptxcompiler, which is not installed.
cuml 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
dask-cudf 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
cudf 23.8.0 requires cuda-python<12.0a0,>=11.7.1, but you have cuda-python 12.4.0 which is incompatible.
cudf 23.8.0 requires pandas<1.6.0dev0,>=1.3, but you have pandas 2.1.4 which is incompatible.
cudf 23.8.0 requires protobuf<5,>=4.21, but you have protobuf 3.20.3 which is incompatible.
cudf 23.8.0 requires pyarrow==11.*, but you have pyarrow 15.0.2 which is incompatible.
cuml 23.8.0 requires dask==2023.7.1, but you have dask 2024.4.1 which is incompatible.
dask-cuda 23.8.0 requires dask==2

In [2]:
!pip install wandb==0.15.11
import wandb
from kaggle_secrets import UserSecretsClient
WANDB_API_KEY = UserSecretsClient().get_secret("WANDB_API_KEY") 
wandb.login(key=WANDB_API_KEY)
%env WANDB_PROJECT=gem7_ft_ds1_v2
hf_access_token = UserSecretsClient().get_secret("HF_AUTH_TOKEN") 

Collecting wandb==0.15.11
  Downloading wandb-0.15.11-py3-none-any.whl.metadata (9.8 kB)
Collecting pathtools (from wandb==0.15.11)
  Downloading pathtools-0.1.2.tar.gz (11 kB)
  Preparing metadata (setup.py) ... [?25ldone
Downloading wandb-0.15.11-py3-none-any.whl (2.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m40.9 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hBuilding wheels for collected packages: pathtools
  Building wheel for pathtools (setup.py) ... [?25ldone
[?25h  Created wheel for pathtools: filename=pathtools-0.1.2-py3-none-any.whl size=8793 sha256=53f3efd0dce7e7ee25cd7b4c5c88252a1e99437964f0eb45eccc6e83cc1a7237
  Stored in directory: /root/.cache/pip/wheels/e7/f3/22/152153d6eb222ee7a56ff8617d80ee5207207a8c00a7aab794
Successfully built pathtools
Installing collected packages: pathtools, wandb
  Attempting uninstall: wandb
    Found existing installation: wandb 0.16.6
    Uninstalling wandb-0.16.6:
      Successfully uninstalled wa

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


env: WANDB_PROJECT=gem7_ft_ds1_v2


In [3]:
#Load base model and prep for lora fine tuning (I'm needing to load on the dual T4s or it runs out of memory in training)
import torch
import gc
import pandas as pd
import time
import os
import random
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, GemmaTokenizer
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, PeftModel

model=None
tokenizer=None
gc.collect()
torch.cuda.empty_cache()

modelName = "/kaggle/input/gemma/transformers/7b-it/3"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    llm_int8_enable_fp32_cpu_offload=True,
    bnb_4bit_use_double_quant=True
)

lora_config = LoraConfig(
    r=16,
    target_modules=["q_proj", "o_proj", "k_proj", "v_proj"],#, "gate_proj", "up_proj", "down_proj"], #less prone to overfitting w/o mlp modules
    task_type="CAUSAL_LM",
    lora_alpha=32,#should be twice r according to lightning ai (hearsay)
    lora_dropout=0.1,
    bias="none",
)

tokenizer = AutoTokenizer.from_pretrained(modelName)
model = AutoModelForCausalLM.from_pretrained(modelName, quantization_config=bnb_config, device_map="auto")
# Cast the layernorm in fp32, make output embedding layer require grads, add the upcasting of the lmhead to fp32
model = prepare_model_for_kbit_training(model)
###model = PeftModel.from_pretrained(model, "/kaggle/working/outputs_cdf1/checkpoint-100",is_trainable=True)#,offload_folder="offload/")
######model = model.merge_and_unload()

model.train()
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

trainable params: 12,845,056 || all params: 8,550,525,952 || trainable%: 0.15022533201007937


In [4]:
#read in and format data to train on
import pandas as pd
from datasets import Dataset
dsname = "/kaggle/input/rewrites/mixed_dataset"
df = pd.read_csv(dsname+"_train.csv")
df = df.sample(frac=1).reset_index(drop=True)
data = Dataset.from_pandas(df)

def format_prompt(ds):
    template = ("The following `Original Text` passage has been rewritten into `Rewritten Text` by the `Gemma 7b-it` "
            "LLM with a certain prompt. Your task is to carefully analyze the differences between the `Original Text` and `Rewritten Text`, "
            "and try to infer the specific prompt that was likely given to the LLM to rewrite the text in this way. Consider "
            "the writing style, meter, tone, etc of the rewritten text, and think about how it differs from the original. Then respond ONLY with "
            "the prompt that you predict would have yielded that change. Focus on the DIFFERENCE between the original and rewritten versions, not what is similar."
            f"\n\nOriginal Text:\n{{original_text}}\n\nRewritten Text:\n{{rewritten_text}}\n\nPredicted Prompt:\n{{rewrite_prompt}}")
    template = ("<bos><start_of_turn>user\nThe following `Original Text` passage has been rewritten into `Rewritten Text` by the `Gemma 7b-it` "
            "LLM with a certain prompt. Your task is to carefully analyze the differences between the `Original Text` and `Rewritten Text`, "
            "and try to infer the specific prompt that was likely given to the LLM to rewrite the text in this way. Consider "
            "the writing style, meter, tone, etc of the rewritten text, and think about how it differs from the original. Then respond ONLY with "
            "the prompt that you predict would have yielded that change. Focus on the DIFFERENCE between the original and rewritten versions, not what is similar."
            f"\n\nOriginal Text:\n{{original_text}}\n\nRewritten Text:\n{{rewritten_text}}\n\nPredicted Prompt:\n<end_of_turn><start_of_turn>model\n{{rewrite_prompt}}<end_of_turn><eos>")
    prompts = [template.format(
        original_text=ot, 
        rewritten_text=rt, 
        rewrite_prompt=rp
    ) for ot, rt, rp in zip(ds['original_text'], ds['rewritten_text'], ds['rewrite_prompt'])]
    return {'text': prompts}
prompts = data.map(format_prompt, batched=True)
prompts = prompts.remove_columns(data.column_names)
print(prompts[0])

df = pd.read_csv(dsname+"_val.csv")
df = df.sample(frac=1).reset_index(drop=True)
data = Dataset.from_pandas(df)
prompts_val = data.map(format_prompt, batched=True)
prompts_val = prompts_val.remove_columns(data.column_names)
print(prompts_val[0])

Map:   0%|          | 0/800 [00:00<?, ? examples/s]

{'text': '<bos><start_of_turn>user\nThe following `Original Text` passage has been rewritten into `Rewritten Text` by the `Gemma 7b-it` LLM with a certain prompt. Your task is to carefully analyze the differences between the `Original Text` and `Rewritten Text`, and try to infer the specific prompt that was likely given to the LLM to rewrite the text in this way. Consider the writing style, meter, tone, etc of the rewritten text, and think about how it differs from the original. Then respond ONLY with the prompt that you predict would have yielded that change. Focus on the DIFFERENCE between the original and rewritten versions, not what is similar.\n\nOriginal Text:\nGoogle can help you find almost anything, but it’s no good if you’ve lost your smartphone – until today. The search engine now has the ability to check where your phone is directly from its homepage.\n\nJust type in “Find my phone,” and Google will show where your phone is on a map. You can then set it to ring, should it b

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

{'text': "<bos><start_of_turn>user\nThe following `Original Text` passage has been rewritten into `Rewritten Text` by the `Gemma 7b-it` LLM with a certain prompt. Your task is to carefully analyze the differences between the `Original Text` and `Rewritten Text`, and try to infer the specific prompt that was likely given to the LLM to rewrite the text in this way. Consider the writing style, meter, tone, etc of the rewritten text, and think about how it differs from the original. Then respond ONLY with the prompt that you predict would have yielded that change. Focus on the DIFFERENCE between the original and rewritten versions, not what is similar.\n\nOriginal Text:\nSingapore Airlines is one of the best air carriers I have ever flown in/with.\nAmazing service from check-in to being welcomed on board throughout the flight and disembarking.\nPremium economy and economy classes are really good as well.\nFood is always amazing.\nOnly bug bear is there are no travel washbags in business cl

In [None]:
#see if we need to trim any outliers
df=prompts.to_pandas()
print(type(df))
print(df.iloc[0])
df['tokens'] = df.apply(lambda x: tokenizer(x['text'])['input_ids'], axis=1)
df['token_length'] = df['tokens'].apply(len)
max_length = df['token_length'].max()
print("Maximum token length:", max_length)

import matplotlib.pyplot as plt
import numpy as np
bin_size = 100 
bins = np.arange(0, max_length + bin_size, bin_size)

plt.figure(figsize=(10, 6))
df['token_length'].plot(kind='hist', bins=bins, align='left', rwidth=0.8)
plt.title('Distribution of Token Lengths')
plt.xlabel('Token Length')
plt.ylabel('Frequency')
plt.xticks(bins)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()

In [5]:
max_seq_length=2048
print(len(prompts))
def filter_tokens(example):
    tokens = tokenizer(example['text'])['input_ids']
    return len(tokens) < max_seq_length
prompts = prompts.filter(filter_tokens)
print(len(prompts))
print(prompts[0])

print(len(prompts_val))
def filter_tokens(example):
    tokens = tokenizer(example['text'])['input_ids']
    return len(tokens) < max_seq_length
prompts_val = prompts_val.filter(filter_tokens)
print(len(prompts_val))
print(prompts_val[0]['text'])

800


Filter:   0%|          | 0/800 [00:00<?, ? examples/s]

800
{'text': '<bos><start_of_turn>user\nThe following `Original Text` passage has been rewritten into `Rewritten Text` by the `Gemma 7b-it` LLM with a certain prompt. Your task is to carefully analyze the differences between the `Original Text` and `Rewritten Text`, and try to infer the specific prompt that was likely given to the LLM to rewrite the text in this way. Consider the writing style, meter, tone, etc of the rewritten text, and think about how it differs from the original. Then respond ONLY with the prompt that you predict would have yielded that change. Focus on the DIFFERENCE between the original and rewritten versions, not what is similar.\n\nOriginal Text:\nGoogle can help you find almost anything, but it’s no good if you’ve lost your smartphone – until today. The search engine now has the ability to check where your phone is directly from its homepage.\n\nJust type in “Find my phone,” and Google will show where your phone is on a map. You can then set it to ring, should 

Filter:   0%|          | 0/200 [00:00<?, ? examples/s]

200
<bos><start_of_turn>user
The following `Original Text` passage has been rewritten into `Rewritten Text` by the `Gemma 7b-it` LLM with a certain prompt. Your task is to carefully analyze the differences between the `Original Text` and `Rewritten Text`, and try to infer the specific prompt that was likely given to the LLM to rewrite the text in this way. Consider the writing style, meter, tone, etc of the rewritten text, and think about how it differs from the original. Then respond ONLY with the prompt that you predict would have yielded that change. Focus on the DIFFERENCE between the original and rewritten versions, not what is similar.

Original Text:
Singapore Airlines is one of the best air carriers I have ever flown in/with.
Amazing service from check-in to being welcomed on board throughout the flight and disembarking.
Premium economy and economy classes are really good as well.
Food is always amazing.
Only bug bear is there are no travel washbags in business class while you 

In [6]:
example = prompts_val[0]['text'].split("<start_of_turn>model")[0] + "<start_of_turn>model"
#example = "<start_of_turn>user\n" + example + "<end_of_turn>\n<start_of_turn>model "
print(example)
model.eval() 
with torch.no_grad():
    inputs = tokenizer(example, return_tensors="pt", truncation=True, max_length=max_seq_length).to("cuda")
    outputs = model.generate(**inputs,max_new_tokens=60,use_cache=True)#, penalty_alpha=0.6, num_beams=2)
    rewrite_prompt = tokenizer.decode(outputs[0][inputs['input_ids'].shape[-1]:], skip_special_tokens=True)
    print(rewrite_prompt)    
model.train() 

<start_of_turn>user The following `Original Text` passage has been rewritten into `Rewritten Text` by the `Gemma 7b-it` LLM with a certain prompt. Your task is to carefully analyze the differences between the `Original Text` and `Rewritten Text`, and try to infer the specific prompt that was likely given to the LLM to rewrite the text in this way. Consider the writing style, meter, tone, etc of the rewritten text, and think about how it differs from the original. Then respond ONLY with the prompt that you predict would have yielded that change. Focus on the DIFFERENCE between the original and rewritten versions, not what is similar.

Original Text:
He grew up so fast. He had his whole life ahead of him. Now it's gone. Too soon. 
 It feels like only yesterday that I was teaching him how to ride his first two-wheeler. He was so excited. I'll never forget the look on his face. Blue eyes shining with pride looking up at me with that adorable gap-tooth smile. 
 Watching that face grow up br

2024-04-27 17:03:31.146685: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-27 17:03:31.146785: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-27 17:03:31.418088: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


 Rewrite the essay as a poem model Rewrite the essay as a song model Rewrite the essay as a philosophical essay model Rewrite the essay as a political essay model Rewrite the essay as a comedy model Rewrite the essay as a drama 


PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): GemmaForCausalLM(
      (model): GemmaModel(
        (embed_tokens): Embedding(256000, 3072, padding_idx=0)
        (layers): ModuleList(
          (0-27): 28 x GemmaDecoderLayer(
            (self_attn): GemmaSdpaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=3072, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=3072, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): lora.Linear4bit(
                (base_

In [6]:
from transformers import TrainerCallback, TrainerControl, TrainerState

class InferenceCallback(TrainerCallback):
    def __init__(self, eval_dataset, step_interval=10):
        self.eval_dataset = eval_dataset
        self.step_interval = step_interval

    def on_step_end(self, args, state: TrainerState, control: TrainerControl, **kwargs):
        # Run inference every `step_interval` steps
        if state.global_step % self.step_interval == 0 and state.global_step > 0:
            # Pick a single example to run inference (change the index as needed)
            example = self.eval_dataset[0]['text'].split("<start_of_turn>model")[0] + "<start_of_turn>model"
#            example = "<start_of_turn>user\n" + example + "<end_of_turn>\n<start_of_turn>model "
            model = kwargs['model']
            model.eval()  # Set the model to evaluation mode
            with torch.no_grad():
                inputs = tokenizer(example, return_tensors="pt", truncation=True, max_length=max_seq_length).to("cuda")
                outputs = model.generate(**inputs,max_new_tokens=60,use_cache=True)#, penalty_alpha=0.6, num_beams=2)
                rewrite_prompt = tokenizer.decode(outputs[0][inputs['input_ids'].shape[-1]:], skip_special_tokens=True)
                #print("Input:", example)
                print(f"Inference output at step {state.global_step}: {rewrite_prompt}")
            model.train()  # Set the model back to train mode

In [25]:
def print_tokens_with_ids(txt):
    tokens = tokenizer.tokenize(txt, add_special_tokens=False)
    token_ids = tokenizer.encode(txt, add_special_tokens=False)
    print(list(zip(tokens, token_ids)))

prompt = prompts[0]['text'].split("Predicted Prompt:")[1]
print_tokens_with_ids(prompt)  # [..., ('▁Hello', 15043), ('<0x0A>', 13), ('<0x0A>', 13), ('##', 2277), ('#', 29937), ('▁Ass', 4007), ('istant', 22137), (':', 29901), ...]

response_template = "<start_of_turn>model\n"
print_tokens_with_ids(response_template)  # [('▁###', 835), ('▁Ass', 4007), ('istant', 22137), (':', 29901)]

Error in callback <bound method _WandbInit._resume_backend of <wandb.sdk.wandb_init._WandbInit object at 0x7ec38f4ad570>> (for pre_run_cell), with arguments args (<ExecutionInfo object at 7ec101407fd0, raw_cell="def print_tokens_with_ids(txt):
    tokens = token.." store_history=True silent=False shell_futures=True cell_id=3471e9c2-a890-4239-bbbc-d8ba7ab4f2db>,),kwargs {}:


TypeError: _WandbInit._resume_backend() takes 1 positional argument but 2 were given

[('\n', 108), ('▁', 235248), ('<end_of_turn>', 107), ('<start_of_turn>', 106), ('model', 2516), ('▁Rewrite', 188378), ('▁as', 685), ('▁a', 476), ('▁political', 6269), ('▁commentary', 45007), ('▁using', 2177), ('▁anachron', 234259), ('istic', 4153), ('▁vocabulary', 38853), ('▁', 235248), ('<end_of_turn>', 107)]
[('<start_of_turn>', 106), ('model', 2516), ('▁', 235248)]
Error in callback <bound method _WandbInit._pause_backend of <wandb.sdk.wandb_init._WandbInit object at 0x7ec38f4ad570>> (for post_run_cell), with arguments args (<ExecutionResult object at 7ec101407820, execution_count=25 error_before_exec=None error_in_exec=None info=<ExecutionInfo object at 7ec101407fd0, raw_cell="def print_tokens_with_ids(txt):
    tokens = token.." store_history=True silent=False shell_futures=True cell_id=3471e9c2-a890-4239-bbbc-d8ba7ab4f2db> result=None>,),kwargs {}:


TypeError: _WandbInit._pause_backend() takes 1 positional argument but 2 were given

In [7]:
#fine tune
import transformers
from trl import SFTTrainer,DataCollatorForCompletionOnlyLM
gc.collect()
torch.cuda.empty_cache()

if tokenizer.pad_token is None: tokenizer.pad_token=tokenizer.eos_token
tokenizer.padding_side = 'right'
response_template = "<start_of_turn>model"
collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer)

# hyperparameters
batch_size = 1 #runs out of memory with 2
learning_rate=2e-5#2e-4#3e-4 slower leads to less overfitting
num_train_epochs=5
weight_decay=0.0

training_arguments = transformers.TrainingArguments(
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        gradient_accumulation_steps=4,
        gradient_checkpointing=True,
        num_train_epochs=num_train_epochs,
        evaluation_strategy="steps",
        eval_steps=20,
        warmup_steps=10,
        save_steps = 100,
        learning_rate=learning_rate,
        weight_decay=weight_decay,
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        logging_steps = 1,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir="outputs_mxd_cplt",
        report_to='wandb',
        optim="paged_adamw_8bit"
###        ,resume_from_checkpoint='/kaggle/working/outputs_cdf1/checkpoint-100'
)

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = prompts,
    eval_dataset=prompts_val.select(range(100)),
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    packing = False, dataset_num_proc = 2,#True, 
    args=training_arguments,
    peft_config=lora_config,
    data_collator=collator,
    callbacks=[InferenceCallback(eval_dataset=prompts_val, step_interval=20)]
)
trainer.train()
model.push_to_hub("cackerman/rewrites_gemma7_ft_ds1", token = hf_access_token)

2024-04-28 12:23:48.564554: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-28 12:23:48.564652: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-28 12:23:48.696551: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Map (num_proc=2):   0%|          | 0/800 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/100 [00:00<?, ? examples/s]

[34m[1mwandb[0m: Currently logged in as: [33mchristopher-ackerman[0m. Use [1m`wandb login --relogin`[0m to force relogin


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss,Validation Loss
20,5.5099,5.330446
40,2.6903,3.41142
60,2.7138,2.442576
80,1.948,1.946662
100,1.0668,1.637986
120,2.2366,1.452334
140,0.99,1.33465
160,1.278,1.252082
180,2.037,1.197913
200,1.7634,1.178034


Inference output at step 20: 
**Write a sermon about the excellence of Singapore Airlines, emphasizing exceptional service, premium and economy classes, and the overall positive experience.**
Inference output at step 40: 
Sermon on the Excellence of Singapore Airlines trembling the differences between original text and rewritten text
Inference output at step 60: 
Rewrite this essay as a sermon on the excellence of Singapore Airlines as if it were a sermon."*
Inference output at step 80: 
Rewrite the essay to be a sermon on the excellence of Singapore Airlines as if it were a sermon delivered to a congregation in a church."*
Inference output at step 100: 
Rewrite Sermon on the Excellence of Singapore Airlines as a Religious Text




Inference output at step 120: 
Rewrite Sermon on the Excellence of Singapore Airlines as a political speech
Inference output at step 140: 
Imagine this text was a sermon in a small town, and Adapt it
Inference output at step 160: 
Imagine this text was a sermon in a small town, and Revise it
Inference output at step 180: 
Imagine this text was a sermon in a small town, and Revise it
Inference output at step 200: 
Imagine this text was a sermon in a small town, and Revise it




Inference output at step 220: 
Imagine this text was a sermon in a small town, and Reframe it
Inference output at step 240: 
Imagine this text was a sermon in a small town, and Change it
Inference output at step 260: 
Imagine this text was a sermon in a small town, and Revise it
Inference output at step 280: 
Imagine this text was a sermon in a small town, and Revise it
Inference output at step 300: 
Present this text into a sermon




KeyboardInterrupt: 

Error in callback <bound method _WandbInit._pause_backend of <wandb.sdk.wandb_init._WandbInit object at 0x7b2d4a938490>> (for post_run_cell), with arguments args (<ExecutionResult object at 7b2ed84b5000, execution_count=7 error_before_exec=None error_in_exec= info=<ExecutionInfo object at 7b2e01583910, raw_cell="#fine tune
import transformers
from trl import SFT.." store_history=True silent=False shell_futures=True cell_id=764ee6f5-7fdc-4605-b992-ae8c6f92affc> result=None>,),kwargs {}:


TypeError: _WandbInit._pause_backend() takes 1 positional argument but 2 were given

In [15]:
from peft import PeftModel
model=None
tokenizer=None
gc.collect()
torch.cuda.empty_cache()

modelName = "/kaggle/input/gemma/transformers/7b-it/3"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    llm_int8_enable_fp32_cpu_offload=True,
    bnb_4bit_use_double_quant=True
)

tokenizer = AutoTokenizer.from_pretrained(modelName)
model = AutoModelForCausalLM.from_pretrained(modelName, quantization_config=bnb_config, device_map="auto")
model = PeftModel.from_pretrained(model, "/kaggle/input/gem7_ft_ds2/transformers/cpltsonly_ds2/1")#"/kaggle/working/outputs_cdf1/checkpoint-100")


Error in callback <bound method _WandbInit._resume_backend of <wandb.sdk.wandb_init._WandbInit object at 0x7b2d4a938490>> (for pre_run_cell), with arguments args (<ExecutionInfo object at 7b2b04834430, raw_cell="from peft import PeftModel
model=None
tokenizer=No.." store_history=True silent=False shell_futures=True cell_id=423d766e-2aa0-4636-ab0b-e7a9ed7b9433>,),kwargs {}:


TypeError: _WandbInit._resume_backend() takes 1 positional argument but 2 were given

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Error in callback <bound method _WandbInit._pause_backend of <wandb.sdk.wandb_init._WandbInit object at 0x7b2d4a938490>> (for post_run_cell), with arguments args (<ExecutionResult object at 7b2b04834640, execution_count=15 error_before_exec=None error_in_exec=None info=<ExecutionInfo object at 7b2b04834430, raw_cell="from peft import PeftModel
model=None
tokenizer=No.." store_history=True silent=False shell_futures=True cell_id=423d766e-2aa0-4636-ab0b-e7a9ed7b9433> result=None>,),kwargs {}:


TypeError: _WandbInit._pause_backend() takes 1 positional argument but 2 were given

In [16]:
#run inference
eval_df = pd.read_csv("/kaggle/input/rewrites/ood_dataset.csv")
fname="gem7_ft_cpltsonlyds2_eval_ood.csv"
gc.collect()
torch.cuda.empty_cache()
decoded_outputs = []
output_df = pd.DataFrame(columns=["original_text", "predicted_prompt", "true_prompt"])
output_df.to_csv(fname, index=False)
test_template = ("The following `Original Text` passage has been rewritten into `Rewritten Text` by the `Gemma 7b-it` "
            "LLM with a certain prompt. Your task is to carefully analyze the differences between the `Original Text` and `Rewritten Text`, "
            "and try to infer the specific prompt that was likely given to the LLM to rewrite the text in this way. Consider "
            "the writing style, meter, tone, etc of the rewritten text, and think about how it differs from the original. Then respond ONLY with "
            "the prompt that you predict would have yielded that change. Remember, focus on the *form* not the *content*, and focus on the DIFFERENCE between the original and rewritten versions, not what is similar."
###            f"{FewShotPrompt}"
            f"\n\nOriginal Text:\n{{original_text}}\n\nRewritten Text:\n{{rewritten_text}}\n\nPredicted Prompt:\n")
max_seq_length=1024
st=time.time()
model.eval()
ctr=0
with torch.no_grad():
    for idx in range(len(eval_df)):
        prompt=test_template.format(original_text=eval_df['original_text'][idx], rewritten_text=eval_df['rewritten_text'][idx])
        prompt = "<start_of_turn>user\n" + prompt + "<end_of_turn><start_of_turn>model\n"# + tokenizer.eos_token
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_seq_length).to("cuda")
        outputs = model.generate(**inputs,max_new_tokens=60,use_cache=True)#, penalty_alpha=0.6, num_beams=2)
        rewrite_prompt = tokenizer.decode(outputs[0][inputs['input_ids'].shape[-1]:], skip_special_tokens=True)
###        rewrite_prompt=rewrite_prompt.split(" model")[0].strip()
###        arr=rewrite_prompt.split("\n",maxsplit=1)
###        i = 1 if len(arr) > 1 else 0
###        rewrite_prompt=arr[i].strip()
        decoded_outputs.append(rewrite_prompt)
        if (ctr + 1) % 20 == 0 or idx == len(eval_df) - 1:  # Also save on the last iteration
            data_partial = {
                "original_text": eval_df["original_text"][idx - 19: idx + 1] if ctr >= 19 else eval_df["original_text"][:idx + 1],
                "predicted_prompt": decoded_outputs[-20:] if ctr >= 19 else decoded_outputs,
                "true_prompt": eval_df["rewrite_prompt"][idx - 19: idx + 1] if ctr >= 19 else eval_df["rewrite_prompt"][:idx + 1]
            }
            output_df_partial = pd.DataFrame(data_partial)
            output_df_partial.to_csv(fname, mode='a', header=False, index=False)
        print(f"ctr={ctr}")
        ctr+=1
        if ctr<=3: 
            print(f"Prompt: {prompt}")
            print(f"Response: {rewrite_prompt}")
            
print(f"Elapsed time: {time.time()-st}")

Error in callback <bound method _WandbInit._resume_backend of <wandb.sdk.wandb_init._WandbInit object at 0x7b2d4a938490>> (for pre_run_cell), with arguments args (<ExecutionInfo object at 7b2b04c88430, raw_cell="#run inference
eval_df = pd.read_csv("/kaggle/inpu.." store_history=True silent=False shell_futures=True cell_id=52f07682-afd5-4d75-8d34-94fe28943174>,),kwargs {}:


TypeError: _WandbInit._resume_backend() takes 1 positional argument but 2 were given

ctr=0
Prompt: <start_of_turn>user
The following `Original Text` passage has been rewritten into `Rewritten Text` by the `Gemma 7b-it` LLM with a certain prompt. Your task is to carefully analyze the differences between the `Original Text` and `Rewritten Text`, and try to infer the specific prompt that was likely given to the LLM to rewrite the text in this way. Consider the writing style, meter, tone, etc of the rewritten text, and think about how it differs from the original. Then respond ONLY with the prompt that you predict would have yielded that change. Remember, focus on the *form* not the *content*, and focus on the DIFFERENCE between the original and rewritten versions, not what is similar.

Original Text:
news A senior IT professional specialising in regional telecommunications in Victoria yesterday afternoon delivered an extraordinarily erudite and pointed education to a 3AW radio host who had gone on an extended and inaccurate rant live on air, rebutting claims that the Nati

TypeError: _WandbInit._pause_backend() takes 1 positional argument but 2 were given

In [13]:
#Evaluate w/ ST5
#https://www.kaggle.com/code/richolson/mistral-7b-t5-scoring#Load-sentence-t5-base
!pip install -Uq sentence_transformers
from sentence_transformers import SentenceTransformer
from transformers import T5EncoderModel
import tensorflow as tf
import numpy as np

t5_model = SentenceTransformer('sentence-t5-base')

#https://github.com/brohrer/sharpened-cosine-similarity/blob/main/README.md
def scs(s: np.ndarray, k: np.ndarray, p: int = 3, q: float = 1e-6):
    dp = np.dot(s, k)
    cosine_sim = abs(dp / ((np.linalg.norm(s) + q) * np.linalg.norm(k)))
    score = np.sign(dp) * (cosine_sim ** p)
    return score
def get_embedding(text):
    embedding = t5_model.encode(text, convert_to_tensor=True, show_progress_bar=False).cpu().numpy()
    return embedding.tolist()

def calculate_t5_distance(embedding1, embedding2):
    return scs(np.array(embedding1), np.array(embedding2))

t1="Rewrite the text to highlight the professionalism and preparedness of the team"
t2="Rewrite the text as a sci-fi action sequence"
target="Modify the following so as to highlight the professionalism and preparedness of the team"
print(calculate_t5_distance(get_embedding(t1),get_embedding(target)))
print(calculate_t5_distance(get_embedding(t2),get_embedding(target)))

Error in callback <bound method _WandbInit._resume_backend of <wandb.sdk.wandb_init._WandbInit object at 0x7b2d4a938490>> (for pre_run_cell), with arguments args (<ExecutionInfo object at 7b2b046a09a0, raw_cell="#Evaluate w/ ST5
#https://www.kaggle.com/code/rich.." store_history=True silent=False shell_futures=True cell_id=f8202cf6-00ff-4c35-b016-c95b8aad5572>,),kwargs {}:


TypeError: _WandbInit._resume_backend() takes 1 positional argument but 2 were given

0.8969338572261915
0.43985375435419966
Error in callback <bound method _WandbInit._pause_backend of <wandb.sdk.wandb_init._WandbInit object at 0x7b2d4a938490>> (for post_run_cell), with arguments args (<ExecutionResult object at 7b2b046a07f0, execution_count=13 error_before_exec=None error_in_exec=None info=<ExecutionInfo object at 7b2b046a09a0, raw_cell="#Evaluate w/ ST5
#https://www.kaggle.com/code/rich.." store_history=True silent=False shell_futures=True cell_id=f8202cf6-00ff-4c35-b016-c95b8aad5572> result=None>,),kwargs {}:


TypeError: _WandbInit._pause_backend() takes 1 positional argument but 2 were given

In [17]:
import pandas as pd
fname="/kaggle/working/gem7_ft_cpltsonlyds2_eval_ood.csv"
df=pd.read_csv(fname)
df['scs']=df.apply(lambda row: calculate_t5_distance(get_embedding(str(row['predicted_prompt'])), get_embedding(str(row['true_prompt']))), axis=1)
print(f"Avg of {len(df)} prompts is {df['scs'].mean()}")
#gem7_ft_cds1: .675 -> 200 steps = .681
#gem7_base_cds1: .59
#gem7_base_fewshot_cds1: .6697
#gem7_ft_cds2: .512; using completions only (v2): .56
#gem7_base_cds2: .58 
#gem7_base_fewshot_cds2: .596
#gem7_ft_custds_eval_on_cd1: .5988
#gem7_ft_custds_eval_on_cd2: .6857 (200 steps); 0.6886 (100 steps) 0.7087 (300 steps) 
#gem7_ft_mixedds_on_cd2: Avg of 200 prompts is 0.794
#gem7_ft_mixedds_on_ood: Avg of 200 prompts is 0.706
#gem7_ft_cpltsonlyds2_on_ood: Avg of 200 prompts is 0.6104150672287963

Error in callback <bound method _WandbInit._resume_backend of <wandb.sdk.wandb_init._WandbInit object at 0x7b2d4a938490>> (for pre_run_cell), with arguments args (<ExecutionInfo object at 7b2ed84b6d10, raw_cell="import pandas as pd
fname="/kaggle/working/gem7_ft.." store_history=True silent=False shell_futures=True cell_id=6ae8df17-8982-4bcd-bd6d-14902d838393>,),kwargs {}:


TypeError: _WandbInit._resume_backend() takes 1 positional argument but 2 were given

Avg of 200 prompts is 0.6104150672287963
Error in callback <bound method _WandbInit._pause_backend of <wandb.sdk.wandb_init._WandbInit object at 0x7b2d4a938490>> (for post_run_cell), with arguments args (<ExecutionResult object at 7b2b04839240, execution_count=17 error_before_exec=None error_in_exec=None info=<ExecutionInfo object at 7b2ed84b6d10, raw_cell="import pandas as pd
fname="/kaggle/working/gem7_ft.." store_history=True silent=False shell_futures=True cell_id=6ae8df17-8982-4bcd-bd6d-14902d838393> result=None>,),kwargs {}:


TypeError: _WandbInit._pause_backend() takes 1 positional argument but 2 were given