In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import time
import os

# Hugging Face access token. It is read from the HF_TOKEN environment variable
# so the secret never has to be pasted into (and saved with) the notebook.
# Falls back to an empty string, the value that was previously hard-coded here.
hf_access_token = os.environ.get("HF_TOKEN", "")

In [2]:
# --- Checkpoint identifiers ---
base_model_name = "mistralai/Mistral-7B-Instruct-v0.1"  # Base Model which has been finetuned to obtain SFT Model
sft_model_name = "WizardLMTeam/WizardMath-7B-V1.1"

# --- Device placement ---
# Memory budget of "46GB" for each of the device indices 0-4; passed to
# `from_pretrained` together with `device_map` in the inference cells.
max_memory = {gpu_index: "46GB" for gpu_index in range(5)}
device_map = "auto"

In [19]:
# Scaling factor (lambda) for the task vector; it is passed as `scaling_coef`
# when the harm vector is applied to the target model.
# NOTE(review): the checkpoint directory used when saving the edited model
# contains "lamda_01" — confirm that name and this value are meant to agree.
lamda = 2

In [4]:
# Finetuned Model: loaded from a local checkpoint directory, safetensors weights only.
finetuned_model_id = "/mnt/data/"
finetuned_model = AutoModelForCausalLM.from_pretrained(
    finetuned_model_id,
    use_safetensors=True,
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [10]:
# Target Model --> Any SFT Model (the model that the harm vector is later applied to)
target_model = AutoModelForCausalLM.from_pretrained(
    sft_model_name,
    use_safetensors=True,
)

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [11]:
# Pretrained Model: passed to TaskVector together with the finetuned model.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    use_safetensors=True,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [12]:
from task_vectors import TaskVector
# Obtaining the Harm Vector: a TaskVector built from the pretrained base model
# and the finetuned model.
# NOTE(review): presumably this holds the per-parameter difference between the
# two models — confirm the direction/sign against task_vectors.TaskVector.
harm_task_vector = TaskVector(base_model, finetuned_model)

In [13]:
# Inspect the harm vector; the recorded output is a dict keyed by parameter name
# with tensor values.
# NOTE(review): this dumps every tensor into the notebook output — consider
# displaying only a summary (e.g. parameter names and shapes) before sharing.
harm_task_vector.get_print()

{'model.embed_tokens.weight': tensor([[-1.2851e-04, -2.0087e-05, -4.8637e-05,  ..., -3.1590e-05,
          -1.8597e-05, -8.7619e-06],
         [-1.3351e-04,  2.1219e-05, -2.1744e-04,  ..., -5.6267e-05,
          -1.0109e-04, -5.4836e-05],
         [ 6.1035e-05, -2.3842e-05,  1.8597e-05,  ..., -8.8692e-05,
           1.1444e-04, -1.1063e-04],
         ...,
         [ 4.5776e-05, -1.0109e-04, -8.2970e-05,  ...,  1.0490e-05,
           3.8147e-05,  5.7220e-05],
         [ 7.7248e-05,  5.7220e-05, -4.0054e-05,  ..., -8.7261e-05,
           3.8147e-06,  1.7643e-05],
         [-2.6703e-05, -4.2915e-05,  7.6294e-05,  ...,  6.1035e-05,
           5.9605e-05,  1.4305e-04]]),
 'model.layers.0.self_attn.q_proj.weight': tensor([[ 1.5366e-04,  9.6321e-05,  6.0558e-05,  ...,  4.5776e-05,
           7.5936e-05,  4.7445e-05],
         [ 1.1790e-04,  1.0967e-04, -7.1168e-05,  ...,  3.7074e-05,
           6.6981e-05, -1.5950e-04],
         [-8.4341e-05,  1.1247e-04,  1.5441e-04,  ...,  3.8147e-05,
     

In [14]:
# Keep the value returned by get_print() so it can be passed to
# apply_special_matrix when editing the target model.
# NOTE(review): presumably the same parameter-name -> tensor dict displayed in
# the previous cell's output — confirm against task_vectors.TaskVector.
harm_vector = harm_task_vector.get_print()

In [None]:
# Edit the target (SFT) model with the harm vector, scaled by `lamda`.
# NOTE(review): whether the vector is added or subtracted, and whether
# `target_model` is modified in place, depends on apply_special_matrix — confirm.
safe_target_model = harm_task_vector.apply_special_matrix(target_model, harm_vector, scaling_coef=lamda)

In [None]:
# Display the edited model's parameters as a sanity check.
# NOTE(review): this renders the whole state dict in the cell output; a summary
# of names and shapes would be much lighter.
safe_target_model.state_dict()

In [None]:
# Persist the edited model so it can be reloaded for inference.
# NOTE(review): the directory name says "lamda_01" while `lamda` is set to 2 in
# the visible cell, and the relative path climbs six levels before reaching
# mnt/data — confirm both the name and the destination.
safe_target_model.save_pretrained("../../../../../../mnt/data/rima/checkpoints/safe_edited_model_Task_Vector_lamda_01_WizardLM")

# Data preparation for Inference

In [None]:
import pandas as pd

# Load the full dataset, then keep only the rows belonging to the selected cluster.
df_all = pd.read_csv("/dataset/") # Add your dataset here
# NOTE(review): `c_i` is not assigned in any visible cell — define it before running.
df = df_all.loc[df_all['Cluster Id'] == c_i] # This line will change based on the Dataset Category
print(df)

# Build the prompts: every question is prefixed with an answer-only instruction.
ques = [
    "Only provide answer of the given question and do not provide anything else. " + question
    for question in df['Question']
]

print(len(ques))

# Inference using SAFE model

In [None]:
PATH_OF_THE_SAFE_MODEL = "Path of the Safe Model"

# The tokenizer comes from the SFT checkpoint. Batches are padded on the left,
# and the EOS token id doubles as the pad token id.
tokenizer = AutoTokenizer.from_pretrained(sft_model_name, device_map=device_map)
tokenizer.padding_side = 'left'
tokenizer.pad_token_id = tokenizer.eos_token_id

# Reload the saved safe (edited) model, placed according to `device_map` / `max_memory`.
safe_edited_loaded_model = AutoModelForCausalLM.from_pretrained(
    PATH_OF_THE_SAFE_MODEL,
    device_map=device_map,
    max_memory=max_memory,
)


In [None]:
# Generate answers with the safe (edited) model, one batch of prompts at a time.
batch_size = 40  # number of prompts sent to generate() per call
Final_Output = []
qs_sliced = [ques[i:i + batch_size] for i in range(0, len(ques), batch_size)]

print(len(qs_sliced))
for x in qs_sliced:
    # truncation=True is needed for max_length to take effect: with padding=True
    # alone the batch is padded to its longest prompt and max_length is ignored.
    tokenized_input = tokenizer(
        x,
        return_tensors='pt',
        padding=True,
        truncation=True,
        max_length=256,
    )

    model_output = safe_edited_loaded_model.generate(
        input_ids=tokenized_input['input_ids'].to('cuda'),
        attention_mask=tokenized_input['attention_mask'].to('cuda'),
        max_new_tokens=128
    )
    # batch_decode handles the whole batch of token-id sequences at once.
    # NOTE: for a causal LM the generated sequences start with the prompt
    # tokens, so each decoded string contains the prompt followed by the answer.
    Fout = tokenizer.batch_decode(model_output, skip_special_tokens=True)
    Final_Output.extend(Fout)

print(Final_Output)

In [None]:
# One row per prompt: the prompt text next to the safe model's decoded output.
df_out = pd.DataFrame({
    'question': ques,
    'Safe ANSWER': Final_Output,
})

In [None]:
# Saving the Output: write the safe-model answers to CSV without the index column.
# NOTE(review): '/OutputsSFT/' ends with a slash (no file name) and is named
# "SFT" although this frame holds the safe model's answers — presumably a
# placeholder; confirm the destination before running.

df_out.to_csv('/OutputsSFT/',index=False)

# Inference using SFT model

In [None]:
PATH_OF_THE_SFT_MODEL = "Path of the SFT Model"

# Same tokenizer setup as for batched generation elsewhere in the notebook:
# left padding, with the EOS token id reused as the pad token id.
tokenizer = AutoTokenizer.from_pretrained(sft_model_name, device_map=device_map)
tokenizer.padding_side = 'left'
tokenizer.pad_token_id = tokenizer.eos_token_id

# Load the SFT model, placed according to `device_map` / `max_memory`.
edited_model = AutoModelForCausalLM.from_pretrained(
    PATH_OF_THE_SFT_MODEL,
    device_map=device_map,
    max_memory=max_memory,
)

In [None]:
# Generate answers with the SFT model, one batch of prompts at a time.
batch_size = 40  # number of prompts sent to generate() per call
Final_Output_edited_model = []
qs_sliced = [ques[i:i + batch_size] for i in range(0, len(ques), batch_size)]
print(len(qs_sliced))
for x in qs_sliced:
    # truncation=True is needed for max_length to take effect: with padding=True
    # alone the batch is padded to its longest prompt and max_length is ignored.
    tokenized_input = tokenizer(
        x,
        return_tensors='pt',
        padding=True,
        truncation=True,
        max_length=256,
    )

    model_output = edited_model.generate(
        input_ids=tokenized_input['input_ids'].to('cuda'),
        attention_mask=tokenized_input['attention_mask'].to('cuda'),
        max_new_tokens=128
    )
    # batch_decode handles the whole batch of token-id sequences at once.
    # NOTE: for a causal LM the generated sequences start with the prompt
    # tokens, so each decoded string contains the prompt followed by the answer.
    Fout = tokenizer.batch_decode(model_output, skip_special_tokens=True)
    # Fix: the original extended with `Fout_clean`, a name that is never
    # defined, which raised NameError on the first batch.
    Final_Output_edited_model.extend(Fout)

print(Final_Output_edited_model)

In [None]:
# Collect the SFT model's decoded outputs and write them to CSV.
# index=False matches the safe-model export (which also omits the index) and
# avoids a stray unnamed index column when the file is read back.
# NOTE(review): "Outputs/" has no file name — presumably a placeholder; confirm.
df_out = pd.DataFrame({'SFT ANSWER': Final_Output_edited_model})
df_out.to_csv("Outputs/", index=False)