In [1]:
%%capture
%pip install -U transformers 
%pip install -U datasets 
%pip install -U accelerate 
%pip install -U peft 
%pip install -U trl 
%pip install -U bitsandbytes 
%pip install -U wandb

In [3]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)
import os, torch, wandb
from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format


In [5]:
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()

hf_token = user_secrets.get_secret("HUGGINGFACE_TOKEN")
login(token = hf_token)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [6]:
wb_token = user_secrets.get_secret("wandb")

wandb.login(key=wb_token)
run = wandb.init(
    project='Fine-tune Llama 3 8B on FAQ Dataset', 
    job_type="training", 
    anonymous="allow"
)

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33msatwikkishore6953[0m ([33msatwikkishore[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [7]:
base_model = "/kaggle/input/llama-3/transformers/8b-chat-hf/1"
dataset_name = "/kaggle/input/mental-health-counseling-conversations-k/archive/Dataset.csv"
new_model = "llama-3-8b-chat-MH"

In [9]:
# Set torch dtype and attention implementation
if torch.cuda.get_device_capability()[0] >= 8:
    !pip install -qqq flash-attn
    torch_dtype = torch.bfloat16
    attn_implementation = "flash_attention_2"
else:
    torch_dtype = torch.float16
    attn_implementation = "eager"

In [10]:
# QLoRA config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)

# Load model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [11]:
import pandas as pd
df=messages = pd.read_csv(dataset_name)
# df = df[['instruction','category','intent','response']]
# for i,j in df.iterrows():
#     j['instruction'],j['response'] = j['instruction'].replace("{{Order Number}}", ""),j['response'].replace("{{Order Number}}", "").replace("{{","").replace("}}","")
# df = df[df['category'] != 'SHIPPING']
# new_df  = pd.DataFrame()
# count = 0
# for j in df['category'].unique():
#     a = df[df['category'] == j]
#     if count == 2:
#         break
#     for i in a['intent'].unique():
#         temp_df = df.loc[df['intent'] == i].head(100)
#         new_df = pd.concat([new_df, temp_df],ignore_index = True)
#     count += 1

# df = new_df
df.tail()

Unnamed: 0,Context,Response
3507,My grandson's step-mother sends him to school ...,Absolutely not! It is never in a child's best ...
3508,My boyfriend is in recovery from drug addictio...,I'm sorry you have tension between you and you...
3509,The birth mother attempted suicide several tim...,"The true answer is, ""no one can really say wit..."
3510,I think adult life is making him depressed and...,How do you help yourself to believe you requir...
3511,I just took a job that requires me to travel f...,hmm this is a tough one!


In [28]:
from datasets import Dataset
dataset = Dataset.from_pandas(df)

for i in range(3):
    print(dataset[i])

{'Context': "I'm going through some things with my feelings and myself. I barely sleep and I do nothing but think about how I'm worthless and how I shouldn't be here.\n   I've never tried or contemplated suicide. I've always wanted to fix my issues, but I never get around to it.\n   How can I change my feeling of being worthless to everyone?", 'Response': "If everyone thinks you're worthless, then maybe you need to find new people to hang out with.Seriously, the social context in which a person lives is a big influence in self-esteem.Otherwise, you can go round and round trying to understand why you're not worthless, then go back to the same crowd and be knocked down again.There are many inspirational messages you can find in social media. \xa0Maybe read some of the ones which state that no person is worthless, and that everyone has a good purpose to their life.Also, since our culture is so saturated with the belief that if someone doesn't feel good about themselves that this is someho

In [29]:
# LoRA config
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['up_proj', 'down_proj', 'gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj']
)
model, tokenizer = setup_chat_format(model, tokenizer)
model = get_peft_model(model, peft_config)

In [25]:
df = df.dropna()


In [26]:
df.isnull().sum()

Context     0
Response    0
dtype: int64

In [30]:
 # Only use 1000 samples for quick demo

def format_chat_template(row):
    row_json = [{"role": "user", "content": row["Context"]},
               {"role": "assistant", "content": row["Response"]}]
    row["text"] = tokenizer.apply_chat_template(row_json, tokenize=False)
    return row

dataset = dataset.map(
    format_chat_template,
    num_proc= 4,
)

dataset

  self.pid = os.fork()


Map (num_proc=4):   0%|          | 0/3508 [00:00<?, ? examples/s]

Dataset({
    features: ['Context', 'Response', '__index_level_0__', 'text'],
    num_rows: 3508
})

In [31]:
dataset['text'][3]

"<|im_start|>user\nI'm going through some things with my feelings and myself. I barely sleep and I do nothing but think about how I'm worthless and how I shouldn't be here.\n   I've never tried or contemplated suicide. I've always wanted to fix my issues, but I never get around to it.\n   How can I change my feeling of being worthless to everyone?<|im_end|>\n<|im_start|>assistant\nTherapy is essential for those that are feeling depressed and worthless. When I work with those that are experiencing concerns related to feeling of depression and issues with self esteem. I generally work with my client to help build coping skills to reduce level of depression and to assist with strengthening \xa0self esteem, by guiding my client with CBT practices. CBT helps with gaining a better awareness of how your thought process influences your\xa0belief system, and how your beliefs impact your actions and the outcome of your behaviors. \xa0This process isn’t easy but it helps teach an individual that 

In [32]:
dataset = dataset.train_test_split(test_size=0.1)

In [42]:
training_arguments = TrainingArguments(
    output_dir=new_model,
    per_device_train_batch_size=6,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=11,
    evaluation_strategy="steps",
    eval_steps=0.2,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=True,  # Enable mixed precision
    bf16=False, # Use bf16 if supported by your hardware (alternative to fp16)
    group_by_length=True,
    report_to="wandb"
)




In [44]:
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    peft_config=peft_config,
    max_seq_length= 512,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing= False,
)
model.gradient_checkpointing_enable()  # Enable gradient checkpointing



Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/3157 [00:00<?, ? examples/s]

Map:   0%|          | 0/351 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


In [46]:
!export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True


  pid, fd = os.forkpty()


In [48]:
import torch
torch.cuda.empty_cache()


In [55]:
for param in model.parameters():
    if param.dtype in [torch.float32, torch.float64, torch.complex64, torch.complex128]:
        param.requires_grad = True


In [58]:
for name, param in model.named_parameters():
    if not param.requires_grad:
        print(f"Parameter {name} does not require gradients. Dtype: {param.dtype}")


Parameter base_model.model.base_model.model.model.layers.0.self_attn.q_proj.base_layer.weight does not require gradients. Dtype: torch.uint8
Parameter base_model.model.base_model.model.model.layers.0.self_attn.k_proj.base_layer.weight does not require gradients. Dtype: torch.uint8
Parameter base_model.model.base_model.model.model.layers.0.self_attn.v_proj.base_layer.weight does not require gradients. Dtype: torch.uint8
Parameter base_model.model.base_model.model.model.layers.0.self_attn.o_proj.base_layer.weight does not require gradients. Dtype: torch.uint8
Parameter base_model.model.base_model.model.model.layers.0.mlp.gate_proj.base_layer.weight does not require gradients. Dtype: torch.uint8
Parameter base_model.model.base_model.model.model.layers.0.mlp.up_proj.base_layer.weight does not require gradients. Dtype: torch.uint8
Parameter base_model.model.base_model.model.model.layers.0.mlp.down_proj.base_layer.weight does not require gradients. Dtype: torch.uint8
Parameter base_model.mod

In [49]:
trainer.train()



RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [None]:
# Save the fine-tuned model
trainer.model.save_pretrained(new_model)
wandb.finish()
model.config.use_cache = True

In [None]:
# Save the fine-tuned model
trainer.model.save_pretrained(new_model)
trainer.model.push_to_hub(new_model, use_temp_dir=False)

In [None]:
messages = [{"role": "user", "content": "I want to cancle my order"}]

prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    
inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to("cuda")

outputs = model.generate(**inputs, max_length=150, num_return_sequences=1)

text = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(text.split("assistant")[1])

In [None]:
messages = [{"role": "user", "content": "I want to replace my order i got wrong delavery"}]

prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    
inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to("cuda")

outputs = model.generate(**inputs, max_length=150, num_return_sequences=1)

text = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(text.split("assistant")[1])

In [1]:
import torch 
import transformers
from time import time
from torch import cuda, bfloat16
model_id = r"C:\Users\satwi\Downloads\CompanionAI\merged_model"

device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
print(device)
# set quantization configuration to load large model with less GPU memory
# this requires the `bitsandbytes` library
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16, 
)

time_start = time()
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    trust_remote_code=True,
    max_new_tokens=1024
)
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='cuda:0',
)  
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
time_end = time()
print(f"Prepare model, tokenizer: {round(time_end-time_start, 3)} sec.")

query_pipeline = transformers.pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.float16,
        max_length=1024,
        device_map="auto"
)

def generate_response(input_text):
    time_start = time()

    # Generate sequences
    sequences = query_pipeline(
        input_text,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=200,
    )

    # Print the generated sequences
    for seq in sequences:
        parts = seq['generated_text'].split('\n', 1)
        if len(parts) > 1:
            cleaned_text = parts[1]
        else:
            cleaned_text = ''  # If there is no newline, return an empty string

        print(cleaned_text)
        return cleaned_text
    time_end = time()
    print(f"Prepare model, tokenizer: {round(time_end-time_start, 3)} sec.")
    



  from .autonotebook import tqdm as notebook_tqdm


cuda:0


Loading checkpoint shards: 100%|██████████| 3/3 [01:02<00:00, 20.89s/it]


Prepare model, tokenizer: 71.54 sec.


In [3]:
a = generate_response("I think i am worth less i should kill myself")

Result: I think i am worth less i should kill myself. Unterscheidung between the two is often difficult and can be a source of confusion for those who are experiencing suicidal thoughts or behaviors.
It is important to recognize that these feelings are not a reflection of your worth as a person, and that there are many people who care about you and want to help. If you are in immediate danger, please call emergency services or go to the nearest hospital.
Source: www.psychologytoday.com
It is important to recognize that these feelings are not a reflection of your worth as a person, and that there are many people who care about you and want to help. If you are in immediate danger, please call emergency services or go to the nearest hospital.
If you are struggling with suicidal thoughts or behaviors, it is important to seek help from a mental health professional. They can work with you to develop a plan to keep you safe and provide you with the
<class 'dict'> {'generated_text': 'I think i

In [4]:
parts = a.split('\n', 1)
if len(parts) > 1:
    cleaned_text = parts[1]
else:
    cleaned_text = ''  # If there is no newline, return an empty string

print(cleaned_text)

It is important to recognize that these feelings are not a reflection of your worth as a person, and that there are many people who care about you and want to help. If you are in immediate danger, please call emergency services or go to the nearest hospital.
Source: www.psychologytoday.com
It is important to recognize that these feelings are not a reflection of your worth as a person, and that there are many people who care about you and want to help. If you are in immediate danger, please call emergency services or go to the nearest hospital.
If you are struggling with suicidal thoughts or behaviors, it is important to seek help from a mental health professional. They can work with you to develop a plan to keep you safe and provide you with the
