In [4]:
!pip install trl==0.6.0 transformers==4.32.0 accelerate==0.12.0 peft==0.5.0 -Uqqq
!pip install datasets==2.13.1 bitsandbytes==0.41.1 einops==0.7.0 wandb==0.15.8 -Uqqq

In [1]:
from datasets import load_dataset, DatasetDict
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    GenerationConfig
)
from tqdm import tqdm
from trl import SFTTrainer
import torch
import time
import pandas as pd
import numpy as np
from huggingface_hub import interpreter_login

interpreter_login()


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .


Enter your token (input will not be visible):  ········
Add token as git credential? (Y/n)  n


Token is valid (permission: write).
Your token has been saved to /h/ws_vbhagat/.cache/huggingface/token
Login successful


In [6]:
import os
# disable Weights and Biases
os.environ['WANDB_DISABLED']="true"

In [7]:
huggingface_dataset_name = "Amod/mental_health_counseling_conversations"
dataset = load_dataset(huggingface_dataset_name)

In [8]:
total_length = len(dataset['train'])
print(total_length)

3512


In [9]:

split_dataset = dataset['train'].train_test_split(test_size=0.1)

# Further split the training set into train and validation sets (e.g., 75% train, 25% validation)
train_valid_split = split_dataset['train'].train_test_split(test_size=0.11)

# Combine the splits into a DatasetDict
dataset = DatasetDict({
    'train': train_valid_split['train'],
    'validation': train_valid_split['test'],
    'test': split_dataset['test']
})

# Print the number of examples in each split
print(f"Number of training examples: {len(dataset['train'])}")
print(f"Number of validation examples: {len(dataset['validation'])}")
print(f"Number of test examples: {len(dataset['test'])}")




Number of training examples: 2812
Number of validation examples: 348
Number of test examples: 352


In [10]:
compute_dtype = getattr(torch, "float16")
bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type='nf4',
        bnb_4bit_compute_dtype=compute_dtype,
        bnb_4bit_use_double_quant=False,
    )

In [11]:
os.chdir("../../../../../../")
os.getcwd()

model_name="/fs01/model-weights/Meta-Llama-3-8B-Instruct"
device_map = {"": 0}
original_model = AutoModelForCausalLM.from_pretrained(model_name, 
                                                      device_map=device_map,
                                                      quantization_config=bnb_config,
                                                      trust_remote_code=True,
                                                      use_auth_token=True)



Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [12]:
tokenizer = AutoTokenizer.from_pretrained(model_name,trust_remote_code=True,padding_side="left",add_eos_token=True,add_bos_token=True,use_fast=False)
tokenizer.pad_token = tokenizer.eos_token

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [13]:
%%time
from transformers import set_seed
seed = 42
set_seed(seed)

index = 10

prompt = dataset['test'][index]['Context']
summary = dataset['test'][index]['Response']

formatted_prompt = f"Instruct: Answer the following mental health related question.\n{prompt}\nOutput:\n"
res = gen(original_model,formatted_prompt,100,)
#print(res[0])
output = res[0].split('Output:\n')[1]

dash_line = '-'.join('' for x in range(100))
print(dash_line)
print(f'INPUT PROMPT:\n{formatted_prompt}')
print(dash_line)
print(f'BASELINE HUMAN ANSWER:\n{summary}\n')
print(dash_line)
print(f'MODEL GENERATION - ZERO SHOT:\n{output}')

NameError: name 'gen' is not defined

In [14]:
def create_prompt_formats(sample):
    """
    Format various fields of the sample ('instruction','output')
    Then concatenate them using two newline characters 
    :param sample: Sample dictionnary
    """
    INTRO_BLURB = "Below is an instruction that describes a task. Write a response that appropriately completes the request."
    INSTRUCTION_KEY = "### Instruct: Answer the following question."
    RESPONSE_KEY = "### Output:"
    END_KEY = "### End"
    
    blurb = f"\n{INTRO_BLURB}"
    instruction = f"{INSTRUCTION_KEY}"
    input_context = f"{sample['Context']}" if sample["Context"] else None
    response = f"{RESPONSE_KEY}\n{sample['Response']}"
    end = f"{END_KEY}"
    
    parts = [part for part in [blurb, instruction, input_context, response, end] if part]

    formatted_prompt = "\n\n".join(parts)
    sample["text"] = formatted_prompt

    return sample

In [15]:
from functools import partial

# SOURCE https://github.com/databrickslabs/dolly/blob/master/training/trainer.py
def get_max_length(model):
    conf = model.config
    max_length = None
    for length_setting in ["n_positions", "max_position_embeddings", "seq_length"]:
        max_length = getattr(model.config, length_setting, None)
        if max_length:
            print(f"Found max lenth: {max_length}")
            break
    if not max_length:
        max_length = 1024
        print(f"Using default max length: {max_length}")
    return max_length


def preprocess_batch(batch, tokenizer, max_length):
    """
    Tokenizing a batch
    """
    return tokenizer(
        batch["text"],
        max_length=max_length,
        truncation=True,
    )

# SOURCE https://github.com/databrickslabs/dolly/blob/master/training/trainer.py
def preprocess_dataset(tokenizer: AutoTokenizer, max_length: int,seed, dataset):
    """Format & tokenize it so it is ready for training
    :param tokenizer (AutoTokenizer): Model Tokenizer
    :param max_length (int): Maximum number of tokens to emit from tokenizer
    """
    
    # Add prompt to each sample
    print("Preprocessing dataset...")
    dataset = dataset.map(create_prompt_formats)#, batched=True)
    
    # Apply preprocessing to each batch of the dataset & and remove 'instruction', 'context', 'response', 'category' fields
    _preprocessing_function = partial(preprocess_batch, max_length=max_length, tokenizer=tokenizer)
    dataset = dataset.map(
        _preprocessing_function,
        batched=True,
        remove_columns=['Context', 'Response'],
    )

    # Filter out samples that have input_ids exceeding max_length
    dataset = dataset.filter(lambda sample: len(sample["input_ids"]) < max_length)
    
    # Shuffle dataset
    dataset = dataset.shuffle(seed=seed)

    return dataset

In [16]:
## Pre-process dataset
max_length = get_max_length(original_model)
print(max_length)

train_dataset = preprocess_dataset(tokenizer, max_length,seed, dataset['train'])
eval_dataset = preprocess_dataset(tokenizer, max_length,seed, dataset['validation'])

Found max lenth: 8192
8192
Preprocessing dataset...


Map:   0%|          | 0/2812 [00:00<?, ? examples/s]

Map:   0%|          | 0/2812 [00:00<?, ? examples/s]

Filter:   0%|          | 0/2812 [00:00<?, ? examples/s]

Preprocessing dataset...


Map:   0%|          | 0/348 [00:00<?, ? examples/s]

Map:   0%|          | 0/348 [00:00<?, ? examples/s]

Filter:   0%|          | 0/348 [00:00<?, ? examples/s]

In [17]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

config = LoraConfig(
    r=32, #Rank
    lora_alpha=32,
    target_modules=[
        'q_proj',
        'k_proj',
        'v_proj',
        'dense'
    ],
    bias="none",
    lora_dropout=0.05,  # Conventional
    task_type="CAUSAL_LM",
)

# 1 - Enabling gradient checkpointing to reduce memory usage during fine-tuning
original_model.gradient_checkpointing_enable()

peft_model = get_peft_model(original_model, config)

In [18]:
# 2 - Using the prepare_model_for_kbit_training method from PEFT
# Preparing the Model for QLoRA
original_model = prepare_model_for_kbit_training(original_model)

In [19]:
print(print_number_of_trainable_model_parameters(peft_model))

NameError: name 'print_number_of_trainable_model_parameters' is not defined

In [20]:
output_dir = f'/h/ws_vbhagat/llm_finetuning/Deloitte/finetuned/Vijay_FineTuning/Vijay_FineTuning_Checkpoints-{str(int(time.time()))}'
import transformers

peft_training_args = TrainingArguments(
    output_dir = output_dir,
    warmup_steps=1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    max_steps=1000,
    learning_rate=2e-4,
    optim="paged_adamw_8bit",
    logging_steps=25,
    logging_dir="./logs",
    save_strategy="steps",
    save_steps=25,
    evaluation_strategy="steps",
    eval_steps=25,
    do_eval=True,
    gradient_checkpointing=True,
    report_to="none",
    overwrite_output_dir = 'True',
    group_by_length=True,
)

peft_model.config.use_cache = False

peft_trainer = transformers.Trainer(
    model=peft_model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    args=peft_training_args,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
max_steps is given, it will override any value given in num_train_epochs


In [None]:
peft_trainer.train()



Step,Training Loss,Validation Loss
25,2.3818,2.278322
50,2.0928,2.270487
75,2.1405,2.212942
100,2.0843,2.22854
125,2.1722,2.204546
150,1.9656,2.22171




In [21]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

base_model_id = "/fs01/model-weights/Meta-Llama-3-8B-Instruct"
base_model = AutoModelForCausalLM.from_pretrained(base_model_id, 
                                                      device_map='auto',
                                                      quantization_config=bnb_config,
                                                      trust_remote_code=True,
                                                      use_auth_token=True)



Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [24]:
from peft import PeftModel

ft_model = PeftModel.from_pretrained(base_model, "/h/ws_vbhagat/llm_finetuning/Deloitte/finetuned/Vijay_FineTuning/Vijay_FineTuning_Checkpoints-1721306631/checkpoint-125",torch_dtype=torch.float16,is_trainable=False)

AttributeError: 'str' object has no attribute 'args'

In [38]:
model_save_path = "/h/ws_vbhagat/llm_finetuning/Deloitte/finetuned/Vijay_FineTuning/Vijay_FineTuning_SavedModels/llama3-8b-finetuned-mentalwellness-v1.pth"
torch.save(ft_model.state_dict(), model_save_path)

print(f"Model saved to {model_save_path}")

Model saved to /h/ws_vbhagat/llm_finetuning/Deloitte/finetuned/Vijay_FineTuning/Vijay_FineTuning_SavedModels/llama3-8b-finetuned-mentalwellness-v1.pth


In [30]:
original_model = AutoModelForCausalLM.from_pretrained(base_model_id, 
                                                      device_map='auto',
                                                      quantization_config=bnb_config,
                                                      trust_remote_code=True,
                                                      use_auth_token=True)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [34]:
import pandas as pd

contexts = dataset['test'][0:10]['Context']
human_baseline_responses = dataset['test'][0:10]['Response']


original_model_responses = []
instruct_model_responses = []
peft_model_responses = []

for idx, context in enumerate(contexts):
    human_baseline_text_output = human_baseline_responses[idx]
    prompt = f"Instruct: Answer the following question.\n{context}\nOutput:\n"
    
    original_model_res = gen(original_model,prompt,100,)
    original_model_text_output = original_model_res[0].split('Output:\n')[1]
    
    peft_model_res = gen(ft_model,prompt,100,)
    peft_model_output = peft_model_res[0].split('Output:\n')[1]
    print(peft_model_output)
    peft_model_text_output, success, result = peft_model_output.partition('###')

    original_model_responses.append(original_model_text_output)
    peft_model_responses.append(peft_model_text_output)

zipped_summaries = list(zip(human_baseline_responses, original_model_responses, peft_model_responses))
 
df = pd.DataFrame(zipped_summaries, columns = ['human_baseline_responses', 'original_model_responses', 'peft_model_responses'])
df

This is a simulated response based on the prompt: Instruct: Answer the following question.
I've gone to a couple therapy sessions so far and still everytime I walk in I get nervous and shaky.  Is this normal? Should I still be feeling like this?

This is a simulated response based on the prompt: Instruct: Answer the following question.
I want a secure relationship with someone that wants to be with me and who will actually put effort into it.
 I seem to gravitate toward unavailable men and those that want intimacy and no relationship. I let men dictate and control me because they accuse me of being controlling. I let men emotionally abuse me and I am at their beck and call.
 I am not comfortable being alone or doing anything by myself. I feel I need the security of someone being around just to survive. I know what I'm doing wrong and I do it anyway just hoping things will change. How do I stop this behavior and thought process?

This is a simulated response based on the prompt: Instruc

Unnamed: 0,human_baseline_responses,original_model_responses,peft_model_responses
0,I would be more concerned with how is this bei...,This is a simulated response based on the prom...,This is a simulated response based on the prom...
1,Good for you on your keen awareness of your di...,This is a simulated response based on the prom...,This is a simulated response based on the prom...
2,Mental Health is an exciting and rewarding fie...,This is a simulated response based on the prom...,This is a simulated response based on the prom...
3,"As social creatures, we humans all long for de...",This is a simulated response based on the prom...,This is a simulated response based on the prom...
4,I'm sorry for your disappointment with this gi...,This is a simulated response based on the prom...,This is a simulated response based on the prom...
5,It sounds like you are in a tough situation. Y...,This is a simulated response based on the prom...,This is a simulated response based on the prom...
6,One of the first steps is to manage anxiety an...,This is a simulated response based on the prom...,This is a simulated response based on the prom...
7,I would first suggest you sitting down with hi...,This is a simulated response based on the prom...,This is a simulated response based on the prom...
8,"Hi Arkansas, Your situation sounds like a case...",This is a simulated response based on the prom...,This is a simulated response based on the prom...
9,You didn't ask a direct question because I fee...,This is a simulated response based on the prom...,This is a simulated response based on the prom...


In [40]:
!pip install gguf_library

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


[31mERROR: Could not find a version that satisfies the requirement gguf_library (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for gguf_library[0m[31m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
import torch


model = torch.load('/h/ws_vbhagat/llm_finetuning/Deloitte/finetuned/Vijay_FineTuning/Vijay_FineTuning_SavedModels/llama3-8b-finetuned-mentalwellness-v1.pth')



In [3]:
model.save_pretrained("/h/ws_vbhagat/llm_finetuning/Deloitte/finetuned/Vijay_FineTuning/Vijay_FineTuning_SavedModels")

AttributeError: 'collections.OrderedDict' object has no attribute 'save_pretrained'