In [1]:
import os
os.environ["TRANSFORMERS_NO_TF"] = "1"

In [2]:
import sys
print(sys.executable)

c:\Users\yusuf\OneDrive\Desktop\NLP mental health chatbot project\.venv\Scripts\python.exe


In [3]:
## checking if the notebook can detect the cuda so that we can actually switch to the gpu for faster and efficient training.
import torch
print(torch.version.cuda)
print(torch.backends.cudnn.version())
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))

11.8
90100
True
NVIDIA GeForce GTX 1660 Ti with Max-Q Design


Here are the lists of the sources that I am planning to use for the project
1. https://huggingface.co/datasets/ShenLab/MentalChat16K
2. http://hpft.nhs.uk/media/1655/wellbeing-team-cbt-workshop-booklet-2016.pdf
3. https://cursosdepsicologia.com.ar/wp-content/uploads/2021/05/THEDIA1.pdf 

I think this sources will be enough since 3 of them are extensive and have lots of info. Moreover chatbots main aim is to make sure it does not hallucinate to take over the place of a professional and just offer help.

### 1. Necessary Libraries.

In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, BitsAndBytesConfig, DataCollatorForLanguageModeling
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

### 2. Loading the model

In [None]:
ds = load_dataset("ShenLab/MentalChat16K")

print(ds['train'].column_names)

model_name = "google/gemma-3-4b-it"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto"
    )

model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)

model.print_trainable_parameters()

tokenizer = AutoTokenizer.from_pretrained(model_name)

['instruction', 'input', 'output']


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 13,107,200 || all params: 2,792,791,040 || trainable%: 0.4693


### 3. Enabling the model

In [None]:
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

def chat_preprocess(batch_examples):
    prompts = batch_examples['input']
    responses = batch_examples['output']

    all_conversations = []
    for i in range(len(prompts)):
        user_content = str(prompts[i]) if prompts[i] is not None else ""
        assistant_content = str(responses[i]) if responses[i] is not None else ""

        conversation = [
            {"role": "user", "content": user_content},
            {"role": "assistant", "content": assistant_content}
        ]
        all_conversations.append(conversation)

    formatted_texts = tokenizer.apply_chat_template(
        all_conversations,
        tokenize=False,
        add_generation_prompt=False
    )

    tokenized_output = tokenizer(
        formatted_texts,
        truncation=True,
        max_length=512,
        padding="max_length"
    )

    tokenized_output["labels"] = [ids.copy() for ids in tokenized_output["input_ids"]]

    return tokenized_output

tokenized_dataset = ds.map(chat_preprocess, batched=True)

Chat template set for microsoft/phi-2:
{% for message in messages %}{% if message['role'] == 'user' %}{{ 'Instruct: ' + message['content'] + '\n' }}{% elif message['role'] == 'assistant' %}{{ 'Output: ' + message['content'] + (tokenizer.eos_token or '') + '\n' }}{% endif %}{% endfor %}


Map:   0%|          | 0/16084 [00:00<?, ? examples/s]

UndefinedError: 'tokenizer' is undefined

### 4. Setting the hyperparameters of the model.

In [None]:
hyperparameters = TrainingArguments(
    output_dir="./gemma3-qlora-finetuned-results", 
    num_train_epochs=3,
    per_device_train_batch_size=1,      
    gradient_accumulation_steps=4,    
    optim="paged_adamw_8bit",
    learning_rate=2e-4,
    warmup_ratio=0.03,  
    warmup_steps=500,
    weight_decay=0.001,
    logging_dir="./logs-qlora",          
    logging_steps=10,
    save_strategy="epoch",
    bf16=True,
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={'use_reentrant': False}
)

### 5. Fine Tuning the model.

In [None]:
trainer = Trainer(
    model=model,
    args=hyperparameters,
    train_dataset=tokenized_dataset["train"],
    tokenizer=tokenizer,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

trainer.train()

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss


KeyboardInterrupt: 