In [None]:
# Show the installed torch-related packages.
# The first pattern ("torch") also matches torchvision; the second narrows to torchvision only.
!pip list | grep torch
!pip list | grep torchvision

In [None]:
# Get the optimal pip installation command:
# downloads Unsloth's _auto_install.py from GitHub and pipes it straight into python.
# NOTE(review): this executes a remotely fetched script without pinning a commit — confirm that is acceptable.
!wget -qO- https://raw.githubusercontent.com/unslothai/unsloth/main/unsloth/_auto_install.py | python -

In [None]:
# Upgrade pip itself, quietly (-q).
!pip install -q --upgrade pip

In [None]:
# Install Unsloth from PyPI, then force-reinstall the GitHub build with the
# cu124 / Ampere / torch 2.5.1 extra, bypassing the cache and skipping dependency resolution (--no-deps).
# NOTE(review): neither install is version-pinned, so reruns may pull different code.
!pip install unsloth
!pip install --force-reinstall --no-cache-dir --no-deps "unsloth[cu124-ampere-torch251] @ git+https://github.com/unslothai/unsloth.git"

In [None]:
from huggingface_hub import login
from google.colab import userdata

# Read the Hugging Face token from Colab's secret store (never hard-coded)
# and authenticate this session with it. `hf_token` is reused by later cells.
hf_token = userdata.get('HF_TOKEN')
login(token=hf_token)

In [None]:
import wandb

# Authenticate with Weights & Biases using the key kept in Colab secrets.
wb_token = userdata.get('wandb_api')
wandb.login(key=wb_token)

# Open the tracking run for this training job.
wandb_run_options = {
    "project": 'Fine-tune-DeepSeek-R1-Distill-Qwen-7B on ESCov',
    "job_type": "training",
    "anonymous": "allow",
}
run = wandb.init(**wandb_run_options)

In [None]:
from unsloth import FastLanguageModel
import torch

# Arguments for loading the base checkpoint through Unsloth.
base_model_options = dict(
    model_name="unsloth/DeepSeek-R1-Distill-Qwen-7B",
    max_seq_length=2048,
    dtype=torch.bfloat16,  # compute dtype; the author notes float16/bfloat16 are the usual choices
    load_in_4bit=True,     # load the weights 4-bit quantized to cut GPU memory use
    token=hf_token,
)

# Returns the model together with its matching tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(**base_model_options)

Load Data

In [None]:
from datasets import load_dataset
# Load the processed ESConv dataset and keep handles to both of its splits.
dataset = load_dataset("jordanfan/esconv_processed")
dataset_train, dataset_test = dataset["train"], dataset["test"]

# Counseling strategies retained for fine-tuning; rows with any other strategy are dropped.
target_strategies = [
    "Question",
    "Affirmation and Reassurance",
    "Providing Suggestions",
    "Restatement or Paraphrasing",
]


def _uses_target_strategy(example):
    """Return True when the example's strategy is one of `target_strategies`."""
    return example["strategy"] in target_strategies


# Apply the same strategy filter to both splits.
filtered_train = dataset_train.filter(_uses_target_strategy)
filtered_test = dataset_test.filter(_uses_target_strategy)

# No separate validation split is carved out of train:
# the filtered test split is what later cells call `val_dataset`.
train_dataset, val_dataset = filtered_train, filtered_test

In [None]:
# Shape of the unfiltered training split, for comparison with the filtered sizes below.
print(dataset_train.shape)  # Check dimensions

In [None]:
# Print the summary of each filtered split.
print(filtered_train)
print(filtered_test)

In [None]:
# Report how many examples each split holds after filtering.
for split_label, split_data in (("Training", train_dataset), ("Validation", val_dataset)):
    print(f"{split_label} set size: {len(split_data)}")

In [None]:
# Display one filtered training record to see which fields are available.
train_dataset[3]

Pre-Fine-Tuning Inference

In [None]:
# Prompt template used for the pre-fine-tuning baseline.
# Single-brace names are str.format placeholders; the doubled braces ({{ and }})
# render as the literal braces of the JSON object.
prompt_style = """Below is a conversation between a student seeking mental health support and an AI counselor.
The AI counselor provides empathetic, evidence-based responses tailored to the student's concerns.
The student's concerns include prior conversation history and the most recent message.
The counselor outputs a structured JSON response.

### Student:
**Conversation History:**
{user_history}

**Current Message:**
{user_text}

### Counselor Structured JSON Response:
```json
{{
    "emotion_type": "{emotion_type}",
    "emotion_intensity (1-5)": {emotion_intensity_initial},
    "problem_type": "{problem_type}",
    "counseling_strategy": "{strategy}",
    "answer": "{counselor_first}"
}}
```
"""

In [None]:
# Baseline check: generate with the base model before any fine-tuning.
user_history = "Hello good afternoon. I'm feeling anxious that I am going to lose my job. I hope I don't. I am on short term disability and I am not ready to go back to work yet but I do not have any job protection."
user_text ="It's not ending yet, but no my job is not protected. I live in the United States, but I have not been at my job long enough to earn protection for medical leave. you have to have been here for a year, and I started November 2020 I'm afraid that I will lose my job since I'm still on disability for the foreseeable future."
FastLanguageModel.for_inference(model)

# Fill in only the conversation fields; the counselor fields are left blank.
# No EOS token is appended: EOS_TOKEN is only defined in a later cell (so this cell
# raised NameError on a fresh kernel), and an end-of-sequence marker at the end of a
# prompt tells the model the text is already finished.
baseline_prompt = prompt_style.format(
    user_history=user_history,
    user_text=user_text,
    emotion_type="",
    emotion_intensity_initial="",
    problem_type="",
    strategy="",
    counselor_first="",
)
inputs = tokenizer([baseline_prompt], return_tensors="pt").to("cuda")
outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=500,
    use_cache=True,
)

# Decode the full sequence (prompt + continuation), special tokens included.
response = tokenizer.decode(outputs[0])
print(response)

In [None]:
# Prompt template used to build the training examples.
# Single-brace names are str.format placeholders filled per example;
# the doubled braces ({{ and }}) render as the literal braces of the JSON object.
train_prompt_style = """Below is a conversation between a student seeking mental health support and an AI counselor.
The AI counselor provides empathetic, evidence-based responses tailored to the student's concerns.
The student's concerns include prior conversation history and the most recent message.
The counselor extracts the "emotion_type"(main emotion they feel going into session),"emotion_intensity (1-5)"(initial intensity of emotion before session),
"problem_type"(topic of conversation), "counseling_strategy"(Aggregated strategies counselor used at the current turn) from the Conversation History and Current Message,
and provide an "answer"(Counselor's first response to the user at the turn),
then outputs a structured JSON response.

### Student:
**Conversation History:**
{user_history}

**Current Message:**
{user_text}

### Counselor Structured JSON Response:
```json
{{
    "emotion_type": "{emotion_type}",
    "emotion_intensity (1-5)": {emotion_intensity_initial},
    "problem_type": "{problem_type}",
    "counseling_strategy": "{strategy}",
    "answer": "{counselor_first}"
}}
```
"""
# End-of-sequence token string taken from the tokenizer; the formatting function appends it to every training example.
EOS_TOKEN = tokenizer.eos_token


In [None]:
def _escape_json_string(value):
    """Escape ``value`` so it can sit between the double quotes of a JSON string.

    Non-string values are converted with ``str`` first, which is what
    ``str.format`` would have inserted for them anyway.
    """
    import json  # local import keeps this cell self-contained

    # json.dumps returns a quoted JSON string; drop the surrounding quotes
    # because the prompt template already supplies them.
    return json.dumps(str(value), ensure_ascii=False)[1:-1]


def formatting_prompts_func(examples, prompt_template=None, eos_token=None):
    """Render a batch of dataset rows into training prompts.

    Args:
        examples: Batched mapping of column name -> list of values. Reads the
            columns ``user_history``, ``user_text``, ``counselor_first``,
            ``emotion_type``, ``emotion_intensity_initial``, ``problem_type``
            and ``strategy``.
        prompt_template: ``str.format`` template to fill. Defaults to the
            notebook-level ``train_prompt_style``.
        eos_token: String appended to every rendered prompt. Defaults to the
            notebook-level ``EOS_TOKEN``.

    Returns:
        ``{"text": [...]}`` with one rendered prompt per input row.

    The string-valued JSON fields (emotion, problem type, strategy, answer) are
    JSON-escaped before insertion, so quotes, backslashes or newlines in the
    data no longer produce an invalid JSON object in the training target.
    """
    if prompt_template is None:
        prompt_template = train_prompt_style
    if eos_token is None:
        eos_token = EOS_TOKEN

    rows = zip(
        examples["user_history"],               # Student's concern history
        examples["user_text"],                  # Student's most recent message
        examples["counselor_first"],            # Counselor's response
        examples["emotion_type"],               # Emotion type
        examples["emotion_intensity_initial"],  # Initial intensity
        examples["problem_type"],               # Problem category
        examples["strategy"],                   # Counseling strategy
    )

    texts = []
    for user_history, user_text, answer, emotion, intensity, problem, strat in rows:
        rendered = prompt_template.format(
            emotion_type=_escape_json_string(emotion),
            emotion_intensity_initial=intensity,  # inserted unquoted, as a bare value
            problem_type=_escape_json_string(problem),
            strategy=_escape_json_string(strat),
            user_history=user_history,
            user_text=user_text,
            counselor_first=_escape_json_string(answer),
        )
        # Must add the EOS token, otherwise generation will go on forever!
        texts.append(rendered + eos_token)

    return {"text": texts}

# Render the prompt text for every row of both splits (batched map adds a "text" column).
format_train_dataset, format_val_dataset = (
    split.map(formatting_prompts_func, batched=True)
    for split in (train_dataset, val_dataset)
)

# Show one rendered training prompt as a sanity check.
print(format_train_dataset[10]["text"])

In [None]:
def _non_text_columns(split):
    """List every column of `split` except 'text'."""
    return [name for name in split.column_names if name != 'text']


# Drop the source columns so that only the rendered 'text' column remains.
text_train_dataset = format_train_dataset.remove_columns(_non_text_columns(train_dataset))
text_val_dataset = format_val_dataset.remove_columns(_non_text_columns(val_dataset))

In [None]:
# Display the first text-only training record.
text_train_dataset[0]

Setting Up LoRA for Fine-Tuning

In [None]:
from peft import prepare_model_for_kbit_training

# Run PEFT's k-bit training preparation on the quantized model before adapters are attached.
# NOTE(review): Unsloth's get_peft_model may already handle this preparation — confirm this extra call is needed.
# This rebinds `model`, so re-running the cell applies the preparation again.
model = prepare_model_for_kbit_training(model)

In [None]:
# Projection layers that receive LoRA adapters (attention q/k/v/o plus the MLP gate/up/down).
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Wrap the model with LoRA adapters. This rebinds `model`, so the cell is not idempotent.
model = FastLanguageModel.get_peft_model(
    model,
    target_modules=lora_target_modules,
    r=32,               # LoRA rank; a higher rank means more trainable parameters and memory
    lora_alpha=32,      # scaling factor, here set equal to the rank
    lora_dropout=0.05,  # the author notes 0 is the optimized setting
    bias="none",        # the author notes "none" is the optimized setting
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,   # rank-stabilized LoRA disabled
    loftq_config=None,
)


In [None]:
# Report the model's trainable-parameter summary after attaching the adapters.
model.print_trainable_parameters()

Configuring and Running the Training Process

In [None]:
# Workaround: remove the `for_training` attribute from the model object when present.
# NOTE(review): the reason is not visible in this notebook — presumably it avoids a clash with the trainer; confirm.
# NOTE(review): hasattr is also true for class-level attributes, in which case delattr on the instance would raise AttributeError.
if hasattr(model, "for_training"):
    delattr(model, "for_training")

In [None]:
# Cast the model to bfloat16 to match the bf16 setting used for training.
# NOTE(review): the base weights were loaded with load_in_4bit=True — confirm this cast behaves as intended for quantized weights.
model = model.to(torch.bfloat16)

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; later cells write checkpoints and saved models under "/content/drive/My Drive".
drive.mount('/content/drive')

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Build the supervised fine-tuning trainer over the text-only training split.
# NOTE(review): no eval_dataset is passed, so text_val_dataset is not used during training.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = text_train_dataset,
    dataset_text_field = "text",
    max_seq_length = 2048,
    dataset_num_proc = 2, # Number of processes used for loading and processing the data
    packing = False, # Sequence packing disabled (the author notes it can speed up training on short sequences)
    args = TrainingArguments(
        per_device_train_batch_size = 2, # Number of samples per batch per GPU
        gradient_accumulation_steps = 4, # Accumulate gradients over 4 steps before each weight update (2 x 4 = 8 samples per update per device)
        num_train_epochs = 3,
        warmup_steps = 50,# Learning-rate warmup steps (author's rule of thumb: ~5% of total steps)
        #max_steps=3403, # Not needed while num_train_epochs is set. NOTE(review): the original step estimate assumed a batch of 4 — recompute if re-enabled
        learning_rate = 1e-4,
        fp16 = False, # Hard-coded off; the portable alternative is `not is_bfloat16_supported()`
        bf16= True, # Hard-coded on; NOTE(review): presumably fails on GPUs without BF16 support — consider `is_bfloat16_supported()`
        logging_steps = 100,
        optim = "adamw_8bit", # The optimizer that will be used for updating the weights
        weight_decay = 0.01, # Regularization term to prevent overfitting
        lr_scheduler_type = "linear", # Linear learning rate decay
        seed = 3407,
        output_dir = "/content/drive/My Drive/deepseek_finetuned_02", # Where to save the model checkpoints
        report_to = "wandb", # Send training logs to Weights & Biases
    ),
)

In [None]:
# Run fine-tuning and keep the object returned by train() for later inspection.
trainer_stats=trainer.train()

Saving the Fine-Tuned Model to HuggingFace

In [None]:
# Save the fine-tuned adapter and the tokenizer to Google Drive.
save_path = "/content/drive/My Drive/deepseek_finetuned_03"
model.save_pretrained(save_path)  # per the author: saves only the small subset of fine-tuned parameters, not the full model
tokenizer.save_pretrained(save_path)

# The original `print({save_path})` built a one-element set and printed its repr
# ({'...'}), not the path itself.
print(f"Adapter and tokenizer saved to: {save_path}")

In [None]:
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

import torch  # needed for the dtype below; harmless if already imported earlier

model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"  # base model
save_path = "/content/drive/My Drive/deepseek_finetuned_03"  # LoRA checkpoint path

# Load the un-quantized base weights in bfloat16 (the precision used for training)
# instead of relying on the library's default dtype, which can be float32 and then
# needs roughly twice the memory for a 7B-parameter model.
base_model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)

# Attach the saved LoRA adapter, then fold it into the base weights to get a standalone model.
model = PeftModel.from_pretrained(base_model, save_path)
model = model.merge_and_unload()

# Save the merged model next to the tokenizer created earlier in this session.
final_save_path = "/content/drive/My Drive/deepseek_finetuned_full_03"
model.save_pretrained(final_save_path)
tokenizer.save_pretrained(final_save_path)

# The original `print({final_save_path})` printed a set repr rather than the path.
print(f"Merged model and tokenizer saved to: {final_save_path}")

In [None]:
# Publish the merged model and its tokenizer to the same Hugging Face Hub repository.
hub_repo_id = "andong90/DeepSeek-R1-Distill-Qwen-7B-student-mental-health-json"
model.push_to_hub(hub_repo_id, token=hf_token)
tokenizer.push_to_hub(hub_repo_id, token=hf_token)

Reload the model after restarting the notebook

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

import torch  # local import: this cell is meant to run right after a kernel restart

model_name = "andong90/DeepSeek-R1-Distill-Qwen-7B-student-mental-health-json"

# Load the published merged model in bfloat16 before moving it to the GPU.
# Loading at the library's default precision can take about twice the GPU memory for a 7B model.
# NOTE: the test cells further down also use `val_dataset`, which this cell does not
# recreate — re-run the dataset cell after a restart.
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16).to("cuda")
tokenizer = AutoTokenizer.from_pretrained(model_name)


Test on a few examples

In [None]:
# Inference prompt for the fine-tuned model. It ends at the response header, so the
# model writes the JSON itself; {user_history} and {user_text} are str.format placeholders.
# NOTE(review): this wording differs from train_prompt_style — confirm the mismatch is intended.
prompt_test = """Given a student's Conversation History and Current Message, extract the relevant metadata, including emotion type, emotion intensity (1-5), problem type, and counseling strategy.
Then answer the student's Current Message as a counselor based on the metadata. Keep it concise but affirmative.
The counselor must return a Structured JSON Response with these fields: "emotion_type","emotion_intensity", "problem_type", "counseling_strategy","answer".

### Student:
**Conversation History:**
{user_history}

**Current Message:**
{user_text}

### Counselor Structured JSON Response:
"""

In [None]:
# Scratch draft of an alternative instruction prompt. The string below is not assigned
# to any name, so no other cell can reference it; the cell only echoes it as output.
#You are an AI trained in counseling techniques.
'''Given a student's Conversation History and Current Message, extract the relevant metadata, including emotion type, emotion intensity (1-5), problem type, and counseling strategy.
Then answer the student's Current Message as a counselor based on the metadata. Keep it concise but affirmative.
The counselor must return a Structured JSON Response with these fields: "emotion_type", "sadness","emotion_intensity", "problem_type", "counseling_strategy","Providing Suggestions","answer".'''

In [None]:
# Display validation record 6, including its reference fields.
val_dataset[6]

In [None]:
# Pull the conversation context of validation record 6 for the prompt.
val_example = val_dataset[6]
user_history, user_text = val_example['user_history'], val_example['user_text']
print("This the user history:", user_history)
print("This is the current question:", user_text)

In [None]:
from unsloth import FastLanguageModel
import torch

In [None]:
# Enable Unsloth's inference mode and tokenize the filled-in test prompt.
FastLanguageModel.for_inference(model)
inputs = tokenizer([prompt_test.format(user_history=user_history,user_text=user_text)], return_tensors="pt").to("cuda")
outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=250,
    eos_token_id=tokenizer.eos_token_id,
    num_return_sequences=1,
    do_sample=True,   # explicit, so temperature/top_p take effect whatever the checkpoint's generation_config says
    temperature=0.6,  # value the author took from the DeepSeek recommendation
    top_p=0.9,        # nucleus sampling threshold
    use_cache=True,
)

# Decode prompt + continuation without special tokens.
response = tokenizer.decode(outputs[0],skip_special_tokens=True)
print(response)
#print(response[0].split("### Counselor Structured JSON Response:")[1].strip())

Try another one

In [None]:
# Display validation record 15, including its reference fields.
val_dataset[15]

In [None]:
# Pull the conversation context of validation record 15 for the prompt.
val_example = val_dataset[15]
user_history, user_text = val_example['user_history'], val_example['user_text']
print("This the user history:", user_history)
print("This is the current question:", user_text)

In [None]:
# Enable Unsloth's inference mode and tokenize the filled-in test prompt.
FastLanguageModel.for_inference(model)
inputs = tokenizer([prompt_test.format(user_history=user_history,user_text=user_text)], return_tensors="pt").to("cuda")
outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=200,
    eos_token_id=tokenizer.eos_token_id,
    num_return_sequences=1,
    do_sample=True,   # explicit, so temperature/top_p take effect whatever the checkpoint's generation_config says
    temperature=0.6,  # value the author took from the DeepSeek recommendation
    top_p=0.9,        # nucleus sampling threshold
    use_cache=True,
)

# Decode prompt + continuation without special tokens.
response = tokenizer.decode(outputs[0],skip_special_tokens=True)
print(response)
#print(response[0].split("### Counselor Structured JSON Response:")[1].strip())

In [None]:
# Print only the text that follows the response header, i.e. the generated part.
response_header = "### Counselor Structured JSON Response:"
generated_part = response.split(response_header)[1]
print(generated_part.strip())

Another example

In [None]:
# Display validation record 25, including its reference fields.
val_dataset[25]

In [None]:
# Pull the conversation context of validation record 25 for the prompt.
val_example = val_dataset[25]
user_history, user_text = val_example['user_history'], val_example['user_text']
print("This the user history:", user_history)
print("This is the current question:", user_text)

In [None]:
# Enable Unsloth's inference mode and tokenize the filled-in test prompt.
FastLanguageModel.for_inference(model)
inputs = tokenizer([prompt_test.format(user_history=user_history,user_text=user_text)], return_tensors="pt").to("cuda")
outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=200,
    eos_token_id=tokenizer.eos_token_id,
    num_return_sequences=1,
    do_sample=True,   # explicit, so temperature/top_p take effect whatever the checkpoint's generation_config says
    temperature=0.6,  # value the author took from the DeepSeek recommendation
    top_p=0.9,        # nucleus sampling threshold
    use_cache=True,
)

# Decode without special tokens and print only the text after the response header.
response = tokenizer.batch_decode(outputs,skip_special_tokens=True)
print(response[0].split("### Counselor Structured JSON Response:")[1].strip())

4th test

In [None]:
# Display validation record 45, including its reference fields.
val_dataset[45]

In [None]:
# Pull the conversation context of validation record 45 for the prompt.
val_example = val_dataset[45]
user_history, user_text = val_example['user_history'], val_example['user_text']
print("This the user history:", user_history)
print("This is the current question:", user_text)

In [None]:
# Enable Unsloth's inference mode and tokenize the filled-in test prompt.
FastLanguageModel.for_inference(model)
inputs = tokenizer([prompt_test.format(user_history=user_history,user_text=user_text)], return_tensors="pt").to("cuda")
outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=300,
    eos_token_id=tokenizer.eos_token_id,
    num_return_sequences=1,
    do_sample=True,   # explicit, so temperature/top_p take effect whatever the checkpoint's generation_config says
    temperature=0.6,  # value the author took from the DeepSeek recommendation
    top_p=0.9,        # nucleus sampling threshold
    use_cache=True,
)

# Decode without special tokens and print only the text after the response header.
response = tokenizer.batch_decode(outputs,skip_special_tokens=True)
print(response[0].split("### Counselor Structured JSON Response:")[1].strip())

5th test

In [None]:
# Display validation record 50, including its reference fields.
val_dataset[50]

In [None]:
# Pull the conversation context of validation record 50 for the prompt.
val_example = val_dataset[50]
user_history, user_text = val_example['user_history'], val_example['user_text']
print("This the user history:", user_history)
print("This is the current question:", user_text)

In [None]:
# Enable Unsloth's inference mode and tokenize the filled-in test prompt.
FastLanguageModel.for_inference(model)
inputs = tokenizer([prompt_test.format(user_history=user_history,user_text=user_text)], return_tensors="pt").to("cuda")
outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=200,
    eos_token_id=tokenizer.eos_token_id,
    num_return_sequences=1,
    do_sample=True,   # explicit, so temperature/top_p take effect whatever the checkpoint's generation_config says
    temperature=0.6,  # value the author took from the DeepSeek recommendation
    top_p=0.9,        # nucleus sampling threshold
    use_cache=True,
)

# Decode without special tokens and print only the text after the response header.
response = tokenizer.batch_decode(outputs,skip_special_tokens=True)
print(response[0].split("### Counselor Structured JSON Response:")[1].strip())

6th Test

In [None]:
# Display validation record 63, including its reference fields.
val_dataset[63]

In [None]:
# Pull the conversation context of validation record 63 for the prompt.
val_example = val_dataset[63]
user_history, user_text = val_example['user_history'], val_example['user_text']
print("This the user history:", user_history)
print("This is the current question:", user_text)

In [None]:
# Enable Unsloth's inference mode and tokenize the filled-in test prompt.
FastLanguageModel.for_inference(model)
inputs = tokenizer([prompt_test.format(user_history=user_history,user_text=user_text)], return_tensors="pt").to("cuda")
outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=200,
    eos_token_id=tokenizer.eos_token_id,
    num_return_sequences=1,
    do_sample=True,   # explicit, so temperature/top_p take effect whatever the checkpoint's generation_config says
    temperature=0.6,  # value the author took from the DeepSeek recommendation
    top_p=0.9,        # nucleus sampling threshold
    use_cache=True,
)

# Decode without special tokens and print only the text after the response header.
response = tokenizer.batch_decode(outputs,skip_special_tokens=True)
print(response[0].split("### Counselor Structured JSON Response:")[1].strip())

In [None]:
# Display validation record 73, including its reference fields.
val_dataset[73]

In [None]:
# Pull the conversation context of validation record 73 for the prompt.
val_example = val_dataset[73]
user_history, user_text = val_example['user_history'], val_example['user_text']
print("This the user history:", user_history)
print("This is the current question:", user_text)

In [None]:
# Enable Unsloth's inference mode and tokenize the filled-in test prompt.
FastLanguageModel.for_inference(model)
inputs = tokenizer([prompt_test.format(user_history=user_history,user_text=user_text)], return_tensors="pt").to("cuda")
outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=200,
    eos_token_id=tokenizer.eos_token_id,
    num_return_sequences=1,
    do_sample=True,   # explicit, so temperature/top_p take effect whatever the checkpoint's generation_config says
    temperature=0.6,  # value the author took from the DeepSeek recommendation
    top_p=0.9,        # nucleus sampling threshold
    use_cache=True,
)

# Decode without special tokens and print only the text after the response header.
response = tokenizer.batch_decode(outputs,skip_special_tokens=True)
print(response[0].split("### Counselor Structured JSON Response:")[1].strip())