In [None]:
# Step 1: install the released Unsloth package from PyPI
# (presumably so its dependency set is pulled in first — TODO confirm).
!pip install unsloth
# Step 2: remove that build and reinstall straight from the GitHub repository with the
# "colab-new" extra; --no-cache-dir makes pip skip its cache for this install.
# NOTE(review): neither install is version-pinned, so re-running later may pick up a different build.
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

In [None]:
from unsloth import FastLanguageModel
import torch
# Context length (in tokens) handed to the model loader here and to the trainer further down.
max_seq_length = 6000

# Load the checkpoint to fine-tune, quantised to 4 bits.
# Fix: the original was missing the comma after the model_name argument, which made
# this whole cell a SyntaxError.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "user_name/saved_model",  # Choose the merged model you saved previously during text generation
    max_seq_length = max_seq_length,
    dtype = None,  # no explicit dtype is requested here
    load_in_4bit = True,
)

We now add LoRA adapters so we only need to update 1 to 10% of all parameters!

In [None]:
# Names of the projection modules that receive LoRA adapters.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# All adapter hyper-parameters gathered in one place so they are easy to scan and tweak.
lora_settings = dict(
    r = 16,
    target_modules = lora_target_modules,
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)

# Wrap the loaded model with the adapters; `model` is rebound to the wrapped model.
model = FastLanguageModel.get_peft_model(model, **lora_settings)

### Data Prep

In [None]:
#For classification
from datasets import load_dataset, concatenate_datasets
# Every load_dataset call below is immediately narrowed to its 'train' split,
# so each name ends up bound to a single Dataset.
train_dataset = load_dataset('json', data_files='/content/train_data.json')['train']
val_dataset = load_dataset('json', data_files='/content/val_data.json')['train']
test_dataset = load_dataset('json', data_files='/content/test_data.json')['train']

# Dreaddit training CSV; its 'text' and 'label' columns are used in the next cell.
dreadit = load_dataset('csv', data_files='/content/dreaddit-train.csv')['train']

In [None]:
# Keep only the text and its label, and expose the text under the name 'input',
# which is the key data_create reads.
# NOTE(review): this rebinds `dreadit`, so running the cell a second time fails
# because the 'text' column is already gone.
dreadit = (
    dreadit
    .select_columns(['text', 'label'])
    .rename_column('text', 'input')
)

def data_create(examples):
  """Wrap one row in a system / user / assistant chat.

  Args:
    examples: a single row (a mapping) with an 'input' text and a 'label';
      a label equal to 1 is rendered as "Depressed", anything else as "Not Depressed".

  Returns:
    A dict with one key, 'data', holding the three chat messages in order.
  """
  text=examples['input']
  if examples['label']==1:
    answer="Depressed"
  else:
    answer="Not Depressed"

  system_turn={'role':'system','content':'You are a assistant. Below is a text. Categorize the text into one of these classes:\n Depressed \n Not Depressed. DONT Output anything else. Use this to learn about depression'}
  user_turn={'role':'user','content':text}
  assistant_turn={'role':'assistant','content':answer}
  return {'data':[system_turn,user_turn,assistant_turn]}

# Convert every Dreaddit row into the chat structure produced by data_create.
dreadit=dreadit.map(data_create)

# Combine the Dreaddit examples with the main training set and shuffle them together.
# The shuffle is now seeded so the example order is the same on every run;
# 3407 is the seed already used for the LoRA init and the TrainingArguments in this notebook.
dataset=concatenate_datasets([dreadit,train_dataset])
dataset=dataset.shuffle(seed=3407)

In [None]:
#Change the dataset to chat format
from unsloth.chat_templates import get_chat_template

# Rebind the tokenizer to one that carries the "llama-3.1" chat template,
# so apply_chat_template below renders conversations in that format.
tokenizer = get_chat_template(tokenizer, chat_template="llama-3.1")

def formatting_prompts_func(examples):
    """Render each conversation of a batch into one training string.

    Args:
        examples: a batch mapping whose "data" entry is a list of conversations
            (each conversation being a list of chat messages).

    Returns:
        {"text": [...]} with one rendered string per conversation, in order.
        The strings come from the notebook-level `tokenizer`, untokenized and
        without a trailing generation prompt.
    """
    rendered = []
    for conversation in examples["data"]:
        rendered.append(
            tokenizer.apply_chat_template(
                conversation,
                tokenize = False,
                add_generation_prompt = False,
            )
        )
    return {"text": rendered}

# Add the rendered "text" column; batched=True hands formatting_prompts_func lists of rows.
dataset = dataset.map(
    formatting_prompts_func,
    batched = True,
)

### Train the model

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported

# Decide the mixed-precision mode once: bf16 where supported, fp16 otherwise.
use_bf16 = is_bfloat16_supported()

# Optimisation settings for the single fine-tuning epoch.
training_args = TrainingArguments(
    per_device_train_batch_size = 4,
    gradient_accumulation_steps = 6,
    warmup_steps = 5,
    num_train_epochs = 1,
    learning_rate = 2e-4,
    fp16 = not use_bf16,
    bf16 = use_bf16,
    logging_steps = 20,
    optim = "adamw_8bit",
    weight_decay = 0.01,
    lr_scheduler_type = "linear",
    seed = 3407,
    output_dir = "outputs",
)

# Supervised fine-tuning trainer over the rendered "text" column of `dataset`.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
    dataset_num_proc = 2,
    packing = False,
    args = training_args,
)

In [None]:
from unsloth.chat_templates import train_on_responses_only
# Header strings that open a user turn and an assistant turn in the rendered chat text.
user_turn_header = "<|start_header_id|>user<|end_header_id|>\n\n"
assistant_turn_header = "<|start_header_id|>assistant<|end_header_id|>\n\n"

# Re-wrap the trainer, telling it which marker starts the instruction and which starts the response.
trainer = train_on_responses_only(
    trainer,
    instruction_part = user_turn_header,
    response_part = assistant_turn_header,
)

In [None]:
#Run to train the model
# Runs the fine-tuning job with the trainer built in the cells above;
# whatever train() returns is kept in trainer_stats for later inspection.
trainer_stats = trainer.train()

### Inference

In [None]:
# Put the model into Unsloth's inference mode before any of the generate() calls below.
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
from transformers import TextStreamer
from sklearn.metrics import classification_report

In [None]:
# Classify every conversation of the test set and collect predictions and gold labels.
dat=test_dataset
length=dat.shape[0]
# Read the column once; indexing dat['data'] inside the loop re-reads it on every iteration.
conversations=dat['data']
# Raw strings are kept separately so unexpected model outputs can still be inspected afterwards.
raw_preds=[]
raw_labels=[]
for convo in conversations:
  # Each conversation is indexed positionally: [1] is used as the user turn and [2] as the
  # gold assistant answer (assumes a system/user/assistant layout — confirm against the JSON files).
  messages = [
      { 'content': "You are a assistant. Below is a conversation of a participant with virtual agent Ellie. Categorize the participant into one of these classes:\n Depressed \n Not Depressed. DONT output anything else.", 'role': 'system'},
          {"role": "user", "content": convo[1]['content']}
  ]
  inputs = tokenizer.apply_chat_template(
      messages,
      tokenize = True,
      add_generation_prompt = True, # Must add for generation
      return_tensors = "pt",
  ).to("cuda")

  text_streamer = TextStreamer(tokenizer, skip_prompt = True)
  out = model.generate(input_ids = inputs, streamer = text_streamer, max_new_tokens = 30, use_cache = True, temperature = 0.1, min_p = 0.1)
  # Decode only the newly generated tokens (everything after the prompt).
  raw_preds.append(tokenizer.decode(out[0][inputs.shape[-1]:],skip_special_tokens=True))
  raw_labels.append(convo[2]['content'])

# Map to 1 = "Depressed", 0 = anything else. Surrounding whitespace/newlines are stripped first,
# because an exact comparison would otherwise count "Depressed\n" or " Depressed" as class 0.
preds=[1 if pred.strip()=='Depressed' else 0 for pred in raw_preds]
labels=[1 if label.strip()=='Depressed' else 0 for label in raw_labels]

In [None]:
# Recorded run: Llama 3.2 3B 4-bit, zero-shot prompting.
cm = classification_report(y_true=labels, y_pred=preds)
print(cm)

              precision    recall  f1-score   support

           0       0.76      0.88      0.82        33
           1       0.56      0.36      0.43        14

    accuracy                           0.72        47
   macro avg       0.66      0.62      0.63        47
weighted avg       0.70      0.72      0.70        47



In [None]:
# Recorded run: Llama 3.2 3B 4-bit, 1-shot prompting.
cm = classification_report(y_true=labels, y_pred=preds)
print(cm)

              precision    recall  f1-score   support

           0       0.78      0.94      0.85        33
           1       0.71      0.36      0.48        14

    accuracy                           0.77        47
   macro avg       0.74      0.65      0.66        47
weighted avg       0.76      0.77      0.74        47



In [None]:
# Recorded run: Llama 3.2 3B 4-bit, text-generation merged model, zero-shot prompting.
cm = classification_report(y_true=labels, y_pred=preds)
print(cm)

              precision    recall  f1-score   support

           0       0.74      1.00      0.85        23
           1       1.00      0.33      0.50        12

    accuracy                           0.77        35
   macro avg       0.87      0.67      0.68        35
weighted avg       0.83      0.77      0.73        35



In [None]:
#Llama 3.2 3B 4-bit merged with one epoch fine tuning on Dreadit
# Recompute the report from the current labels/preds instead of printing whatever `cm`
# an earlier cell happened to leave in the kernel.
cm=classification_report(labels,preds)
print(cm)

              precision    recall  f1-score   support

           0       0.84      0.94      0.89        33
           1       0.80      0.57      0.67        14

    accuracy                           0.83        47
   macro avg       0.82      0.76      0.78        47
weighted avg       0.83      0.83      0.82        47



In [None]:
# Recorded run: Llama 3.2 3B 4-bit merged model, one epoch of fine-tuning on Dreaddit.
from sklearn.metrics import classification_report
cm = classification_report(y_true=labels, y_pred=preds)
print(cm)

              precision    recall  f1-score   support

           0       0.84      0.94      0.89        33
           1       0.80      0.57      0.67        14

    accuracy                           0.83        47
   macro avg       0.82      0.76      0.78        47
weighted avg       0.83      0.83      0.82        47



In [None]:
#Llama 3.2 3B 1-shot
from sklearn.metrics import classification_report
# `prev_preds` is not assigned by any cell visible in this notebook, so on a fresh kernel the
# original line raised NameError and stopped "Run All". Report only when it actually exists.
if 'prev_preds' in globals():
  cm=classification_report(labels,prev_preds)
  print(cm)
else:
  print("Skipping 1-shot report: `prev_preds` is not defined in this session.")

              precision    recall  f1-score   support

           0       0.77      0.82      0.79        33
           1       0.50      0.43      0.46        14

    accuracy                           0.70        47
   macro avg       0.64      0.62      0.63        47
weighted avg       0.69      0.70      0.70        47



In [None]:
#Llama 3.2 3B fine tuned Daic Woz
# NOTE(review): the `if False:` guard means nothing in this cell ever executes; the report shown
# beneath it was presumably kept from an earlier run — confirm before relying on it.
if False:
  from sklearn.metrics import classification_report
  cm=classification_report(labels,preds)
  print(cm)

              precision    recall  f1-score   support

           0       0.83      0.91      0.87        33
           1       0.73      0.57      0.64        14

    accuracy                           0.81        47
   macro avg       0.78      0.74      0.75        47
weighted avg       0.80      0.81      0.80        47



In [None]:
# Recorded run: Llama 3.2 3B fine-tuned on DAIC-WOZ plus Dreaddit.
from sklearn.metrics import classification_report
cm = classification_report(y_true=labels, y_pred=preds)
print(cm)

              precision    recall  f1-score   support

           0       0.82      0.94      0.87        33
           1       0.78      0.50      0.61        14

    accuracy                           0.81        47
   macro avg       0.80      0.72      0.74        47
weighted avg       0.80      0.81      0.79        47



In [None]:
# Recorded run: Llama 3.2 3B, one full epoch on Dreaddit.
from sklearn.metrics import classification_report
cm = classification_report(y_true=labels, y_pred=preds)
print(cm)

              precision    recall  f1-score   support

           0       0.79      1.00      0.88        23
           1       1.00      0.50      0.67        12

    accuracy                           0.83        35
   macro avg       0.90      0.75      0.78        35
weighted avg       0.86      0.83      0.81        35



In [None]:
# Recorded run: Llama 3.2 3B, text generation for 5 epochs plus Dreaddit for 1 epoch.
from sklearn.metrics import classification_report
cm = classification_report(y_true=labels, y_pred=preds)
print(cm)

              precision    recall  f1-score   support

           0       0.81      0.91      0.86        33
           1       0.70      0.50      0.58        14

    accuracy                           0.79        47
   macro avg       0.76      0.70      0.72        47
weighted avg       0.78      0.79      0.78        47



In [None]:
# NOTE(review): this cell carries no run label, so the configuration behind the report below is not recorded.
from sklearn.metrics import classification_report
cm = classification_report(y_true=labels, y_pred=preds)
print(cm)

              precision    recall  f1-score   support

           0       0.81      0.67      0.73        33
           1       0.45      0.64      0.53        14

    accuracy                           0.66        47
   macro avg       0.63      0.65      0.63        47
weighted avg       0.71      0.66      0.67        47



In [None]:
#Use this for testing text generation and chatting capabilities

# Conversation history for the interactive chat below. It starts with a single system message;
# the chat loop appends every user line and every model reply to this same list.
messages = [
    { 'content': "You are an assistant. Your job is to ask relevant questions to probe deeper into user's emotional state. Follow DAIC-woz/PHQ8 schema and ask all questions but DONT tell user that you are using DAIC woz schema. Use your fine tuning. Ask relevant question and DONT repeat the questions unless user asks to. Ask one question at a time. Use some emotional context to enrich the experience. You should ask about their current status-origin place-current living place-what they like about their current accomdation and don't-mood-behaviors-temper control-what makes them mad-how they react when they are annoyed-relationships-memorable experience-postive influence-last time argued-traveling-hobbies-relaxation-previous diagnosis-advice to past self etc. Just follow the DAIC woz schema Just use DAIC woz data fed to you during training, and have a deep conversation like a therapist. Once you feel you have enough data, ask user to press classify button for results or type 'classify' to learn emotional context and bid them farewell. Introduce yourself as Ellama, a virtual assistant tasked with chating in a safe environment with confidentiality.", 'role': 'system'},
]

# Interactive chat: keep prompting until the user types exactly 'exit'.
# The whole history in `messages` is re-sent on every turn, and each reply is streamed
# to the output while it is generated, then stored back into the history.
for user_in in iter(lambda: input("user: "), 'exit'):
  messages.append({'content':user_in,'role':'user'})

  inputs = tokenizer.apply_chat_template(
      messages,
      tokenize = True,
      add_generation_prompt = True, # Must add for generation
      return_tensors = "pt",
  ).to("cuda")

  text_streamer = TextStreamer(tokenizer, skip_prompt = True)
  model_out = model.generate(
      input_ids = inputs,
      streamer = text_streamer,
      max_new_tokens = 128,
      use_cache = True,
      temperature = 0.7,
      min_p = 0.1,
  )

  # Drop the prompt tokens so only the newly generated reply is decoded.
  prompt_length = inputs.shape[-1]
  reply_tokens = model_out[0][prompt_length:]
  decoded_output = tokenizer.decode(reply_tokens, skip_special_tokens=True)
  messages.append({'content':decoded_output,'role':'assistant'})

### Saving, loading finetuned models

In [None]:
#This only saves the LoRA model, which can be used in addition with merged model during inference
#Here merged model is the model we saved during text generation training
import os

# Take the Hugging Face token from the HF_TOKEN environment variable instead of typing it into
# the cell, where it would be saved and shared together with the notebook.
# If the variable is unset this is None (presumably the Hub client then falls back to a
# previously stored login — confirm against the huggingface_hub docs).
hf_token = os.environ.get("HF_TOKEN")

model.save_pretrained("lora_model") # Local saving
tokenizer.save_pretrained("lora_model")
model.push_to_hub("your_name/lora_model", token = hf_token) # Online saving
tokenizer.push_to_hub("your_name/lora_model", token = hf_token) # Online saving

# Shell step: archive /content/lora_model recursively into /content/lora_model.zip.
# NOTE(review): the save cell writes to the relative path "lora_model"; this only matches
# when the working directory is /content — confirm.
!zip -r /content/lora_model.zip /content/lora_model #Zip the saved lora model for download

In [None]:
#Use this to save your lora adapter to the merged model directory in huggingface and auto loading
import os

# Read the Hugging Face token from the HF_TOKEN environment variable rather than pasting it
# into the notebook, so the credential is never stored in the saved file.
hf_token = os.environ.get("HF_TOKEN")

model.push_to_hub_merged("username/merged_model_directory", tokenizer, save_method = "lora", token = hf_token)