In [1]:
from unsloth import FastLanguageModel
import torch
from datasets import load_dataset
import pandas as pd
from tqdm import tqdm  # Import tqdm for progress bar

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


2025-02-27 08:57:30.754211: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1740646650.768049    8568 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1740646650.771921    8568 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-27 08:57:30.787207: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Load and configure the model
max_seq_length = 2048 
dtype = None 
load_in_4bit = True 

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="ibrahimShaban/few-shot-learning-deep-mental-deepLlama8b", 
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    cache_dir = "/mnt/batch/tasks/shared/LS_root/mounts/clusters/a10048/code/models"
)

FastLanguageModel.for_inference(model)  # Enable faster inference

==((====))==  Unsloth 2025.2.12: Fast Llama patching. Transformers: 4.49.0.
   \\   /|    GPU: NVIDIA A100 80GB PCIe. Max memory: 79.151 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.0. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

tokenizer_config.json:   0%|          | 0.00/52.9k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096, padding_idx=128004)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSN

In [6]:
# Define the Alpaca-style prompt format
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

In [7]:
# Load dataset from Hugging Face
DATASET_ID = "ShenLab/MentalChat16K"
dataset = load_dataset(DATASET_ID, split="train")  

# Select the first 10% of records
num_records = int(len(dataset) * 0.07)
subset = dataset.select(range(num_records))

# Prepare lists for generation
instruction_list = subset["instruction"]  
input_list = subset["input"]

In [9]:
# Generate responses using the model
responses = []
total_records = len(instruction_list)

print("🚀 Generating responses...\n")
for idx, (instruction, input_text) in enumerate(tqdm(zip(instruction_list, input_list), total=total_records, desc="Generating")):
    prompt = alpaca_prompt.format(instruction, input_text, "")
    
    inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
    
    with torch.no_grad():
        output = model.generate(
            **inputs, 
            max_new_tokens=256,  # Adjust if needed
            do_sample=True,  # Enables sampling for diverse outputs
            temperature=0.7,  # Controls randomness
            top_p=0.9,  # Nucleus sampling for better results
            pad_token_id=tokenizer.eos_token_id  # Prevent padding issues
        )
    
    response_text = tokenizer.decode(output[0], skip_special_tokens=True)
    responses.append(response_text)

    # Print progress every 5 responses
    if idx % 5 == 0 or idx == total_records - 1:
        print(f"\n🔹 Response {idx+1}/{total_records}:\nInstruction: {instruction}\nInput: {input_text}\nResponse: {response_text}\n" + "-"*50)

# Create a DataFrame with results
df = pd.DataFrame({
    "instruction": instruction_list,
    "input": input_list,
    "response": responses
})

# Save to CSV
csv_filename = "generated_Few_Shot_DeepMental_responses.csv"
df.to_csv(csv_filename, index=False)

print(f"\n✅ CSV file saved: {csv_filename}")

Generating: 100%|█████████▉| 1124/1125 [4:02:17<00:07,  7.25s/it] 



🔹 Response 1/1125:
Instruction: You are a helpful mental health counselling assistant, please answer the mental health questions based on the patient's description. 
The assistant gives helpful, comprehensive, and appropriate answers to the user's questions. 
Input: I've been struggling with my mental health for a while now, and I can't seem to find a way to cope with it. I've tried visualization, positive thinking, and even medication, but nothing seems to work. I've been feeling lost and helpless, and I don't know what to do next. My mind is a whirlwind of thoughts and emotions, and I can't seem to make sense of it all. I feel like I'm drowning in a sea of confusion, and I can't seem to find my way out.
Response: Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
You are a helpful mental health counselling assistant, please answer the mental health questions b