### **Perform necessary installation**

In [1]:
%%capture
# Install the released Unsloth package first.
!pip install unsloth
# Then swap it for the latest Unsloth build from GitHub; --no-deps leaves the
# dependencies that are already installed untouched.
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

### **Import required libraries**

In [2]:
import torch
from unsloth import FastLanguageModel
from transformers import pipeline

import pandas as pd
import numpy as np
from datasets import Dataset
from tqdm import tqdm

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


### **Set model loading parameters**

In [3]:
# Maximum sequence length handed to the model loader.
# Choose any! Unsloth auto-supports RoPE scaling internally.
max_seq_length = 2048

# None lets the loader auto-detect the dtype
# (Float16 for Tesla T4 / V100, Bfloat16 for Ampere+).
dtype = None

# 4-bit quantization reduces memory usage; may be set to False.
load_in_4bit = True

<a name="Data"></a>
### **Data Prep**

In [5]:
# Read the CSV and drop its first column by position, keeping the rest.
data = pd.read_csv("Mental_Healthcare_Dataset.csv").iloc[:, 1:]
data.head()

Unnamed: 0,statement,status
0,oh my gosh,Anxiety
1,"trouble sleeping, confused mind, restless hear...",Anxiety
2,"All wrong, back off dear, forward doubt. Stay ...",Anxiety
3,I've shifted my focus to something else but I'...,Anxiety
4,"I'm restless and restless, it's been a month n...",Anxiety


In [6]:
# (rows, columns) of the frame after the first CSV column was dropped
data.shape

(53043, 2)

In [7]:
# Normalise the 'Bi-Polar' spelling to 'Bipolar' in the label column.
data.loc[:, "status"] = data.loc[:, "status"].str.replace("Bi-Polar", "Bipolar")

# Drop the rows whose label is one of the excluded classes.
excluded_labels = ["Personality disorder", "Stress", "Suicidal"]
data = data[~data.status.isin(excluded_labels)]

In [8]:
# (rows, columns) of the frame at this point in the notebook
data.shape

(38520, 2)

In [9]:
# Shuffle the DataFrame and select only 3000 rows
# Reproducible shuffle (fixed seed), fresh 0..n-1 index, then keep the first 3000 rows
df = (
    data.sample(frac=1, random_state=85)
    .reset_index(drop=True)
    .head(3000)
)

# Fractions of the subset assigned to train and eval; the remainder is the test split
train_size = 0.8
eval_size = 0.1

# Row offsets at which the train and eval slices end
n_rows = len(df)
train_end = int(train_size * n_rows)
eval_end = train_end + int(eval_size * n_rows)

# Only the test slice (everything after the eval boundary) is materialised here
X_test = df.iloc[eval_end:]

def generate_test_prompt(data_point):
    """Build the classification prompt for one record.

    Args:
        data_point: Mapping-like object (dict or DataFrame row) whose
            "statement" entry is the text to classify.

    Returns:
        str: The instruction line, a ``text:`` line with the statement, and
        a final ``label:`` line with no trailing whitespace, left open for
        the model to complete.
    """
    instruction = (
        "Classify the text into Normal, Depression, Anxiety, Bipolar, "
        "and return the answer as the corresponding mental health disorder label."
    )
    return f"{instruction}\ntext: {data_point['statement']}\nlabel:"

# Keep the ground-truth labels before X_test is replaced by the prompt frame
y_true = X_test["status"]

# One prompt per test row, stored in a single-column frame named "text"
test_prompts = X_test.apply(generate_test_prompt, axis=1)
X_test = pd.DataFrame(test_prompts, columns=["text"])

### **Load finetuned model**

In [None]:
# Load the saved fine-tuned weights with the loader settings defined earlier.
# FastLanguageModel is already imported in the notebook's import cell.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="lora_model",  # YOUR MODEL YOU USED FOR TRAINING
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# Enable native 2x faster inference
FastLanguageModel.for_inference(model)

### **Test finetuned model**

In [None]:
# Smoke test: classify one hand-written statement with the fine-tuned model.
text = "I'm trapped in a storm of emotions that I can't control, and it feels like no one understands the chaos inside me"
prompt = f"""Classify the text into Normal, Depression, Anxiety, Bipolar, and return the answer as the corresponding mental health disorder label.
text: {text}
label: """.strip()

# NOTE(review): the pipeline is created from the base checkpoint name and its
# model is then replaced with the fine-tuned one. Passing `model=model`
# directly would presumably avoid loading the base weights at all; confirm
# that works with the Unsloth-wrapped model before changing it.
pipe = pipeline(
    "text-generation",
    model="unsloth/Meta-Llama-3.1-8B",
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
)
pipe.model = model
outputs = pipe(prompt, max_new_tokens=2, do_sample=True, temperature=0.1)

# The prompt was stripped, so it ends in "label:" with no trailing space.
# Split on "label:" (as predict() does) so the answer is still isolated when
# the model emits the label without a leading space.
print(outputs[0]["generated_text"].split("label:")[-1].strip())

### **Perform prediction on test data**

In [None]:
def predict(test, model, tokenizer):
    """Classify every prompt in ``test`` and return the predicted labels.

    Args:
        test: DataFrame with a "text" column holding one prompt per row.
        model: Model used for generation; it is assigned to ``pipe.model``,
            replacing the model the pipeline loaded by name.
        tokenizer: Tokenizer handed to the text-generation pipeline.

    Returns:
        list[str]: One entry per row, in row order. Each entry is the first
        of "Normal", "Depression", "Anxiety", "Bipolar" that occurs
        (case-insensitively) in the text generated after the last "label:",
        or "none" when no category matches.
    """
    categories = ["Normal", "Depression", "Anxiety", "Bipolar"]
    y_pred = []

    # Nothing to classify: skip building the pipeline altogether.
    if len(test) == 0:
        return y_pred

    # Build the pipeline ONCE. It does not depend on the row, and constructing
    # it inside the loop reloaded the base checkpoint for every prompt.
    # NOTE(review): passing `model=model` directly would presumably avoid
    # loading the base checkpoint at all; confirm it works with the
    # Unsloth-wrapped model before changing it.
    pipe = pipeline(task="text-generation",
                    model="unsloth/Meta-Llama-3.1-8B",
                    tokenizer=tokenizer,
                    max_new_tokens=2,
                    temperature=0.1)
    pipe.model = model

    for prompt in tqdm(test["text"], total=len(test)):
        result = pipe(prompt)
        # generated_text echoes the prompt, so keep what follows the last "label:"
        answer = result[0]['generated_text'].split("label:")[-1].strip().lower()

        # First category mentioned in the answer wins; "none" if nothing matches
        y_pred.append(
            next((category for category in categories if category.lower() in answer), "none")
        )

    return y_pred

# Run the classifier over every test prompt; the result has one label per row of X_test
y_pred = predict(X_test, model, tokenizer)

  0%|          | 0/300 [00:00<?, ?it/s]