# Download and import libraries

In [1]:
%%capture
!pip install pip3-autoremove
!pip-autoremove torch torchvision torchaudio -y
!pip install torch torchvision torchaudio xformers --index-url https://download.pytorch.org/whl/cu121
!pip install unsloth

In [2]:
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
HF_TOKEN = user_secrets.get_secret("HF_TOKEN")

In [3]:
import os
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from unsloth import FastLanguageModel, is_bfloat16_supported
from transformers import TextStreamer
import torch

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


# Testing

In [9]:
dataset_path = "/kaggle/input/sleep-and-fitness-dataset/PH-LLM testing.csv"
df = pd.read_csv(dataset_path)

In [10]:
model_names = [
    "johnjehiel/PH-LLM-Llama-3.2-1B-Instruct-bnb-4bit",
    # "johnjehiel/PH-LLM-Llama-3.2-1B-Instruct-bnb-8bit-Q8_0",
    # "johnjehiel/PH-LLM-Llama-3.2-1B-Instruct-bnb-16bit-GGUF"
    "johnjehiel/PH-LLM-Llama-3.2-3B-Instruct-bnb-4bit",
    # "johnjehiel/PH-LLM-Llama-3.2-3B-Instruct-bnb-8bit-Q8_0",
    # "johnjehiel/PH-LLM-Llama-3.2-3B-Instruct-bnb-16bit-GGUF",
    "johnjehiel/PH-LLM-Llama-3.1-8B-Instruct-bnb-4bit",
]

In [11]:
prompt_template = """Below is an instruction that describes a task, paired with an optional input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

In [13]:
EOS_TOKEN_PLACEHOLDER = "<EOS>"

for mname in model_names:
    print(f"\nLoading model: {mname}")
    
    # Determine quantization setting from model name: if '4bit' is in the name, set load_in_4bit=True.
    load_in_4bit_flag = True if "4bit" in mname.lower() else False

    # Load model and tokenizer using UnsLoth's API.
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=mname,
        max_seq_length=2048,
        dtype=None,
        load_in_4bit=load_in_4bit_flag,
        token=HF_TOKEN
    )
    
    # For inference, enable fast inference mode.
    FastLanguageModel.for_inference(model)
    
    # Set the EOS token: if the tokenizer doesn't provide one, use the placeholder.
    EOS_TOKEN = tokenizer.eos_token if tokenizer.eos_token is not None else EOS_TOKEN_PLACEHOLDER
    
    # Prepare a list to collect responses for this model.
    model_responses = []
    
    # Iterate over each row in the testing dataset
    for idx, row in df.iterrows():
        print(f"Type: {row['Category']} | ID: {row['ID']}\n")
        # Use the 'Prompt' column as the instruction.
        test_prompt = row["Prompt"]
        # Format the prompt using the template. 'Input' is left empty, and 'Response' is empty.
        full_prompt = prompt_template.format(test_prompt, "", "")
        
        # Tokenize the prompt and move to GPU.
        inputs = tokenizer([full_prompt], return_tensors="pt").to("cuda")
        
        # Create a TextStreamer to optionally stream generation (can be omitted if silent generation is preferred)
        streamer = TextStreamer(tokenizer, skip_prompt=True)
        
        # Generate response from the model.
        output_ids = model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
            streamer=streamer,            # This will stream the output in real time.
            max_new_tokens=700,           # Adjust as necessary.
            pad_token_id=tokenizer.eos_token_id
        )
        
        # Decode the generated tokens to text.
        generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
        
        # Append the generated text to the list.
        model_responses.append(generated_text)
    
    # Define a column name based on the model name (using the last part of the repo name)
    col_name = "Response_" + mname.split("/")[-1]
    # Add the responses as a new column in the DataFrame.
    df[col_name] = model_responses
    
    # Clear GPU memory and delete model variables before loading the next model.
    del model, tokenizer, inputs
    torch.cuda.empty_cache()


Loading model: johnjehiel/PH-LLM-Llama-3.2-1B-Instruct-bnb-4bit
==((====))==  Unsloth 2025.2.15: Fast Llama patching. Transformers: 4.49.0.
   \\   /|    GPU: Tesla T4. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Type: A1 | ID: 1

## Sleep Insights Report

Here's an analysis of your sleep data, focusing on key areas for improvement:

### Routine

Your bedtime (00:45) and wake time (07:00) are very late. This late bedtime can disrupt your circadian rhythm and make it difficult to fall asleep and stay asleep. A bedtime between 10:00-11:00 PM and wake time between 8:00-9:00 AM would be more ideal.

### Sleep Quality

Your deep sleep percentage (70%) is very high, which is generally a pos

In [14]:
df

Unnamed: 0,Category,ID,Prompt,Response_PH-LLM-Llama-3.2-1B-Instruct-bnb-4bit,Response_PH-LLM-Llama-3.2-3B-Instruct-bnb-4bit,Response_PH-LLM-Llama-3.1-8B-Instruct-bnb-4bit
0,A1,1,You are a sleep medicine expert. You are given...,"Below is an instruction that describes a task,...","Below is an instruction that describes a task,...","Below is an instruction that describes a task,..."
1,A1,2,You are a sleep medicine expert. You are given...,"Below is an instruction that describes a task,...","Below is an instruction that describes a task,...","Below is an instruction that describes a task,..."
2,A1,3,You are a sleep medicine expert. You are given...,"Below is an instruction that describes a task,...","Below is an instruction that describes a task,...","Below is an instruction that describes a task,..."
3,A1,4,You are a sleep medicine expert. You are given...,"Below is an instruction that describes a task,...","Below is an instruction that describes a task,...","Below is an instruction that describes a task,..."
4,A1,5,You are a sleep medicine expert. You are given...,"Below is an instruction that describes a task,...","Below is an instruction that describes a task,...","Below is an instruction that describes a task,..."
5,A2,1,You are a sleep medicine expert. You are given...,"Below is an instruction that describes a task,...","Below is an instruction that describes a task,...","Below is an instruction that describes a task,..."
6,A2,2,You are a sleep medicine expert. You are given...,"Below is an instruction that describes a task,...","Below is an instruction that describes a task,...","Below is an instruction that describes a task,..."
7,A2,3,You are a sleep medicine expert. You are given...,"Below is an instruction that describes a task,...","Below is an instruction that describes a task,...","Below is an instruction that describes a task,..."
8,A2,4,You are a sleep medicine expert. You are given...,"Below is an instruction that describes a task,...","Below is an instruction that describes a task,...","Below is an instruction that describes a task,..."
9,A2,5,You are a sleep medicine expert. You are given...,"Below is an instruction that describes a task,...","Below is an instruction that describes a task,...","Below is an instruction that describes a task,..."


In [16]:
output_csv = "PH-LLM_testing_results.csv"
df.to_csv(output_csv, index=False)
print(f"Results saved to {output_csv}")

Results saved to PH-LLM_testing_results.csv
