In [None]:
!jupyter kernelspec list

In [None]:
 #https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Phi_4-Conversational.ipynb#scrollTo=XUYo7C-bc8oc

In [None]:
!conda env list

In [None]:
!python -m ipykernel install --user --name py312jc2 --display-name "Python (py312jc2)"

In [None]:
!python --version

In [None]:
import sys
print(sys.version) # should be 3.12.9

In [None]:
import os

from unsloth import FastLanguageModel  # FastVisionModel for LLMs
import torch

In [None]:
import torch
import numpy as np

print("PyTorch version:", torch.__version__) # 2.5.0+cu124
print("NumPy version:", np.__version__) # 2.1.2

In [None]:
import pandas as pd

In [None]:
max_seq_length = 2048  # Choose any! We auto support RoPE Scaling internally!
load_in_4bit = True  # Use 4bit quantization to reduce memory usage. Can be False.

In [None]:
# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
# Reference list of checkpoint names; each entry must be followed by a comma,
# otherwise Python joins adjacent string literals into one (invalid) name.
fourbit_models = [
    "unsloth/Meta-Llama-3.1-8B-bnb-4bit",  # Llama-3.1 2x faster
    "unsloth/Mistral-Small-Instruct-2409",  # Mistral 22b 2x faster!
    "unsloth/Phi-4",  # Phi-4 2x faster!
    "unsloth/Phi-4-unsloth-bnb-4bit",  # Phi-4 Unsloth Dynamic 4-bit Quant
    "unsloth/gemma-2-9b-bnb-4bit",  # Gemma 2x faster!
    "unsloth/Qwen2.5-7B-Instruct-bnb-4bit",  # Qwen 2.5 2x faster!
    "unsloth/Llama-3.2-1B-bnb-4bit",  # NEW! Llama 3.2 models
    "unsloth/Llama-3.2-1B-Instruct-bnb-4bit",
    "unsloth/Llama-3.2-3B-bnb-4bit",
    "unsloth/Llama-3.2-3B-Instruct-bnb-4bit",
]  # More models at https://docs.unsloth.ai/get-started/all-our-models


In [None]:
# Load the "unsloth/Phi-4" checkpoint and its tokenizer through Unsloth, using
# the max_seq_length and load_in_4bit values set in the configuration cell.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Phi-4",
    max_seq_length = max_seq_length,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

In [None]:
# Wrap the loaded model with LoRA adapters on the modules named in
# target_modules; the wrapped model replaces `model` for all later cells.
# NOTE(review): no training call is visible in this notebook, so the adapters
# are presumably still untrained when the model is used/saved below - confirm.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

In [None]:
from unsloth.chat_templates import get_chat_template

tokenizer = get_chat_template(
    tokenizer,
    chat_template = "phi-4",
)

def formatting_prompts_func(examples):
    """Render every conversation of a batch into one chat-formatted string.

    Args:
        examples: A batch mapping with a "conversations" entry holding one
            conversation per example (used with ``dataset.map(batched=True)``).

    Returns:
        dict: ``{"text": [...]}`` with one rendered string per conversation,
        produced by the module-level ``tokenizer`` chat template without
        tokenizing and without a trailing generation prompt.
    """
    rendered = []
    for conversation in examples["conversations"]:
        rendered.append(
            tokenizer.apply_chat_template(
                conversation, tokenize = False, add_generation_prompt = False
            )
        )
    return {"text": rendered}

from datasets import load_dataset
dataset = load_dataset("mlabonne/FineTome-100k", split = "train")

In [None]:
from unsloth.chat_templates import standardize_sharegpt

dataset = standardize_sharegpt(dataset)
dataset = dataset.map(
    formatting_prompts_func,
    batched=True,
)

In [None]:
dataset[5]["conversations"]

In [None]:
dataset[5]["text"]

In [None]:
from unsloth.chat_templates import get_chat_template

# Re-apply the "phi-4" chat template to the tokenizer before building prompts
# (same call as in the dataset-preparation cell above).
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "phi-4",
)
FastLanguageModel.for_inference(model) # Enable native 2x faster inference

# Single-turn prompt used as a quick smoke test of generation.
messages = [
    {"role": "user", "content": "Continue the fibonnaci sequence: 1, 1, 2, 3, 5, 8,"},
]
# Render the conversation with the chat template, tokenize it into a PyTorch
# tensor and move that tensor to the GPU.
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize = True,
    add_generation_prompt = True, # Must add for generation
    return_tensors = "pt",
).to("cuda")

# Generate at most 64 new tokens for the prompt.
# NOTE(review): temperature/min_p are sampling settings; whether sampling is
# actually active depends on the model's generation config - confirm.
outputs = model.generate(
    input_ids = inputs, max_new_tokens = 64, use_cache = True, temperature = 1.5, min_p = 0.1
)
# Last expression of the cell: decoded text for each generated sequence.
tokenizer.batch_decode(outputs)

In [None]:
# Same smoke-test prompt as the previous cell, but the completion is streamed
# to the cell output through a TextStreamer instead of being decoded afterwards.
FastLanguageModel.for_inference(model) # Enable native 2x faster inference

messages = [
    {"role": "user", "content": "Continue the fibonnaci sequence: 1, 1, 2, 3, 5, 8,"},
]
# Chat-template the conversation, tokenize to a PyTorch tensor, move to GPU.
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize = True,
    add_generation_prompt = True, # Must add for generation
    return_tensors = "pt",
).to("cuda")

from transformers import TextStreamer
# skip_prompt=True: only the newly generated text is printed, not the prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt = True)
# The return value is discarded; the streamed text is the output of interest.
_ = model.generate(
    input_ids = inputs, streamer = text_streamer, max_new_tokens = 128, # do_sample=False, #add to disable randomness
    use_cache = True, temperature = 1.5, min_p = 0.1, num_beams=1,      # Single beam ensures no alternative paths
)

In [None]:
model.save_pretrained("phi4_model")  # Local saving
tokenizer.save_pretrained("phi4_model")

In [None]:
# Optional: reload the model/tokenizer that were saved to "phi4_model" instead
# of reusing the objects already in memory. Change the condition to True to
# enable; as written the block is skipped.
if False:
    from unsloth import FastLanguageModel
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "phi4_model", # YOUR MODEL YOU USED FOR TRAINING
        max_seq_length = max_seq_length,
        # No `dtype` variable is defined anywhere in this notebook, so the
        # previous `dtype = dtype` raised NameError as soon as this block was
        # enabled. None leaves the dtype choice to Unsloth.
        dtype = None,
        load_in_4bit = load_in_4bit,
    )
    FastLanguageModel.for_inference(model) # Enable native 2x faster inference

# Ask the model a free-form question about one sentence and stream the answer.
messages = [
    {"role": "user", "content": "does this sentence indicate mental health recovery - he was well kempt"},
]
# Chat-template the conversation, tokenize to a PyTorch tensor, move to GPU.
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize = True,
    add_generation_prompt = True, # Must add for generation
    return_tensors = "pt",
).to("cuda")

from transformers import TextStreamer
# skip_prompt=True: only the newly generated text is printed, not the prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt = True)
# Up to 128 new tokens; the sampling settings used in earlier cells are
# deliberately commented out here.
_ = model.generate(
    input_ids = inputs, streamer = text_streamer, max_new_tokens = 128,
    use_cache = True#, temperature = 1.5, min_p = 0.1
)

In [None]:
# FROM LEO - this works!

from unsloth.chat_templates import get_chat_template 

tokenizer = get_chat_template(
    tokenizer, 
    chat_template = "phi-4", 
) 

FastLanguageModel.for_inference(model) # Enable native 2x faster inference 

messages = [     
    {   
    "role": "system",   
    "content": "You are an expert clinical NLP assistant. Your task is to identify whether a sentence indicates mental health recovery, including implicit signs. Classify recovery into a predefined domain. Think step-by-step before arriving at a decision. Respond only with structured JSON. Do not include any explanations, comments, or additional text." 
    }, 
    {   
    "role": "user",   
    "content": '''Determine whether the following sentence indicates mental health recovery. Think step-by-step through the sentence to determine if it shows any signs of recovery. Then respond in this exact JSON format: { 'recovery': 1 or 0, 'type': '<type from the list below>' } 
        ### Recovery Types: 
             - 'social': indicates at least one meaningful social relationship (intimate partner, family member, friend) 
             - 'occupational': evidence of work, volunteering, vocational training, or study 
             - 'activities of daily living': ability to organise and manage aspects of daily life such as dressing, hygiene, transportation, shopping, finances, paying bills, meal prep, home maintenance, and medication 
             - 'personal': shows insight into self and relationship to self 
             - 'non-derived': no reference to a specific domain 
             
        ### Examples: 
             - Sentence: 'She is doing well in her job.'   
             Response: { 'recovery': 1, 'type': 'occupational' } 
             - Sentence: 'He is not able to pay his debts.'   
             Response: { 'recovery': 0, 'type': 'activities of daily living' } 
        ### Sentence: He had taken his medication prior to my visits and checked his BMs which were 8mmols  
        ''' 
    } ] 

inputs = tokenizer.apply_chat_template(     
    messages,
    tokenize = True,
    add_generation_prompt = True, # Must add for generation     
    return_tensors = "pt", 
).to("cuda") 

outputs = model.generate(
    input_ids = inputs, max_new_tokens = 64, use_cache = True, 
    do_sample=False, num_beams=1) 
generated_tokens = outputs[0][inputs.shape[1]:] 
completion = tokenizer.decode(generated_tokens, 
                              skip_special_tokens=True).strip() 

print(completion)

In [None]:
#Trying to make Leo code to run through df - i think this is now working

from tqdm import tqdm
import time

tokenizer = get_chat_template(
    tokenizer, 
    chat_template="phi-4"
)

FastLanguageModel.for_inference(model)  # Enable native 2x faster inference

def generate_response(text):
    """Classify one sentence for mental-health recovery and return the raw reply.

    Builds a system + user chat prompt around ``text``, runs the module-level
    ``model`` without sampling (``do_sample=False``, single beam) for at most
    64 new tokens, and decodes only the tokens produced after the prompt.

    Args:
        text: The sentence to classify; it is interpolated into the prompt.

    Returns:
        str: The decoded completion with special tokens removed and
        surrounding whitespace stripped. The string is not parsed or validated.
    """
    system_turn = {   "role": "system",   "content": "You are an expert clinical NLP assistant. Your task is to identify whether a sentence indicates mental health recovery, including implicit signs. Classify recovery into a predefined domain. Think step-by-step before arriving at a decision. Respond only with structured JSON. Do not include any explanations, comments, or additional text." }
    # Doubled braces are literal braces of the JSON example inside the f-string.
    user_turn = {   "role": "user",   "content": f'''Determine whether the following sentence indicates mental health recovery. Think step-by-step through the sentence to determine if it shows any signs of recovery. Then respond in this exact JSON format: {{ 'recovery': 1 or 0, 'type': '<type from the list below>' }}
            ### Recovery Types:
                 - 'social': indicates at least one meaningful social relationship (intimate partner, family member, friend)
                 - 'occupational': evidence of work, volunteering, vocational training, or study
                 - 'activities of daily living': ability to organise and manage aspects of daily life such as dressing, hygiene, transportation, shopping, finances, paying bills, meal prep, home maintenance, and medication
                 - 'personal': shows insight into self and relationship to self
                 - 'non-derived': no reference to a specific domain

            ### Sentence: {text}  
        ''' }

    prompt_ids = tokenizer.apply_chat_template(
        [system_turn, user_turn],
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to("cuda")

    sequences = model.generate(
        input_ids=prompt_ids,
        max_new_tokens=64,
        use_cache=True,
        do_sample=False,
        num_beams=1,
    )

    # Slice off the prompt so that only newly generated tokens are decoded.
    prompt_length = prompt_ids.shape[1]
    new_tokens = sequences[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

# Load the sentences to classify from CSV
df = pd.read_csv('all_matched_of_PD_recovery_using_csv_dataV12-test_set_3-4-ALL.csv')
#df = df.sample(n=100, random_state=42)

# Classify every sentence in the 'Text' column, one model call per row.
if 'Text' in df.columns:
    start_time = time.time()
    # tqdm (imported at the top of this cell) shows progress and an ETA; the
    # loop makes one generate() call per row, which is slow on large frames.
    llm_outputs = [
        generate_response(text)
        for text in tqdm(df['Text'], desc="Classifying sentences")
    ]
    df['llm-output'] = llm_outputs
    end_time = time.time()
    print(f"Processing completed in {end_time - start_time:.2f} seconds")
else:
    print("Error: 'Text' column not found in DataFrame")

# Save updated DataFrame to CSV
df.to_csv('output_data_new_test_set_domain_3_4.csv', index=False)

df.head()


In [None]:
df_small = df.sample(n=5, random_state=42) 
df_small.head(2)

In [None]:
import torch
torch.cuda.empty_cache()


In [None]:
# try to read through dataframe and output result - apply Leo's prompt to it and i have modified it

from unsloth.chat_templates import get_chat_template 

tokenizer = get_chat_template(
    tokenizer, 
    chat_template = "phi-4", 
) 


# Enable native inference (already done in your code)
FastLanguageModel.for_inference(model)

# Prepare the function to process each row
def generate_output_for_row(row):
    """Classify one sentence for mental-health recovery with the few-shot JSON prompt.

    Args:
        row: The sentence to classify (one value of the 'Text' column). It is
            converted with ``str()`` and substituted into the prompt.

    Returns:
        str: The first sequence returned by ``model.generate``, decoded with
        special tokens removed. Presumably this contains the prompt text
        followed by the completion (a later cell strips everything before
        "assistant") - confirm before parsing.
    """
    # Imported locally so this cell does not rely on an earlier streaming cell
    # having been executed.
    from transformers import TextStreamer

    ##old prompt - seems to work best
    system_prompt = "You are a medical professional who is an expert at identifying mental health recovery even when it is implied in text rather than being explicit. Your task is to identify whether a sentence indicates mental health recovery, including implicit mentions. Classify recovery into a predefined domain. Think step-by-step before arriving at a decision. Respond only with structured JSON. Do not include any explanations, comments, or additional text."

    # NOTE(review): the last example uses 'activity of daily living' while the
    # type list says 'activities of daily living' - confirm which is intended.
    user_template = '''Determine whether the following sentence indicates mental health recovery. Respond in this exact JSON format: { 'recovery': 1 or 0, 'type': '<type from the list below>' } 
        ### Recovery Types: 
             - 'social': indicates at least one meaningful social relationship (intimate partner, family member, friend) 
             - 'occupational': evidence of work, volunteering, vocational training, or study 
             - 'activities of daily living': ability to organise and manage aspects of daily life such as dressing, hygiene, transportation, shopping, finances, paying bills, meal prep, home maintenance, and medication 
             - 'personal': shows insight into self and relationship to self 
             - 'non-derived': no reference to a specific domain 
             
        ### Examples: 
    - Sentence: ‘Difficulty finding job - Now feels too de-motivated to look for job’
      Response: { ‘recovery’: 0, ‘type’: ‘occupational’ }
    - Sentence: ‘He would not want his son to live without him’
      Response: { ‘recovery’: 1, ‘type’: ‘social’ }
    - Sentence: ‘Insight Good insight into his difficulties but reactive to make personal changes’
      Response: { ‘recovery’: 1, ‘type’: ‘personal’ }
    - Sentence: ‘During this time he reports having fallen out of love with his wife’
      Response: { ‘recovery’: 0, ‘type’: ‘social’ }
    - Sentence: ‘Patient appeared well kempt’
      Response: { ‘recovery’: 1, ‘type’: ‘activity of daily living’ }
        
        ### Sentence: {row}
         '''

    # Bug fix: the template is a plain string, so "{row}" used to be sent to the
    # model literally. str.replace is used instead of an f-string/format()
    # because the template contains literal JSON braces.
    user_prompt = user_template.replace("{row}", str(row))

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize = True,
        add_generation_prompt = True, # Must add for generation
        return_tensors = "pt",
    ).to("cuda")

    # Streamer for the model output (prints the completion as it is generated)
    text_streamer = TextStreamer(tokenizer, skip_prompt=True)

    # Generate output from the model
    generated_output = model.generate(
        input_ids=inputs,
        streamer=text_streamer,
        max_new_tokens=64,
        use_cache=True,
        #num_beams=1, # add because Single beam ensures no alternative paths
        #do_sample=False #add to disable randomness
        temperature=0.7, #remove
        #min_p=0.1 #remove
        top_p=0.9  #keeps high-probability responses while adding flexibility.
    )

    # Decode every returned sequence to text
    decoded_output = tokenizer.batch_decode(generated_output, skip_special_tokens=True)

    # Return the first (and likely only) generated output
    return decoded_output[0]

df_small['output'] = df_small['Text'].apply(generate_output_for_row)
# Apply the function to each row of the DataFrame and store the results in a new column 'output'
#df_small['output'] = df_small.apply(generate_output_for_row, axis=1)
#df_small['output'] = df_small.apply(generate_output_for_row, axis=1)

# Now, 'df' will have a new column 'output' with the results
df_small.head()
#df.head()

In [None]:
# try to read through dataframe and output result

# Enable native inference (already done in your code)
FastLanguageModel.for_inference(model)

# Prepare the function to process each row
def generate_output_for_row(row):
    """Classify one sentence for mental-health recovery with the plain-text prompt.

    Args:
        row: The sentence to classify (one value of the 'Text' column). It is
            converted with ``str()`` and appended to the user prompt.

    Returns:
        str: The first sequence returned by ``model.generate``, decoded with
        special tokens removed. Presumably this contains the prompt text
        followed by the completion (a later cell strips everything before
        "assistant") - confirm before parsing.
    """
    # Imported locally so this cell does not rely on an earlier streaming cell
    # having been executed.
    from transformers import TextStreamer

    ##old prompt - seems to work best
    instructions = """
    Identify whether the following sentence indicates mental health recovery
    
    Respond with type of recovery and 1 for recovery and 0 for no recovery.
    
    Here are the types of recovery:
    - social: indicates at least one meaningful social relationship (intimate partner, family member, friend)
    - occupational: evidence of work, volunteering, vocational training, or study
    - activities of daily living: ability to organise and manage aspects of daily life such as dressing, hygiene, transportation, shopping, finances, paying bills, meal prep, home maintenance, and medication
    - personal: shows insight into self and relationship to self
    - non-derived: no reference to a specific domain
    
    Here are some examples:
    - sentence: she is doing well in her job.
      response: recovery=1, type=occupational
    
    - sentence: he is not able to pay his debts.
      response: recovery=0, type=activities of daily living
    """
    # Bug fix: `row` was never placed in the prompt, so the model was asked to
    # classify "the following sentence" without being shown one. The sentence
    # is appended in the same layout as the examples.
    sentence_block = f"""
    Now classify this sentence:
    - sentence: {row}
      response:"""

    messages = [
        {"role": "system", "content": "You are a medical professional who is an expert at identifying mental health recovery even when it is implied in text rather than being explicit. Precisely give the output, no extra description is needed"},
        {"role": "user", "content": instructions + sentence_block},
    ]
    # Tokenize and process the messages
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,  # Must add for generation
        return_tensors="pt",
    ).to("cuda")

    # Streamer for the model output (prints the completion as it is generated)
    text_streamer = TextStreamer(tokenizer, skip_prompt=True)

    # Generate output from the model
    generated_output = model.generate(
        input_ids=inputs,
        streamer=text_streamer,
        max_new_tokens=128,
        use_cache=True,
        #num_beams=1, # add because Single beam ensures no alternative paths
        #do_sample=False #add to disable randomness
        temperature=1.5, #remove
        min_p=0.1 #remove
    )

    # Decode every returned sequence to text
    decoded_output = tokenizer.batch_decode(generated_output, skip_special_tokens=True)

    # Return the first (and likely only) generated output
    return decoded_output[0]

# Run the classifier on every sentence of the 'Text' column and store the
# results in a new column 'output'.
# Series.apply forwards extra keyword arguments to the function itself, so the
# previous `axis=1` was passed to generate_output_for_row and raised TypeError.
df_small['output'] = df_small['Text'].apply(generate_output_for_row)

# Now, 'df_small' will have a new column 'output' with the results
df_small.head()

Start time: 11:58 - run over all ~13k rows. After 6 hours it was still running, so I stopped it.

df_small - a random sample of 100 rows from the test set (domains 3 and 4).
Start time: 12:26

In [None]:
df_small.to_csv('outputs_small_old_prompt_3-4-test.csv')
#df.to_csv('outputs_domain3-4.csv')

In [None]:
# Load the saved model outputs (a CSV file, read with pd.read_csv)
# NOTE(review): the save cell above writes 'outputs_small_old_prompt_3-4-test.csv',
# while this cell reads 'outputs_small_old_prompt_3-4.csv' - confirm the
# intended file.
#df = pd.read_csv("outputs_small.csv")
df = pd.read_csv("outputs_small_old_prompt_3-4.csv")

# Define a function to remove text before "assistant"
def remove_text_before_assistant(text):
    """Drop everything that precedes the first "assistant" in ``text``.

    The search is case-insensitive and the keyword itself is kept in the
    result.

    Args:
        text: The string to trim, or a missing value (NaN/None).

    Returns:
        The substring starting at the first occurrence of "assistant"; the
        input unchanged when it is missing or does not contain the keyword.
    """
    if pd.isna(text):  # missing values pass through untouched
        return text
    position = text.lower().find("assistant")
    if position == -1:
        return text
    return text[position:]

# Apply the function to the 'output' column and keep the result in a new
# 'output_clean' column (the raw 'output' column is left as it is)
df["output_clean"] = df["output"].apply(remove_text_before_assistant)

# Save the modified frame as CSV, without the index column
df.to_csv("cleaned_output_small_old_prompt_3-4.csv", index=False)

print("Text before 'assistant' removed successfully!")


In [None]:
from datetime import datetime

# Name of the marker file that records when this cell was run
filename = "end_time.txt"

# Current local time formatted as YYYY-MM-DD HH:MM:SS
current_time = f"{datetime.now():%Y-%m-%d %H:%M:%S}"

# Overwrite the marker file with the timestamp
with open(filename, "w") as file:
    file.write(f"End time: {current_time}")

print("Time saved to {} successfully!".format(filename))


In [None]:
df = pd.read_csv("cleaned_output.csv")
df.to_csv('cleaned_output_domain3-4_default_settings.csv')

**calculate some metrics after checking and cleaning the file**

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import precision_recall_fscore_support
from scipy.stats import bootstrap

In [None]:
# Load CSV file
df = pd.read_csv("cleaned_output.csv")
df.head()

In [None]:
# Ensure there are no missing values
df = df.dropna(subset=["code", "code-from-llm"])

In [None]:
# Extract true labels and predictions
true_labels = df["code"].values
pred_labels = df["code-from-llm"].values

In [None]:
# Compute precision, recall, and F1-score
precision, recall, f1, _ = precision_recall_fscore_support(true_labels, pred_labels, average="weighted")

In [None]:
# Print scores
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

In [None]:
# Function to compute metric for bootstrapping
def metric_fn(indices):
    """Weighted precision, recall and F1 for one resample of row positions.

    Args:
        indices: Positions used to index the module-level ``true_labels`` and
            ``pred_labels`` arrays (the same positions for both).

    Returns:
        numpy.ndarray: ``[precision, recall, f1]`` computed with
        ``average="weighted"`` on the selected rows.
    """
    scores = precision_recall_fscore_support(
        true_labels[indices], pred_labels[indices], average="weighted"
    )
    # The fourth element (support) is not needed.
    return np.array(scores[:3])

In [None]:
# Perform bootstrap resampling for confidence intervals.
# Row positions (not the labels themselves) are resampled so that metric_fn
# can pick the same rows from both true_labels and pred_labels.
# metric_fn returns three values, [precision, recall, f1], per resample.
# NOTE(review): the number of resamples is left at the scipy default; each
# resample calls precision_recall_fscore_support once - check run time.
boot_results = bootstrap(
    data=(np.arange(len(true_labels)),),  # Sample indices
    statistic=metric_fn,
    confidence_level=0.95,
    random_state=42,
    method='percentile'
)

In [None]:
# Extract confidence intervals
# boot_results.confidence_interval is a (low, high) pair, not one entry per
# metric. Because metric_fn returns [precision, recall, f1], `low` and `high`
# each hold three values in that order. Indexing the pair with [0]/[1] gave
# all lower / all upper bounds, and [2] raised IndexError.
ci_low = np.asarray(boot_results.confidence_interval.low)
ci_high = np.asarray(boot_results.confidence_interval.high)

# Each *_ci is a (lower, upper) tuple, as the print/export cells below expect.
precision_ci = (ci_low[0], ci_high[0])
recall_ci = (ci_low[1], ci_high[1])
f1_ci = (ci_low[2], ci_high[2])

In [None]:
# Print confidence intervals
print(f"Precision: {precision:.4f} (95% CI: {precision_ci[0]:.4f} - {precision_ci[1]:.4f})")
print(f"Recall: {recall:.4f} (95% CI: {recall_ci[0]:.4f} - {recall_ci[1]:.4f})")
print(f"F1 Score: {f1:.4f} (95% CI: {f1_ci[0]:.4f} - {f1_ci[1]:.4f})")


In [None]:
# Create a DataFrame with metrics
metrics_df = pd.DataFrame({
    "Metric": ["Precision", "Recall", "F1 Score"],
    "Value": [precision, recall, f1],
    "95% CI Lower": [precision_ci[0], recall_ci[0], f1_ci[0]],
    "95% CI Upper": [precision_ci[1], recall_ci[1], f1_ci[1]]
})

# Save metrics to CSV
metrics_df.to_csv("metrics.csv", index=False)

print("Metrics saved successfully to metrics.csv!")

**For reasoning**

In [None]:
from unsloth import FastLanguageModel, is_bfloat16_supported
import torch
max_seq_length = 512 # Can increase for longer reasoning traces
lora_rank = 16 # Larger rank = smarter, but slower

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Phi-4",
    max_seq_length = max_seq_length,
    load_in_4bit = True, # False for LoRA 16bit
    fast_inference = True, # Enable vLLM fast inference
    max_lora_rank = lora_rank,
    gpu_memory_utilization = 0.7, # Reduce if out of memory
)

model = FastLanguageModel.get_peft_model(
    model,
    r = lora_rank, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["gate_proj", "up_proj", "down_proj",],
    lora_alpha = lora_rank,
    use_gradient_checkpointing = "unsloth", # Enable long context finetuning
    random_state = 3407,
)

In [None]:
import re
from datasets import load_dataset, Dataset

# Load and prep dataset
SYSTEM_PROMPT = """
Respond in the following format:
<reasoning>
...
</reasoning>
<answer>
...
</answer>
"""

XML_COT_FORMAT = """\
<reasoning>
{reasoning}
</reasoning>
<answer>
{answer}
</answer>
"""

def extract_xml_answer(text: str) -> str:
    """Return the stripped text between the last ``<answer>`` and the next ``</answer>``.

    When the opening tag is missing, the search starts at the beginning of
    ``text``; when the closing tag is missing, it runs to the end.
    """
    after_last_open = text.rpartition("<answer>")[2]
    before_first_close = after_last_open.partition("</answer>")[0]
    return before_first_close.strip()

def extract_hash_answer(text: str) -> str | None:
    if "####" not in text:
        return None
    return text.split("####")[1].strip()

# uncomment middle messages for 1-shot prompting
def get_gsm8k_questions(split = "train") -> Dataset:
    """Load one split of ``openai/gsm8k`` ("main" config) with prompt/answer fields.

    Each example gains a ``prompt`` (system message with ``SYSTEM_PROMPT``
    followed by the question as the user message) and has ``answer`` replaced
    by the value extracted with ``extract_hash_answer``.

    Args:
        split: Name of the dataset split to load.

    Returns:
        Dataset: The mapped split.
    """
    def to_prompt_record(example):
        # Two-message chat prompt plus the extracted final answer.
        return {
            'prompt': [
                {'role': 'system', 'content': SYSTEM_PROMPT},
                {'role': 'user', 'content': example['question']}
            ],
            'answer': extract_hash_answer(example['answer'])
        }

    splits = load_dataset('openai/gsm8k', 'main') # type: ignore
    return splits[split].map(to_prompt_record) # type: ignore

dataset = get_gsm8k_questions()

# Reward functions
def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[float]:
    """Reward 2.0 for each completion whose extracted answer equals the reference.

    Also prints the first question, reference answer, raw response and
    extracted answer of the batch for inspection.

    Args:
        prompts: Chat prompts; the last message of the first prompt is printed.
        completions: One list of messages per sample; the ``content`` of the
            first message is scored.
        answer: Reference answers, paired with ``completions`` by position.
        **kwargs: Ignored.

    Returns:
        list[float]: 2.0 on an exact string match, otherwise 0.0.
    """
    texts = [sample[0]['content'] for sample in completions]
    question = prompts[0][-1]['content']
    parsed = [extract_xml_answer(reply) for reply in texts]
    print('-'*20, f"Question:\n{question}", f"\nAnswer:\n{answer[0]}", f"\nResponse:\n{texts[0]}", f"\nExtracted:\n{parsed[0]}")
    rewards = []
    for guess, reference in zip(parsed, answer):
        rewards.append(2.0 if guess == reference else 0.0)
    return rewards

def int_reward_func(completions, **kwargs) -> list[float]:
    """Reward 0.5 for each completion whose extracted answer is all digits.

    Args:
        completions: One list of messages per sample; the ``content`` of the
            first message is scored.
        **kwargs: Ignored.

    Returns:
        list[float]: 0.5 where ``str.isdigit()`` holds for the extracted
        answer, otherwise 0.0.
    """
    rewards = []
    for reply in [sample[0]['content'] for sample in completions]:
        candidate = extract_xml_answer(reply)
        rewards.append(0.5 if candidate.isdigit() else 0.0)
    return rewards

def strict_format_reward_func(completions, **kwargs) -> list[float]:
    """Reward completions that follow the exact line layout of the XML format.

    The expected layout is ``<reasoning>``, body, ``</reasoning>``,
    ``<answer>``, body, ``</answer>``, each tag on its own line and the
    completion ending with a newline.

    Args:
        completions: One list of messages per sample; the ``content`` of the
            first message is checked.
        **kwargs: Ignored.

    Returns:
        list[float]: 0.5 for each completion that matches, otherwise 0.0.
    """
    pattern = r"^<reasoning>\n.*?\n</reasoning>\n<answer>\n.*?\n</answer>\n$"
    responses = [completion[0]["content"] for completion in completions]
    # re.DOTALL lets "." span newlines; without it any reasoning or answer
    # longer than one line could never match.
    matches = [re.match(pattern, r, flags=re.DOTALL) for r in responses]
    return [0.5 if match else 0.0 for match in matches]

def soft_format_reward_func(completions, **kwargs) -> list[float]:
    """Reward completions that start with a reasoning block followed by an answer block.

    Unlike the strict variant, no particular line layout is required inside
    or between the tags.

    Args:
        completions: One list of messages per sample; the ``content`` of the
            first message is checked.
        **kwargs: Ignored.

    Returns:
        list[float]: 0.5 for each completion that matches, otherwise 0.0.
    """
    pattern = r"<reasoning>.*?</reasoning>\s*<answer>.*?</answer>"
    responses = [completion[0]["content"] for completion in completions]
    # re.DOTALL lets "." span newlines; without it the format requested by the
    # system prompt (a newline after each tag) could never match.
    matches = [re.match(pattern, r, flags=re.DOTALL) for r in responses]
    return [0.5 if match else 0.0 for match in matches]

def count_xml(text) -> float:
    """Score how closely ``text`` follows the XML layout, tag by tag.

    Each of the four tag markers that occurs exactly once adds 0.125. For the
    ``<answer>`` and ``</answer>`` checks, 0.001 per trailing character after
    the closing tag is subtracted (the second check discounts one character).

    Returns:
        float: The accumulated score.
    """
    score = 0.0
    if text.count("<reasoning>\n") == 1:
        score += 0.125
    if text.count("\n</reasoning>\n") == 1:
        score += 0.125
    if text.count("\n<answer>\n") == 1:
        score += 0.125
        # Penalise whatever follows the last "\n</answer>\n".
        tail = text.rpartition("\n</answer>\n")[2]
        score -= len(tail)*0.001
    if text.count("\n</answer>") == 1:
        score += 0.125
        # Same penalty measured from "\n</answer>", less one character.
        tail = text.rpartition("\n</answer>")[2]
        score -= (len(tail) - 1)*0.001
    return score

def xmlcount_reward_func(completions, **kwargs) -> list[float]:
    """Score each completion's first message with ``count_xml``.

    Args:
        completions: One list of messages per sample; the ``content`` of the
            first message is scored.
        **kwargs: Ignored.

    Returns:
        list[float]: One ``count_xml`` score per completion.
    """
    scores = []
    for sample in completions:
        scores.append(count_xml(sample[0]["content"]))
    return scores

In [None]:
text = tokenizer.apply_chat_template([
    {"role" : "user", "content" : "Which is bigger? 9.11 or 9.9?"},
], tokenize = False, add_generation_prompt = True)

from vllm import SamplingParams
sampling_params = SamplingParams(
    temperature = 0.8,
    top_p = 0.95,
    max_tokens = 1024,
)
output = model.fast_generate(
    [text],
    sampling_params = sampling_params,
    lora_request = None,
)[0].outputs[0].text

output

In [None]:
from vllm import LLM, SamplingParams

llm = LLM(model="microsoft/Phi-4-mini-instruct", trust_remote_code=True)

messages = [
    {"role": "system", "content": "You are a helpful AI assistant."},
    {"role": "user", "content": "Can you provide ways to eat combinations of bananas and dragonfruits?"},
    {"role": "assistant", "content": "Sure! Here are some ways to eat bananas and dragonfruits together: 1. Banana and dragonfruit smoothie: Blend bananas and dragonfruits together with some milk and honey. 2. Banana and dragonfruit salad: Mix sliced bananas and dragonfruits together with some lemon juice and honey."},
    {"role": "user", "content": "What about solving an 2x + 3 = 7 equation?"},
]

sampling_params = SamplingParams(
  max_tokens=500,
  temperature=0.0,
)

output = llm.chat(messages=messages, sampling_params=sampling_params)
print(output[0].outputs[0].text)


In [None]:
# Few-shot examples to guide the model

prompt = """
Extract meaningful information related to mental health recovery from the following text. Focus on key aspects like occupation, social recovery, activities of daily living, and personal growth.
Social – this indicates at least one meaningful social relationship (intimate partner, family member, friend)
Occupational – evidence of work, volunteering, vocational training, or study. This is inclusive of hobbies and caring commitments.
Activities of Daily Living – ability to organize and manage aspects of daily life such as dressing; hygiene; transportation; shopping; finances (bills, manage assets); meal prep, home maintenance, communication with others (phone, email); and medications.
Personal – shows insight into self, and relationship to self.
Non- derived – recovery is indicated in the notes without reference to a specific domain.

Example 1:
Text: "he has begun going to classes every day."
Output: "He made progress by attening his classes and this shows recovery in acitvity of daily living."

Example 2:
Text: "she started exercising regularly and engaging with supportive friends."
Output: "she showed improvement in social recovery"

Example 3:
Text: "he was well dressed"
Output: "this shows activity of daily living recovery"

Example 4:
Text: "he was unkempt"
Output: "this shows deterioration in activity of daily living recovery"

Example 5:
Text: "he broke up with his girlfriend"
Output: "this shows deterioration in personal recovery"

Now, for the text you want to analyze:
Text: "{your text here}"
Output:
"""


In [None]:
# Query the model with the prompt
# `model(prompt)` passed a raw string to the model's forward call, which works
# on token tensors, not text. Follow the same path as the fast_generate cell
# above instead: render the prompt with the chat template, generate, and take
# the generated text of the first result.
# NOTE(review): `prompt` still contains the "{your text here}" placeholder -
# fill it with the sentence to analyse before running.
# Uses the `sampling_params` object defined in an earlier cell.
chat_prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": prompt}],
    tokenize = False,
    add_generation_prompt = True,
)
response = model.fast_generate(
    [chat_prompt],
    sampling_params = sampling_params,
    lora_request = None,
)[0].outputs[0].text

In [None]:
# Print the extracted information about recovery
print(response)