In [8]:
import os
import sys
import ollama
import prompts
import llm_system
from tqdm import tqdm
sys.path.append('../')
import post_processing

##### Load data

In [9]:
# Load the dataset through the project-local `post_processing` helper.
# `data` is later handed to the `prompts.*_prompt` builders when the prompt is assembled.
data = post_processing.load_data()

##### Instances and parameters

In [10]:
# Instances to interpret (consumed one at a time as `instance_interpret`)
instances_interpret = [
    27606, 15179, 29493, 19985, 3144,
    19966, 945, 6368, 18524, 30216,
    13578, 13400, 5213, 2855, 9869,
    15183, 13296, 15463, 19307, 4658,
]
# Instances handed to the one-/few-shot prompt builders as examples
example_instances = [
    17170,
    570,
    27055,
    24303,
    19805,
]

# Parameters for the data
granularity = 'hourly'

# Parameters for the ML component
ml_task = 'clustering'
real_task = 'well-being'
target = 'cluster'
target_encoding = {
    0: 'negative',
    1: 'positive',
}

# Parameters for the XAI component
scope = 'local'
xai_method = 'lime'

##### LLM parameters

In [11]:
# Model name passed to `ollama.chat`
model = 'llama3'
# Prompting mode; the cells below branch on 'zero', 'one' and 'few'
learning = 'few'

##### Create system

In [12]:
# Number of examples announced to the model for each supported learning mode.
EXAMPLES_PER_LEARNING_MODE = {'zero': 0, 'one': 1, 'few': 5}

if learning not in EXAMPLES_PER_LEARNING_MODE:
    # Fail loudly: without this guard `system_content` would stay undefined on
    # a fresh kernel, or silently keep a stale value from an earlier run.
    raise ValueError(
        "Unsupported learning mode {!r}; expected one of {}".format(
            learning, sorted(EXAMPLES_PER_LEARNING_MODE)
        )
    )

# Build the system message; the example count is passed as a string.
system_content = llm_system.create_system(
    ml_task,
    granularity,
    target_encoding,
    target,
    real_task,
    learning,
    scope,
    xai_method,
    str(EXAMPLES_PER_LEARNING_MODE[learning]),
)
# Bare expression so the notebook displays the generated system message.
system_content

'You are a XAI model that can help me explain the clustering results of my data. I have a dataset with hourly wearable data. The clustering algorithm categorized the data into 2 clusters, where cluster 0 represents negative well-being and cluster 1 represents positive well-being. I need to understand why a user over time has been categorized into its respective cluster. You will be provided with 5 examples each one contains, in the first sentence, the features, values and clustering result, and, in a second sentence, the explanation produced by the local lime XAI method, which is based on feature importance, to explain this clustering result. After the examples, there will be a question containing features and their actual values. You need to compute the feature importance and explain the clustering results based on this feature importance. Your answer must contain only the exact following two parts: The "Developer response:" as exclusively a json format with keys to be the features yo

In [13]:
# Manually written system message. Assigning it here replaces whatever value
# `system_content` held before this cell ran. The text is split into one
# literal per sentence; adjacent literals are concatenated by Python.
system_content = (
    'You are a XAI model that can help me explain the clustering results of my data. '
    'I have a dataset with hourly wearable data. '
    'The clustering algorithm categorized the data into 2 clusters, where cluster 0 represents negative well-being and cluster 1 represents positive well-being. '
    'I need to understand why a user over time has been categorized into its respective cluster. '
    'You will be provided with 5 examples each one contains, in the first sentence, the features, values and clustering result, and, in a second sentence, the explanation produced by the local lime XAI method, which is based on feature importance, to explain this clustering result. '
    'After the examples, there will be a question containing features and their actual values. '
    'You need to compute the feature importance for the provided question and explain the clustering results based on this feature importance. '
    'Your answer must contain only the exact following two parts: '
    'The "Developer response:" as exclusively a json format with keys to be the features you identified in the question and values to be their feature importance you computed. '
    'For example: "steps":0.1. '
    'Do not add any other information. '
    'The "User response:" in one short paragraph in which you explain the clustering results based on the feature importance you computed. '
    'The answer must include the features you identified along with their actual values. '
    'Answer in a consistent style, with clear, short and understandable sentences.'
)

##### Prompt and get responses

In [14]:
# Single-instance run: this cell processes only the instance selected below.
# To process the whole batch, wrap the body in
# `for instance_interpret in instances_interpret:` (optionally with tqdm).
instance_interpret = 27606

# Prepare the prompt based on the learning technique.
if learning == 'zero':
    prompt = prompts.zero_prompt(data, instance_interpret, target, target_encoding, granularity, real_task)
elif learning == 'one':
    # One-shot: only the first example instance is used.
    prompt = prompts.one_prompt(data, instance_interpret, example_instances[0], target, target_encoding, granularity, real_task)
elif learning == 'few':
    prompt = prompts.few_prompt(data, instance_interpret, example_instances, target, target_encoding, granularity, real_task)
else:
    # Without this branch `prompt` would be undefined on a fresh kernel, or a
    # stale prompt from an earlier run would be sent silently.
    raise ValueError(
        "Unsupported learning mode {!r}; expected 'zero', 'one' or 'few'".format(learning)
    )

# Prompt the LLM with the system message followed by the user prompt.
chat_result = ollama.chat(model=model, messages=[
    {'role': 'system', 'content': system_content},
    {'role': 'user', 'content': prompt},
])
# Keep only the text of the reply; `response` is the string written to disk.
response = chat_result['message']['content']

# Persist the reply under ../data/llms_output/<learning>/<model>_<instance>.txt
response_path = '../data/llms_output/' + learning + '/' + model + '_' + str(instance_interpret) + '.txt'
# Create the per-mode output directory if it does not exist yet.
os.makedirs(os.path.dirname(response_path), exist_ok=True)
# Explicit UTF-8 so non-ASCII characters in the reply do not depend on the
# platform's default encoding.
with open(response_path, 'w', encoding='utf-8') as f:
    f.write(response)