In [1]:
import sys
import json
import ollama
import prompts
import evaluation
import llm_system
import pandas as pd
import post_processing
sys.path.append('../')

##### Denormalize the data

In [2]:
# load the final clustering data
data = pd.read_csv('../data/clustering_results/kmeans_2_results_hourly_categories_nooutliers3.csv')
data_features = (data.columns).drop(['cluster'])  # final features
data['id'] = data['id'].astype(str)
data['date'] = pd.to_datetime(data['date'])
# load the latest version of denormalized data
old_data = pd.read_pickle('../data/preprocessing_temps/date_engineered_training_df.pkl')
old_data['id'] = old_data['id'].astype(str)
old_data['date'] = pd.to_datetime(old_data['date'])
# keep only the final columns
normalized_data = old_data[data_features]
# keep only the final clustering rows
normalized_data = pd.merge(normalized_data, data[['id', 'date', 'cluster']], on=['id', 'date'], how='inner')
normalized_data.head(2)

Unnamed: 0,id,date,exertion_points,step_goal,minutes_below_zone_1,minutes_in_zone_1,steps,very_active_minutes,minutes_in_zone_2,minutes_in_zone_3,altitude,lightly_active_minutes,moderately_active_minutes,sedentary_minutes,exercises,exercise_duration,sleep_points,sleep_duration,calories,cluster
0,621e2e8e67b776a24055b564,2021-05-24 00:00:00,27.0,0.0,1349.0,83.0,99.0,33.0,0.0,0.0,0.0,149.0,24.0,713.0,2,0.966944,25.0,0.0,16.82,1
1,621e2e8e67b776a24055b564,2021-05-24 01:00:00,27.0,0.0,1349.0,83.0,0.0,33.0,0.0,0.0,0.0,149.0,24.0,713.0,2,0.966944,25.0,0.0,2.29,1


##### Set application-based parameters for the pipeline

In [3]:
# parameteres for the data
granularity = 'hourly'
# parameteres for the ML component
ml_task = 'clustering'
real_task = 'well-being'
target = 'cluster'
target_encoding = {0: 'negative', 1: 'positive'}
# parameteres for the XAI component
scope = 'global'
xai_method = 'lime'
with open('../data/explainability_output/global_30000_lime.json', 'r') as file:
    xai_response = json.load(file)

##### Random selection of instances

In [4]:
# TODO: respect time-series split
instance_interpret = 0  # select instance to interpet
example_instance = 1
example_instances = [1, 2, 3, 4, 5]  # select instance to interpet

##### Prepare the learning techniques

In [5]:
learning = 'zero'
zero_prompt = prompts.zero_prompt(normalized_data, instance_interpret, target, target_encoding, granularity, real_task)
zero_prompt

"Why a user on Monday at 0 o'clock, who has 27.0 exertion points, 0.0 step goal, 1349.0 minutes below zone 1, 83.0 minutes in zone 1, 99.0 steps, 33.0 very active minutes, 0.0 minutes in zone 2, 0.0 minutes in zone 3, 0.0 altitude, 149.0 lightly active minutes, 24.0 moderately active minutes, 713.0 sedentary minutes, 2 exercises, 0.9669444444444444 exercise duration, 25.0 sleep points, 0.0 sleep duration, and 16.82 calories, has positive well-being?"

In [8]:
learning = 'one'
one_prompt = prompts.one_prompt(normalized_data, instance_interpret, example_instance, target, target_encoding, granularity, real_task, xai_response)
one_prompt

"A user on Monday at 1 o'clock, who has 27.0 exertion_points, 0.0 step_goal, 1349.0 minutes_below_zone_1, 83.0 minutes_in_zone_1, 0.0 steps, 33.0 very_active_minutes, 0.0 minutes_in_zone_2, 0.0 minutes_in_zone_3, 0.0 altitude, 149.0 lightly_active_minutes, 24.0 moderately_active_minutes, 713.0 sedentary_minutes, 2 exercises, 0.9669444444444444 exercise_duration, 25.0 sleep_points, 0.0 sleep_duration, and 2.29 calories, has also positive well-being. The explanation for this user's well-being gives the following feature importances: {'sedentary_minutes': -0.2678196236080476, 'minutes_below_zone_1': 0.2050667894345541, 'lightly_active_minutes': 0.16906059301131376, 'exertion_points': 0.11446402442036006, 'sleep_points': 0.09321390640193324, 'step_goal': 0.050659830363696666, 'moderately_active_minutes': 0.01652281093560166, 'altitude': 0.009195725507149047, 'calories': -0.00043989392483135593, 'minutes_in_zone_2': -0.007878766844829964, 'very_active_minutes': 0.003921462139110037, 'minute

In [11]:
learning = 'few'
few_prompt = prompts.few_prompt(normalized_data, instance_interpret, example_instances, target, target_encoding, granularity, real_task, xai_response)
few_prompt

"A user on Monday at 1 o'clock, who has 27.0 exertion_points, 0.0 step_goal, 1349.0 minutes_below_zone_1, 83.0 minutes_in_zone_1, 0.0 steps, 33.0 very_active_minutes, 0.0 minutes_in_zone_2, 0.0 minutes_in_zone_3, 0.0 altitude, 149.0 lightly_active_minutes, 24.0 moderately_active_minutes, 713.0 sedentary_minutes, 2 exercises, 0.9669444444444444 exercise_duration, 25.0 sleep_points, 0.0 sleep_duration, and 2.29 calories, has also positive well-being. The explanation for this user's well-being gives the following feature importances: {'sedentary_minutes': -0.2678196236080476, 'minutes_below_zone_1': 0.2050667894345541, 'lightly_active_minutes': 0.16906059301131376, 'exertion_points': 0.11446402442036006, 'sleep_points': 0.09321390640193324, 'step_goal': 0.050659830363696666, 'moderately_active_minutes': 0.01652281093560166, 'altitude': 0.009195725507149047, 'calories': -0.00043989392483135593, 'minutes_in_zone_2': -0.007878766844829964, 'very_active_minutes': 0.003921462139110037, 'minute

##### Set the system content

In [12]:
if learning == 'zero':
    examples = 0
elif learning == 'one':
    examples = 1
elif learning == 'few':
    examples = 5  # change the number of the examples in the few-shot learning
system_content = llm_system.create_system(ml_task, granularity, target_encoding, target, real_task, learning, scope, xai_method, str(examples))
system_content

'You are a XAI model that can help me explain the clustering results of my data. I have a dataset with hourly wearable data. The clustering algorithm categorized the data into 2 clusters, where cluster 0 represents negative well-being and cluster 1 represents positive well-being. I need to understand why a user over time has been categorized into its respective cluster. You will be provided with 5 examples each one contains, in the first sentence, the features, values and clustering result, and, in a second sentence, the explanation produced by the global lime XAI method, which is based on feature importance, to explain this clustering result. After the examples, there will be a question containing features and their actual values. You need to compute the feature importance and explain the clustering results based on this feature importance. Your answer must contain only the exact following two parts: The "Developer response:" as exclusively a json format with keys to be the features y

##### Prompt the LLM
Choose between: llama3 and mistral

In [8]:
query = one_prompt
response = ollama.chat(model='llama3', messages=[
  {
    'role': 'system',
    'content': system_content
  },
  {
    'role': 'user',
    'content': query,
  },
  ])
response = response['message']['content']
response

'Developer response:\n{"steps": -0.00046214920862635043, "exertion_points": 0.11446402442036006, "step_goal": 0.050659830363696666, "minutes_below_zone_1": 0.2050667894345541, "minutes_in_zone_1": 0.007163434241415815, "sedentary_minutes": -0.2678196236080476}\n\nUser response:\nThis user has positive well-being because the model highlights the importance of steps and exertion points in determining this result. The user\'s 99.0 steps suggest they have been active throughout the day, which aligns with their positive well-being. Additionally, the moderate amount of exertion points (27) indicates that the user has been engaging in some form of physical activity, further supporting their positive well-being.'

Post-processing in LLM's response

In [9]:
model = 'llama3'

In [10]:
# post-process and store user response
profile = 'user'
user_response = response.split("User response:", 1)[1].strip()
post_processing.store_response(user_response, model, learning, str(instance_interpret), profile)
user_response

"This user has positive well-being because the model highlights the importance of steps and exertion points in determining this result. The user's 99.0 steps suggest they have been active throughout the day, which aligns with their positive well-being. Additionally, the moderate amount of exertion points (27) indicates that the user has been engaging in some form of physical activity, further supporting their positive well-being."

In [11]:
# post-process and store developer response
profile = 'developer'
developer_response = post_processing.developer_response_processing(response)
post_processing.store_response(developer_response, model, learning, str(instance_interpret), profile)
developer_response

{'steps': -0.00046214920862635043,
 'exertion_points': 0.11446402442036006,
 'step_goal': 0.050659830363696666,
 'minutes_below_zone_1': 0.2050667894345541,
 'minutes_in_zone_1': 0.007163434241415815,
 'sedentary_minutes': -0.2678196236080476}

##### Evaluate the structural quality of the explanation

In [12]:
evaluation.structural_quality_evaluation(query, user_response)

Coherence/Relevance Score: 0.7639686465263367
Number of Grammatical Errors: 0
Flesch Reading Ease: 37.18954545454546
Sentiment Consistency Score: 0.2382299182299182
Percentage of concepts covered: 0.11428571428571428
Concepts Covered: {'positive', '27', '99', 'user', 'points', 'exertion', 'steps', 'active'}
Percentage of new concepts introduced: 0.6666666666666666
New Concepts Introduced: {'highlights', 'result', 'moderate', 'day', 'determining', 'supporting', 'aligns', 'form', 'additionally', 'suggest', 'activity', 'indicates', 'model', 'physical', 'importance', 'engaging'}


##### Evaluate the content quality of the explanation

In [13]:
evaluation.content_xai_quality_evaluation(xai_response, developer_response)

Spearman Rank Correlation: 1.0
NDCG Difference: 0.0
