In [1]:
import sys
import json
import ollama
import prompts
import evaluation
import pandas as pd
sys.path.append('../')

##### Denormalize the data

In [2]:
# load the final clustering data
data = pd.read_csv('../data/clustering_results/kmeans_2_results_hourly_categories_nooutliers3.csv')
data_features = (data.columns).drop(['cluster'])  # final features
data['id'] = data['id'].astype(str)
data['date'] = pd.to_datetime(data['date'])
# load the latest version of denormalized data
old_data = pd.read_pickle('../data/preprocessing_temps/date_engineered_training_df.pkl')
old_data['id'] = old_data['id'].astype(str)
old_data['date'] = pd.to_datetime(old_data['date'])
# keep only the final columns
normalized_data = old_data[data_features]
# keep only the final clustering rows
normalized_data = pd.merge(normalized_data, data[['id', 'date', 'cluster']], on=['id', 'date'], how='inner')
normalized_data.head(2)

Unnamed: 0,id,date,exertion_points,step_goal,minutes_below_zone_1,minutes_in_zone_1,steps,very_active_minutes,minutes_in_zone_2,minutes_in_zone_3,altitude,lightly_active_minutes,moderately_active_minutes,sedentary_minutes,exercises,exercise_duration,sleep_points,sleep_duration,calories,cluster
0,621e2e8e67b776a24055b564,2021-05-24 00:00:00,27.0,0.0,1349.0,83.0,99.0,33.0,0.0,0.0,0.0,149.0,24.0,713.0,2,0.966944,25.0,0.0,16.82,1
1,621e2e8e67b776a24055b564,2021-05-24 01:00:00,27.0,0.0,1349.0,83.0,0.0,33.0,0.0,0.0,0.0,149.0,24.0,713.0,2,0.966944,25.0,0.0,2.29,1


##### Prepare the learning technique

In [3]:
instance = 0  # select instance to interpet
prompt = prompts.zero_prompt(normalized_data, instance)
prompt

"Why a user on Monday at 0 o'clock, who has 27.0 exertion points, 0.0 step goal, 1349.0 minutes below zone 1, 83.0 minutes in zone 1, 99.0 steps, 33.0 very active minutes, 0.0 minutes in zone 2, 0.0 minutes in zone 3, 0.0 altitude, 149.0 lightly active minutes, 24.0 moderately active minutes, 713.0 sedentary minutes, 2 exercises, 0.9669444444444444 exercise duration, and 25.0 sleep points, 0.0 sleep duration, 16.82 calories has positive well-being?"

##### Set the system content

In [4]:
system_content = 'You are a XAI model that can help me explain the clustering results of my data. I have a dataset with hourly wearable data regarding physical activity, sleep, and health.' + \
'I have clustered the data into two clusters, where cluster one represents positive well-being and cluster zero represents negative well-being.' + \
'I need to understand why a user over time has been clustered into its respective cluster.' + \
'You will be provided with a text containing features and their actual values. You need to compute the feature importance and explain the clustering results based on this feature importance.' + \
'Your answer must contain the exact following two parts:' + \
'The "Developer response:" as exclusively a json format with keys to be the features you identified in the text and values to be their feature importance you computed. For example: "steps":0.1. Do not add any other information.' + \
'The "User response:" in one short paragraph in which you explain the clustering results based on the feature importance you computed. The answer must include the features you identified along with their actual values.' +\
'Answer in a consistent style.'

##### Prompt the LLM
Choose between: llama3, llama2-uncensored, mistral, phi3, gemma:2b

In [24]:
query = prompt
response = ollama.chat(model='phi3', messages=[
  {
    'role': 'system',
    'content': system_content
  },
  {
    'role': 'user',
    'content': prompt,
  },
  ])
response = response['message']['content']
response

'{\n    "exertion_points": 0.3793700787425936,\n    "minutes_below_zone_1": 0.2793700787425936,\n    "steps": 0.09999999999999998,\n    "active_minutes": 0.03333333333333333,\n    "exercise_duration": 0.5612477685313302\n}\n\nThe user has positive well-being on Monday at 0 o\'clock as indicated by the relatively high exertion points (27.0) and a considerable amount of time spent in zone 1 (83.0 minutes). Although their step goal wasn\'t met, they compensated with moderate activity levels across various zones. The user exercised twice, which contributes to overall well-being. However, the longer sedentary periods might have slightly diminished positive impacts on well-being.\n\nUser response:\nThe clustering results suggest that this particular Monday\'s activities for a user can be classified under positive well-being despite not meeting their step goal. The high exertion points (27.0) indicate a significant level of physical activity which contributes to overall health and wellness. A

In [25]:
split_responses = response.split('\n\n')
split_responses
developer_response = split_responses[0]
user_response = split_responses[1]
print(developer_response)
print(user_response)

{
    "exertion_points": 0.3793700787425936,
    "minutes_below_zone_1": 0.2793700787425936,
    "steps": 0.09999999999999998,
    "active_minutes": 0.03333333333333333,
    "exercise_duration": 0.5612477685313302
}
The user has positive well-being on Monday at 0 o'clock as indicated by the relatively high exertion points (27.0) and a considerable amount of time spent in zone 1 (83.0 minutes). Although their step goal wasn't met, they compensated with moderate activity levels across various zones. The user exercised twice, which contributes to overall well-being. However, the longer sedentary periods might have slightly diminished positive impacts on well-being.


##### Evaluate the structural quality of the explanation

In [26]:
user_response = user_response.replace('User response:', '')
evaluation.structural_quality_evaluation(user_response, prompt)

Coherence/Relevance Score: 0.7992367744445801
Number of Grammatical Errors: 1
Automated Readability Index: 41.327865168539326
Sentiment Consistency Score: 0.10294612794612794
Concepts Covered: {'positive', 'step', 'zone', 'minutes', 'goal', 'points', 'user', '83', '27', 'clock', 'monday', 'exertion', 'sedentary'}
New Concepts Introduced: {'calories', '33', 'lightly', 'altitude', 'moderately', '25', '149', '24', 'sleep', '713', '82', 'exercise', '1349', '9669444444444444', 'active', 'duration', '99', '16', 'steps', 'exercises'}


##### Evaluate the content quality of the explanation

In [27]:
# format the developer response into a json file
developer_response = developer_response.replace('Developer response:', '')
developer_response = developer_response.replace('\n', '')
# developer_response = developer_response[:-1]
developer_response = json.loads(developer_response)
# # developer_response = {k.replace(' ', '_'): v for k, v in developer_response.items()}
developer_response

{'exertion_points': 0.3793700787425936,
 'minutes_below_zone_1': 0.2793700787425936,
 'steps': 0.09999999999999998,
 'active_minutes': 0.03333333333333333,
 'exercise_duration': 0.5612477685313302}

In [28]:
# laod the global xai response
with open('../data/explainability_output/global_30000_lime.json', 'r') as file:
    xai_response = json.load(file)
xai_response

{'sedentary_minutes': -0.2678196236080476,
 'minutes_below_zone_1': 0.2050667894345541,
 'lightly_active_minutes': 0.16906059301131376,
 'exertion_points': 0.11446402442036006,
 'sleep_points': 0.09321390640193324,
 'step_goal': 0.050659830363696666,
 'moderately_active_minutes': 0.01652281093560166,
 'altitude': 0.009195725507149047,
 'calories': -0.00043989392483135593,
 'minutes_in_zone_2': -0.007878766844829964,
 'very_active_minutes': 0.003921462139110037,
 'minutes_in_zone_1': 0.007163434241415815,
 'minutes_in_zone_3': 6.230855806351155e-05,
 'steps': -0.00046214920862635043,
 'sleep_duration': -4.760002022959731e-05,
 'exercise_duration': -0.0006767382662127337,
 'exercises': -0.0012635979248482498}

In [29]:
evaluation.content_xai_quality_evaluation(xai_response, developer_response)

Relative distance between global LIME feature importance vs. LLM feature importance 0.7166590580224861
