In [2]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder

# Sample data
data = [
  {
    "age": 25,
    "weight": 70, 
    "height": 175, 
    "sex": "male",
    "exercise": {
      "frequency": "3 times a week",
      "intensity": "moderate",
      "type": ["running", "weightlifting"]
    },
    "food_allergies": ["peanuts"],
    "work_schedule": {
      "hours_per_day": 8,
      "days_per_week": 5
    },
    "recommended_exercise": "yoga"  # Adding flexibility exercises to balance routine
  },
  {
    "age": 30,
    "weight": 60, 
    "height": 160, 
    "sex": "female",
    "exercise": {
      "frequency": "daily",
      "intensity": "high",
      "type": ["yoga", "cycling"]
    },
    "food_allergies": ["gluten"],
    "work_schedule": {
      "hours_per_day": 7,
      "days_per_week": 6
    },
    "recommended_exercise": "swimming"  # Adding a low-impact cardiovascular exercise
  },
  {
    "age": 22,
    "weight": 75, 
    "height": 180, 
    "sex": "male",
    "exercise": {
      "frequency": "2 times a week",
      "intensity": "low",
      "type": ["swimming"]
    },
    "food_allergies": [],
    "work_schedule": {
      "hours_per_day": 6,
      "days_per_week": 4
    },
    "recommended_exercise": "weightlifting"  # Adding strength training to the routine
  },
  {
    "age": 28,
    "weight": 65, 
    "height": 165, 
    "sex": "female",
    "exercise": {
      "frequency": "5 times a week",
      "intensity": "moderate",
      "type": ["running", "yoga"]
    },
    "food_allergies": ["dairy"],
    "work_schedule": {
      "hours_per_day": 9,
      "days_per_week": 5
    },
    "recommended_exercise": "HIIT"  # Adding high-intensity interval training for variety
  },
  {
    "age": 35,
    "weight": 80, 
    "height": 170, 
    "sex": "male",
    "exercise": {
      "frequency": "4 times a week",
      "intensity": "high",
      "type": ["weightlifting", "running"]
    },
    "food_allergies": ["shellfish"],
    "work_schedule": {
      "hours_per_day": 8,
      "days_per_week": 6
    },
    "recommended_exercise": "swimming"  # Adding a low-impact cardiovascular exercise
  },
  {
    "age": 40,
    "weight": 68, 
    "height": 160, 
    "sex": "female",
    "exercise": {
      "frequency": "3 times a week",
      "intensity": "low",
      "type": ["walking", "yoga"]
    },
    "food_allergies": [],
    "work_schedule": {
      "hours_per_day": 9,
      "days_per_week": 5
    },
    "recommended_exercise": "cycling"  # Adding a moderate-intensity cardiovascular exercise
  },
  {
    "age": 45,
    "weight": 90, 
    "height": 180, 
    "sex": "male",
    "exercise": {
      "frequency": "daily",
      "intensity": "moderate",
      "type": ["cycling", "swimming"]
    },
    "food_allergies": ["soy"],
    "work_schedule": {
      "hours_per_day": 10,
      "days_per_week": 6
    },
    "recommended_exercise": "yoga"  # Adding flexibility exercises for stress relief and balance
  },
  {
    "age": 32,
    "weight": 58, 
    "height": 155, 
    "sex": "female",
    "exercise": {
      "frequency": "2 times a week",
      "intensity": "moderate",
      "type": ["running", "pilates"]
    },
    "food_allergies": ["nuts"],
    "work_schedule": {
      "hours_per_day": 8,
      "days_per_week": 5
    },
    "recommended_exercise": "weightlifting"  # Adding strength training to balance the routine
  },
  {
    "age": 29,
    "weight": 85, 
    "height": 175, 
    "sex": "male",
    "exercise": {
      "frequency": "5 times a week",
      "intensity": "high",
      "type": ["weightlifting", "HIIT"]
    },
    "food_allergies": ["gluten", "dairy"],
    "work_schedule": {
      "hours_per_day": 7,
      "days_per_week": 5
    },
    "recommended_exercise": "swimming"  # Adding a low-impact cardiovascular exercise
  },
  {
    "age": 23,
    "weight": 72, 
    "height": 165, 
    "sex": "female",
    "exercise": {
      "frequency": "daily",
      "intensity": "moderate",
      "type": ["dance", "yoga"]
    },
    "food_allergies": [],
    "work_schedule": {
      "hours_per_day": 6,
      "days_per_week": 4
    },
    "recommended_exercise": "weightlifting"  # Adding strength training for muscle building
  }
]

# Preprocess data
def preprocess_data(data):
    rows = []
    for entry in data:
        row = {
            "age": entry["age"],
            "weight": entry["weight"],
            "height": entry["height"],
            "sex": entry["sex"],
            "exercise_frequency": entry["exercise"]["frequency"],
            "exercise_intensity": entry["exercise"]["intensity"],
            "exercise_type": ",".join(entry["exercise"]["type"]),
            "food_allergies": ",".join(entry["food_allergies"]),
            "work_hours_per_day": entry["work_schedule"]["hours_per_day"],
            "work_days_per_week": entry["work_schedule"]["days_per_week"],
        }
        if "recommended_exercise" in entry:
            row["recommended_exercise"] = entry["recommended_exercise"]
        rows.append(row)
    return pd.DataFrame(rows)

# Convert categorical features to numerical values
def encode_features(df, target_column=None):
    label_encoders = {}
    for column in ["sex", "exercise_frequency", "exercise_intensity", "exercise_type", "food_allergies"]:
        le = LabelEncoder()
        df[column] = le.fit_transform(df[column])
        label_encoders[column] = le
    
    if target_column and target_column in df.columns:
        le_target = LabelEncoder()
        df[target_column] = le_target.fit_transform(df[target_column])
        label_encoders[target_column] = le_target

    return df, label_encoders

# Extract features and target
df = preprocess_data(data)
df, label_encoders = encode_features(df, target_column="recommended_exercise")
X = df.drop("recommended_exercise", axis=1)  # Features
y = df["recommended_exercise"]  # Target

# Train Random Forest model
clf = RandomForestClassifier()
clf.fit(X, y)

def predict_exercise_with_explanation(user_profile, clf, label_encoders):
    # Preprocess and encode the user profile
    user_df = preprocess_data([user_profile])
    user_df, _ = encode_features(user_df)  # No target_column needed here
    user_features = user_df.drop("recommended_exercise", axis=1, errors='ignore')  # Drop column if exists
    
    # Predict the exercise
    predicted_exercise = clf.predict(user_features)[0]
    
    # Extract feature importances
    feature_importances = clf.feature_importances_
    feature_names = X.columns
    feature_importance_dict = dict(zip(feature_names, feature_importances))
    
    # Get top three features based on importance
    sorted_importances = sorted(feature_importance_dict.items(), key=lambda x: x[1], reverse=True)
    top_features = sorted_importances[:3]
    
    # Generate explanation
    explanation = f"Based on your profile, the recommended exercise is: {label_encoders['recommended_exercise'].inverse_transform([predicted_exercise])[0]}.\n\n"
    explanation += "Here is how the top three features contributed to this recommendation:\n"
    
    feature_descriptions = {
        "exercise_type": "the types of physical exercise you already engage in",
        "exercise_intensity": "the intensity of the physical exercise you already engage in",
        "food_allergies": "your food allergies",
        "age": "your age",
        "weight": "your weight",
        "height": "your height",
        "sex": "your sex",
        "exercise_frequency": "how often you exercise",
        "work_hours_per_day": "how many hours you work per day",
        "work_days_per_week": "how many days you work per week"
    }
    
    for feature, importance in top_features:
        percentage = importance * 100  # Convert to percentage
        description = feature_descriptions.get(feature, feature)
        explanation += f"- {percentage:.2f}% of the model's decision is based on {description}.\n"
    
    return explanation

new_user = {
    "age": 32,
    "weight": 58, 
    "height": 155, 
    "sex": "female",
    "exercise": {
      "frequency": "2 times a week",
      "intensity": "moderate",
      "type": ["running", "pilates"]
    },
    "food_allergies": ["nuts"],
    "work_schedule": {
      "hours_per_day": 8,
      "days_per_week": 5
    }
}

explanation = predict_exercise_with_explanation(new_user, clf, label_encoders)
print(explanation)


Based on your profile, the recommended exercise is: weightlifting.

Here is how the top three features contributed to this recommendation:
- 20.84% of the model's decision is based on the types of physical exercise you already engage in.
- 17.66% of the model's decision is based on the intensity of the physical exercise you already engage in.
- 10.79% of the model's decision is based on how many hours you work per day.

