In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import classification_report, accuracy_score

# Set the number of rows for synthetic data
rows_count = 1200

# Seeding for reproducibility
np.random.seed(101)

# Generate user age between 18 to 80
user_ages = np.random.randint(18, 81, rows_count)

# Assign gender randomly with equal distribution
user_gender = np.random.choice(['Male', 'Female'], rows_count)

# Generate BMI values between 18.5 and 40
user_bmi = np.round(np.random.uniform(18.5, 40.0, rows_count), 2)

# Generate smoking status: 1 (Smoker), 0 (Non-smoker)
smoker_status = np.random.choice([0, 1], rows_count, p=[0.7, 0.3])

# Generate physical activity levels: 1 to 4 (Sedentary to Very Active)
activity_level = np.random.choice([1, 2, 3, 4], rows_count, p=[0.4, 0.3, 0.2, 0.1])

# Define possible health conditions with probabilities
conditions = ['Diabetes', 'Hypertension', 'Asthma', 'Heart Disease', 'Obesity', 'Arthritis',
              'None', 'Depression', 'Anxiety', 'COPD', 'Kidney Disease', 'Osteoporosis',
              'Allergies', 'High Cholesterol', 'Thyroid Disorders', 'Chronic Fatigue']

condition_probabilities = [0.11, 0.14, 0.07, 0.06, 0.09, 0.07, 0.15, 0.04, 0.04, 
                           0.02, 0.03, 0.03, 0.05, 0.07, 0.03, 0.02]

# Normalize probabilities to ensure they sum to 1
condition_probabilities = np.array(condition_probabilities) / sum(condition_probabilities)

# Generate medical history for each user
medical_hist = np.random.choice(conditions, rows_count, p=condition_probabilities)

# Function to generate personalized health tips
def provide_health_suggestions(condition, bmi_value, smoking, activity):
    tips = []
    
    # Health advice based on medical condition
    if condition == 'Diabetes':
        tips.append("Monitor your blood sugar regularly and follow a low-sugar diet.")
    elif condition == 'Hypertension':
        tips.append("Reduce sodium intake, exercise regularly, and avoid stress.")
    elif condition == 'Asthma':
        tips.append("Avoid allergens, and practice breathing exercises.")
    elif condition == 'Heart Disease':
        tips.append("Opt for heart-healthy diets, and reduce intake of trans fats.")
    elif condition == 'Obesity':
        tips.append("Adopt a calorie deficit diet and increase physical activity.")
    elif condition == 'Depression':
        tips.append("Consider counseling, and practice mindfulness and relaxation.")
    
    # Suggestions based on BMI
    if bmi_value > 30:
        tips.append("Focus on weight reduction strategies.")
    elif bmi_value < 18.5:
        tips.append("Increase calorie intake with nutritious meals.")
    
    # Smoking habit advice
    if smoking:
        tips.append("Quit smoking to lower risk of chronic diseases.")
    
    # Physical activity suggestions
    if activity == 1:
        tips.append("Try to incorporate more physical activities into your routine.")
    elif activity == 4:
        tips.append("Great job maintaining high activity levels!")
    
    # Combine tips into a single string
    return ' | '.join(tips)

# Generate health tips for all users based on their data
health_recommendations = [
    provide_health_suggestions(medical_hist[i], user_bmi[i], smoker_status[i], activity_level[i]) 
    for i in range(rows_count)
]

# Creating a DataFrame with the generated data
health_data = pd.DataFrame({
    'User_ID': np.arange(1, rows_count + 1),
    'Age': user_ages,
    'Gender': user_gender,
    'BMI': user_bmi,
    'Smoker': smoker_status,
    'Activity Level': activity_level,
    'Medical Condition': medical_hist,
    'Health Recommendations': health_recommendations
})

# Save the generated data to a CSV file
health_data.to_csv('synthetic_health_data.csv', index=False)

# Display the first few rows of the DataFrame
print(health_data.head())

# Label encoding for categorical variables like Gender and Medical Condition
gender_encoder = LabelEncoder()
condition_encoder = LabelEncoder()

# Apply label encoding
health_data['Gender_Encoded'] = gender_encoder.fit_transform(health_data['Gender'])
health_data['Condition_Encoded'] = condition_encoder.fit_transform(health_data['Medical Condition'])

# Standardizing the numerical features such as Age, BMI, and Activity Level
scaler = StandardScaler()
health_data[['Age_Std', 'BMI_Std', 'Activity_Std']] = scaler.fit_transform(
    health_data[['Age', 'BMI', 'Activity Level']]
)

# Create a set of all unique health tips
all_tips = set()
for tips in health_data['Health Recommendations']:
    all_tips.update(tips.split(' | '))

# Create binary columns for each health tip
for tip in all_tips:
    health_data[f'Tip_{tip}'] = health_data['Health Recommendations'].apply(lambda x: 1 if tip in x else 0)

# Defining the features (X) and target (y)
X = health_data[['Age_Std', 'BMI_Std', 'Activity_Std', 'Gender_Encoded', 'Smoker', 'Condition_Encoded']]
y = health_data[[col for col in health_data.columns if col.startswith('Tip_')]]

# Splitting the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=2023)

print("Training and testing datasets created successfully.")

# Building a MultiOutputClassifier with RandomForestClassifier
rf_pipeline = Pipeline([
    ('model', MultiOutputClassifier(RandomForestClassifier(n_estimators=100, random_state=2023)))
])

# Fitting the model
rf_pipeline.fit(X_train, y_train)

print("Model built and trained on the training data.")

# Make predictions
y_pred = rf_pipeline.predict(X_test)

# Evaluate the model
print(classification_report(y_test, y_pred, target_names=y.columns))

# Function to convert predictions back to health tips
def predictions_to_tips(predictions, tip_columns):
    return [' | '.join([tip.replace('Tip_', '') for tip, pred in zip(tip_columns, row) if pred == 1])
            for row in predictions]

# Convert predictions to health tips
predicted_tips = predictions_to_tips(y_pred, y.columns)

# Display some sample predictions
for true, pred in zip(y_test.iloc[:5].values, predicted_tips[:5]):
    print("True:", ' | '.join([tip.replace('Tip_', '') for tip, val in zip(y.columns, true) if val == 1]))
    print("Predicted:", pred)
    print()

def suggest_health_tips(user_input):
    """
    Input: user_input (list) - User data [age, bmi, activity level, gender, smoking status, condition]
    Output: health_tip (str) - Predicted health recommendation
    """
    # Encode categorical variables
    user_input[3] = gender_encoder.transform([user_input[3]])[0]  # Gender
    user_input[5] = condition_encoder.transform([user_input[5]])[0]  # Medical Condition

    # Standardize the numerical input values (age, bmi, activity level)
    user_input_std = scaler.transform([[user_input[0], user_input[1], user_input[2]]])

    # Combine standardized and non-standardized inputs
    user_input_combined = np.hstack((user_input_std, [[user_input[3], user_input[4], user_input[5]]]))
    
    # Predict the health tips
    predicted_tips = rf_pipeline.predict(user_input_combined)
    
    # Convert predictions to readable tips
    return predictions_to_tips(predicted_tips, y.columns)[0]

# Example of how to use the function
sample_user_data = [35, 29.4, 3, 'Male', 0, 'Obesity']  # [Age, BMI, Activity Level, Gender, Non-Smoker, Medical Condition]
recommended_tip = suggest_health_tips(sample_user_data)

print(f"Suggested health tip for the user: {recommended_tip}")

# Example usage for different users
user_1 = [45, 32.0, 1, 'Female', 1, 'Asthma']  # Aged 45, BMI 32, Sedentary, Female, Smoker, Medical Condition: Asthma
user_2 = [28, 22.5, 4, 'Male', 0, 'Diabetes']  # Aged 28, BMI 22.5, Very Active, Male, Non-Smoker, Medical Condition: Diabetes

# Get recommendations for both users
tip_for_user_1 = suggest_health_tips(user_1)
tip_for_user_2 = suggest_health_tips(user_2)

print(f"Recommendation for User 1: {tip_for_user_1}")
print(f"Recommendation for User 2: {tip_for_user_2}")

# Predict the health tips for the test set
y_pred = rf_pipeline.predict(X_test)

# Evaluate model accuracy and performance
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=y.columns)

print(f"Model Accuracy: {accuracy * 100:.2f}%")
print("Classification Report:")
print(report)

   User_ID  ...                             Health Recommendations
0        1  ...  Focus on weight reduction strategies. | Great ...
1        2  ...              Focus on weight reduction strategies.
2        3  ...  Consider counseling, and practice mindfulness ...
3        4  ...  Monitor your blood sugar regularly and follow ...
4        5  ...  Monitor your blood sugar regularly and follow ...

[5 rows x 8 columns]
Training and testing datasets created successfully.
Model built and trained on the training data.
                                                                     precision    recall  f1-score   support

                                                               Tip_       1.00      1.00      1.00       300
   Tip_Adopt a calorie deficit diet and increase physical activity.       1.00      1.00      1.00        24
 Tip_Try to incorporate more physical activities into your routine.       1.00      1.00      1.00       128
                    Tip_Great job maintai

In [2]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.multioutput import MultiOutputClassifier
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score
from sklearn.neighbors import NearestNeighbors
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import joblib

# Set the number of rows for synthetic data
rows_count = 1200

# Seeding for reproducibility
np.random.seed(101)

# Generate user age between 18 to 80
user_ages = np.random.randint(18, 81, rows_count)

# Assign gender randomly with equal distribution
user_gender = np.random.choice(['Male', 'Female'], rows_count)

# Generate BMI values between 18.5 and 40
user_bmi = np.round(np.random.uniform(18.5, 40.0, rows_count), 2)

# Generate smoking status: 1 (Smoker), 0 (Non-smoker)
smoker_status = np.random.choice([0, 1], rows_count, p=[0.7, 0.3])

# Generate physical activity levels: 1 to 4 (Sedentary to Very Active)
activity_level = np.random.choice([1, 2, 3, 4], rows_count, p=[0.4, 0.3, 0.2, 0.1])

# Define possible health conditions with probabilities
conditions = ['Diabetes', 'Hypertension', 'Asthma', 'Heart Disease', 'Obesity', 'Arthritis',
              'None', 'Depression', 'Anxiety', 'COPD', 'Kidney Disease', 'Osteoporosis',
              'Allergies', 'High Cholesterol', 'Thyroid Disorders', 'Chronic Fatigue']

condition_probabilities = [0.11, 0.14, 0.07, 0.06, 0.09, 0.07, 0.15, 0.04, 0.04, 
                           0.02, 0.03, 0.03, 0.05, 0.07, 0.03, 0.02]

# Normalize probabilities to ensure they sum to 1
condition_probabilities = np.array(condition_probabilities) / sum(condition_probabilities)

# Generate medical history for each user
medical_hist = np.random.choice(conditions, rows_count, p=condition_probabilities)

# Function to generate personalized health tips
def provide_health_suggestions(condition, bmi_value, smoking, activity):
    tips = []
    
    # Health advice based on medical condition
    if condition == 'Diabetes':
        tips.append("Monitor your blood sugar regularly and follow a low-sugar diet.")
    elif condition == 'Hypertension':
        tips.append("Reduce sodium intake, exercise regularly, and avoid stress.")
    elif condition == 'Asthma':
        tips.append("Avoid allergens, and practice breathing exercises.")
    elif condition == 'Heart Disease':
        tips.append("Opt for heart-healthy diets, and reduce intake of trans fats.")
    elif condition == 'Obesity':
        tips.append("Adopt a calorie deficit diet and increase physical activity.")
    elif condition == 'Depression':
        tips.append("Consider counseling, and practice mindfulness and relaxation.")
    
    # Suggestions based on BMI
    if bmi_value > 30:
        tips.append("Focus on weight reduction strategies.")
    elif bmi_value < 18.5:
        tips.append("Increase calorie intake with nutritious meals.")
    
    # Smoking habit advice
    if smoking:
        tips.append("Quit smoking to lower risk of chronic diseases.")
    
    # Physical activity suggestions
    if activity == 1:
        tips.append("Try to incorporate more physical activities into your routine.")
    elif activity == 4:
        tips.append("Great job maintaining high activity levels!")
    
    # Combine tips into a single string
    return ' | '.join(tips)

# Generate health tips for all users based on their data
health_recommendations = [
    provide_health_suggestions(medical_hist[i], user_bmi[i], smoker_status[i], activity_level[i]) 
    for i in range(rows_count)
]

# Creating a DataFrame with the generated data
health_data = pd.DataFrame({
    'User_ID': np.arange(1, rows_count + 1),
    'Age': user_ages,
    'Gender': user_gender,
    'BMI': user_bmi,
    'Smoker': smoker_status,
    'Activity Level': activity_level,
    'Medical Condition': medical_hist,
    'Health Recommendations': health_recommendations
})

# Save the generated data to a CSV file
health_data.to_csv('synthetic_health_data.csv', index=False)

# Display the first few rows of the DataFrame
print(health_data.head())

# Label encoding for categorical variables like Gender and Medical Condition
gender_encoder = LabelEncoder()
condition_encoder = LabelEncoder()

# Apply label encoding
health_data['Gender_Encoded'] = gender_encoder.fit_transform(health_data['Gender'])
health_data['Condition_Encoded'] = condition_encoder.fit_transform(health_data['Medical Condition'])

# Standardizing the numerical features such as Age, BMI, and Activity Level
scaler = StandardScaler()
health_data[['Age_Std', 'BMI_Std', 'Activity_Std']] = scaler.fit_transform(
    health_data[['Age', 'BMI', 'Activity Level']]
)

# Create a set of all unique health tips
all_tips = set()
for tips in health_data['Health Recommendations']:
    all_tips.update(tips.split(' | '))

# Create binary columns for each health tip
for tip in all_tips:
    health_data[f'Tip_{tip}'] = health_data['Health Recommendations'].apply(lambda x: 1 if tip in x else 0)

# Defining the features (X) and target (y)
X = health_data[['Age_Std', 'BMI_Std', 'Activity_Std', 'Gender_Encoded', 'Smoker', 'Condition_Encoded']]
y = health_data[[col for col in health_data.columns if col.startswith('Tip_')]]

# Splitting the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=2023)

print("Training and testing datasets created successfully.")

# Building a MultiOutputClassifier with RandomForestClassifier
rf_pipeline = Pipeline([
    ('model', MultiOutputClassifier(RandomForestClassifier(n_estimators=100, random_state=2023)))
])

# Fitting the model
rf_pipeline.fit(X_train, y_train)

print("Model built and trained on the training data.")

# Make predictions
y_pred = rf_pipeline.predict(X_test)

# Evaluate the model
print(classification_report(y_test, y_pred, target_names=y.columns))

# Function to convert predictions back to health tips
def predictions_to_tips(predictions, tip_columns):
    return [' | '.join([tip.replace('Tip_', '') for tip, pred in zip(tip_columns, row) if pred == 1])
            for row in predictions]

# Convert predictions to health tips
predicted_tips = predictions_to_tips(y_pred, y.columns)

# Display some sample predictions
for true, pred in zip(y_test.iloc[:5].values, predicted_tips[:5]):
    print("True:", ' | '.join([tip.replace('Tip_', '') for tip, val in zip(y.columns, true) if val == 1]))
    print("Predicted:", pred)
    print()

def suggest_health_tips(user_input):
    """
    Input: user_input (list) - User data [age, bmi, activity level, gender, smoking status, condition]
    Output: health_tip (str) - Predicted health recommendation
    """
    # Encode categorical variables
    user_input[3] = gender_encoder.transform([user_input[3]])[0]  # Gender
    user_input[5] = condition_encoder.transform([user_input[5]])[0]  # Medical Condition

    # Standardize the numerical input values (age, bmi, activity level)
    user_input_std = scaler.transform([[user_input[0], user_input[1], user_input[2]]])

    # Combine standardized and non-standardized inputs
    user_input_combined = np.hstack((user_input_std, [[user_input[3], user_input[4], user_input[5]]]))
    
    # Predict the health tips
    predicted_tips = rf_pipeline.predict(user_input_combined)
    
    # Convert predictions to readable tips
    return predictions_to_tips(predicted_tips, y.columns)[0]

# Example of how to use the function
sample_user_data = [35, 29.4, 3, 'Male', 0, 'Obesity']  # [Age, BMI, Activity Level, Gender, Non-Smoker, Medical Condition]
recommended_tip = suggest_health_tips(sample_user_data)

print(f"Suggested health tip for the user: {recommended_tip}")

# Example usage for different users
user_1 = [45, 32.0, 1, 'Female', 1, 'Asthma']  # Aged 45, BMI 32, Sedentary, Female, Smoker, Medical Condition: Asthma
user_2 = [28, 22.5, 4, 'Male', 0, 'Diabetes']  # Aged 28, BMI 22.5, Very Active, Male, Non-Smoker, Medical Condition: Diabetes

# Get recommendations for both users
tip_for_user_1 = suggest_health_tips(user_1)
tip_for_user_2 = suggest_health_tips(user_2)

print(f"Recommendation for User 1: {tip_for_user_1}")
print(f"Recommendation for User 2: {tip_for_user_2}")

# Content-based Recommendation System
def content_based_recommender(user_profile, n_recommendations=3):
    # Combine user features into a single string
    user_features = (f"Age: {user_profile['Age']} "
                     f"Gender: {user_profile['Gender']} "
                     f"BMI: {user_profile['BMI']} "
                     f"Smoker: {'Yes' if user_profile['Smoker'] else 'No'} "
                     f"Activity Level: {user_profile['Activity Level']} "
                     f"Medical Condition: {user_profile['Medical Condition']}")
    
    # Combine all user profiles
    all_profiles = health_data.apply(lambda row: (f"Age: {row['Age']} "
                                                  f"Gender: {row['Gender']} "
                                                  f"BMI: {row['BMI']} "
                                                  f"Smoker: {'Yes' if row['Smoker'] else 'No'} "
                                                  f"Activity Level: {row['Activity Level']} "
                                                  f"Medical Condition: {row['Medical Condition']}"), axis=1)
    
    # Create TF-IDF vectors
    vectorizer = TfidfVectorizer()
    profile_vectors = vectorizer.fit_transform(all_profiles)
    user_vector = vectorizer.transform([user_features])
    
    # Calculate cosine similarity
    cosine_similarities = cosine_similarity(user_vector, profile_vectors).flatten()
    
    # Get top similar profiles
    similar_indices = cosine_similarities.argsort()[::-1][1:n_recommendations+1]
    
    # Get recommendations from similar profiles
    recommendations = health_data.iloc[similar_indices]['Health Recommendations'].tolist()
    
    return recommendations

# k-Nearest Neighbors Recommendation System
def knn_recommender(user_profile, n_neighbors=5, n_recommendations=3):
    # Prepare user features
    user_features = np.array([[
        user_profile['Age'],
        user_profile['BMI'],
        user_profile['Activity Level'],
        1 if user_profile['Gender'] == 'Male' else 0,  # Encode gender
        user_profile['Smoker'],
        condition_encoder.transform([user_profile['Medical Condition']])[0]  # Encode medical condition
    ]])
    
    # Standardize only the numerical features
    user_features_std = np.column_stack((
        scaler.transform(user_features[:, :3]),
        user_features[:, 3:]
    ))
    
    # Fit kNN model
    knn_model = NearestNeighbors(n_neighbors=n_neighbors, metric='euclidean')
    knn_model.fit(X)
    
    # Find k nearest neighbors
    distances, indices = knn_model.kneighbors(user_features_std)
    
    # Get recommendations from nearest neighbors
    recommendations = health_data.iloc[indices[0]]['Health Recommendations'].tolist()
    
    return recommendations[:n_recommendations]

# Hybrid Recommender System
def hybrid_recommender(user_profile, n_recommendations=3):
    # Get recommendations from both systems
    content_based_recs = content_based_recommender(user_profile, n_recommendations)
    knn_recs = knn_recommender(user_profile, n_neighbors=5, n_recommendations=n_recommendations)
    
    # Combine recommendations
    all_recs = content_based_recs + knn_recs
    
    # Remove duplicates and get top N recommendations
    unique_recs = list(dict.fromkeys(all_recs))
    return unique_recs[:n_recommendations]

# Example usage of recommenders
user_profile = {
    'Age': 35,
    'Gender': 'Male',
    'BMI': 29.4,
    'Smoker': 0,
    'Activity Level': 3,
    'Medical Condition': 'Obesity'
}

print("\nContent-based Recommendations:")
content_based_recommendations = content_based_recommender(user_profile)
for i, rec in enumerate(content_based_recommendations, 1):
    print(f"{i}. {rec}")

print("\nk-Nearest Neighbors Recommendations:")
knn_recommendations = knn_recommender(user_profile)
for i, rec in enumerate(knn_recommendations, 1):
    print(f"{i}. {rec}")

print("\nHybrid Recommender Recommendations:")
hybrid_recommendations = hybrid_recommender(user_profile)
for i, rec in enumerate(hybrid_recommendations, 1):
    print(f"{i}. {rec}")

# Function to evaluate recommendations
def evaluate_recommendations(true_recommendations, predicted_recommendations):
    true_set = set(true_recommendations.split(' | '))
    predicted_set = set(' | '.join(predicted_recommendations).split(' | '))
    
    precision = len(true_set.intersection(predicted_set)) / len(predicted_set)
    recall = len(true_set.intersection(predicted_set)) / len(true_set)
    f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    
    return {
        'Precision': precision,
        'Recall': recall,
        'F1-score': f1_score
    }

# Evaluate content-based recommendations
content_based_eval = evaluate_recommendations(recommended_tip, content_based_recommendations)
print("\nContent-based Recommender Evaluation:")
print(content_based_eval)

# Evaluate kNN recommendations
knn_eval = evaluate_recommendations(recommended_tip, knn_recommendations)
print("\nk-Nearest Neighbors Recommender Evaluation:")
print(knn_eval)

def hybrid_recommender(user_profile, n_recommendations=3):
    # Get recommendations from both systems
    content_based_recs = content_based_recommender(user_profile, n_recommendations)
    knn_recs = knn_recommender(user_profile, n_neighbors=5, n_recommendations=n_recommendations)
    
    # Combine recommendations
    all_recs = content_based_recs + knn_recs
    
    # Remove duplicates and get top N recommendations
    unique_recs = list(dict.fromkeys(all_recs))
    return unique_recs[:n_recommendations]

# Example usage of hybrid recommender
hybrid_recommendations = hybrid_recommender(user_profile)
print("\nHybrid Recommender Recommendations:")
for i, rec in enumerate(hybrid_recommendations, 1):
    print(f"{i}. {rec}")

# Evaluate hybrid recommendations
hybrid_eval = evaluate_recommendations(recommended_tip, hybrid_recommendations)
print("\nHybrid Recommender Evaluation:")
print(hybrid_eval)

# Function to compare recommendation systems
def compare_recommenders(user_profile):
    true_recommendation = suggest_health_tips([
        user_profile['Age'],
        user_profile['BMI'],
        user_profile['Activity Level'],
        user_profile['Gender'],
        user_profile['Smoker'],
        user_profile['Medical Condition']
    ])
    
    content_based_recs = content_based_recommender(user_profile)
    knn_recs = knn_recommender(user_profile)
    hybrid_recs = hybrid_recommender(user_profile)
    
    content_based_eval = evaluate_recommendations(true_recommendation, content_based_recs)
    knn_eval = evaluate_recommendations(true_recommendation, knn_recs)
    hybrid_eval = evaluate_recommendations(true_recommendation, hybrid_recs)
    
    print("\nRecommender Systems Comparison:")
    print("Content-based Recommender:")
    print(content_based_eval)
    print("\nk-Nearest Neighbors Recommender:")
    print(knn_eval)
    print("\nHybrid Recommender:")
    print(hybrid_eval)

# Compare recommenders for a sample user profile
compare_recommenders(user_profile)

# Save model and encoders
import joblib

joblib.dump(rf_pipeline, 'rf_pipeline.joblib')
joblib.dump(gender_encoder, 'gender_encoder.joblib')
joblib.dump(condition_encoder, 'condition_encoder.joblib')
joblib.dump(scaler, 'scaler.joblib')

print("\nModel and encoders saved successfully.")

# Function to load model and make predictions
def load_model_and_predict(user_data):
    # Load model and encoders
    loaded_rf_pipeline = joblib.load('rf_pipeline.joblib')
    loaded_gender_encoder = joblib.load('gender_encoder.joblib')
    loaded_condition_encoder = joblib.load('condition_encoder.joblib')
    loaded_scaler = joblib.load('scaler.joblib')
    
    # Prepare user data
    user_input = [
        user_data['Age'],
        user_data['BMI'],
        user_data['Activity Level'],
        loaded_gender_encoder.transform([user_data['Gender']])[0],
        user_data['Smoker'],
        loaded_condition_encoder.transform([user_data['Medical Condition']])[0]
    ]
    
    # Standardize numerical features
    user_input_std = loaded_scaler.transform([[user_input[0], user_input[1], user_input[2]]])
    
    # Combine standardized and non-standardized inputs
    user_input_combined = np.hstack((user_input_std, [[user_input[3], user_input[4], user_input[5]]]))
    
    # Make prediction
    prediction = loaded_rf_pipeline.predict(user_input_combined)
    
    # Convert prediction to readable tips
    predicted_tips = predictions_to_tips(prediction, y.columns)
    
    return predicted_tips[0]

# Example usage of loaded model
new_user_data = {
    'Age': 50,
    'Gender': 'Female',
    'BMI': 27.5,
    'Smoker': 0,
    'Activity Level': 2,
    'Medical Condition': 'Hypertension'
}

loaded_model_prediction = load_model_and_predict(new_user_data)
print("\nPrediction using loaded model:")
print(loaded_model_prediction)

# Additional evaluation metrics
from sklearn.metrics import precision_score, recall_score, f1_score

# Calculate overall precision, recall, and F1-score
overall_precision = precision_score(y_test, y_pred, average='weighted')
overall_recall = recall_score(y_test, y_pred, average='weighted')
overall_f1 = f1_score(y_test, y_pred, average='weighted')

print("\nOverall Model Performance:")
print(f"Precision: {overall_precision:.4f}")
print(f"Recall: {overall_recall:.4f}")
print(f"F1-score: {overall_f1:.4f}")

# Feature importance
feature_importance = rf_pipeline.named_steps['model'].estimators_[0].feature_importances_
feature_names = X.columns

print("\nFeature Importance:")
for name, importance in zip(feature_names, feature_importance):
    print(f"{name}: {importance:.4f}")

# Correlation analysis
correlation_matrix = health_data[['Age', 'BMI', 'Activity Level', 'Smoker']].corr()
print("\nCorrelation Matrix:")
print(correlation_matrix)

# Suggestions for improvement
print("\nSuggestions for Improvement:")
print("1. Collect more diverse and real-world data to improve the model's generalization.")
print("2. Incorporate more features such as diet, sleep patterns, and stress levels.")
print("3. Implement a time-based collaborative filtering approach to capture changes in user health over time.")
print("4. Use ensemble methods to combine different recommendation approaches for better performance.")
print("5. Implement A/B testing to continuously evaluate and improve the recommendation system.")

print("\nEnhanced Health Recommendation System completed successfully.")

   User_ID  ...                             Health Recommendations
0        1  ...  Focus on weight reduction strategies. | Great ...
1        2  ...              Focus on weight reduction strategies.
2        3  ...  Consider counseling, and practice mindfulness ...
3        4  ...  Monitor your blood sugar regularly and follow ...
4        5  ...  Monitor your blood sugar regularly and follow ...

[5 rows x 8 columns]
Training and testing datasets created successfully.
Model built and trained on the training data.
                                                                     precision    recall  f1-score   support

                                                               Tip_       1.00      1.00      1.00       300
   Tip_Adopt a calorie deficit diet and increase physical activity.       1.00      1.00      1.00        24
 Tip_Try to incorporate more physical activities into your routine.       1.00      1.00      1.00       128
                    Tip_Great job maintai