In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.decomposition import PCA
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.pipeline import make_pipeline
from sklearn.base import clone

# Load the dataset
df = pd.read_csv('health_metrics.csv')

# Remove any rows with missing values
df.dropna(inplace=True)

# Separate the features (health metrics) from the target variable (fitness score)
X = df.drop('fitness_score', axis=1)
y = df['fitness_score']

# Split BEFORE fitting any preprocessing step. Fitting the scaler / PCA on the
# full dataset would let test-set statistics leak into training and make the
# reported test scores optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Normalize the features using Min-Max scaling learned from the training rows
# only; the test rows are transformed with the same fitted scaler.
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train_raw)
X_test_scaled = scaler.transform(X_test_raw)

# Perform outlier detection and removal on the training rows only, so the test
# set stays an untouched sample of the data.
# NOTE(review): detect_and_remove_outliers is not defined in this part of the
# file; it is assumed to return positional row indices usable with np.delete.
# The index is reset so Series labels and row positions agree after the
# shuffled split.
y_train = y_train.reset_index(drop=True)
outlier_indices = detect_and_remove_outliers(X_train_scaled, y_train)
X_train_scaled = np.delete(X_train_scaled, outlier_indices, axis=0)
y_train = np.delete(y_train.to_numpy(), outlier_indices, axis=0)
y_test = y_test.to_numpy()

# Perform feature extraction using PCA fitted on the training rows only.
# PCA cannot extract more components than min(n_samples, n_features), so the
# target of 5 is capped to avoid a ValueError on narrow datasets.
n_components = min(5, *X_train_scaled.shape)
pca = PCA(n_components=n_components)
X_train = pca.fit_transform(X_train_scaled)
X_test = pca.transform(X_test_scaled)

# Define the base models for stacking.
# Seeds are fixed so repeated runs are comparable (the train/test split above
# is seeded as well).
base_models = [
    DecisionTreeRegressor(random_state=42),
    RandomForestRegressor(random_state=42),
    GradientBoostingRegressor(random_state=42)
]

# Plain arrays so the fold indices below can be used positionally.
X_train_arr = np.asarray(X_train)
y_train_arr = np.asarray(y_train)

# One column of meta-features per base model.
stacked_features_train = np.zeros((X_train_arr.shape[0], len(base_models)))
stacked_features_test = np.zeros((X_test.shape[0], len(base_models)))

# The meta model must learn from predictions the base models made on rows they
# were NOT fitted on. In-sample predictions (fit on X_train, predict X_train)
# are near-perfect for tree models, which teaches the meta model to trust them
# far more than it should on unseen data.
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

for i, model in enumerate(base_models):
    # Out-of-fold predictions fill the training meta-feature column.
    for fit_idx, holdout_idx in kfold.split(X_train_arr):
        fold_model = clone(model)
        fold_model.fit(X_train_arr[fit_idx], y_train_arr[fit_idx])
        stacked_features_train[holdout_idx, i] = fold_model.predict(
            X_train_arr[holdout_idx]
        )

    # Refit on the full training set to produce the test meta-features.
    model.fit(X_train_arr, y_train_arr)
    stacked_features_test[:, i] = model.predict(X_test)

# Define the meta model for stacking
meta_model = MLPRegressor(random_state=42)

# Train the meta model on the out-of-fold stacked features
meta_model.fit(stacked_features_train, y_train_arr)

# Make predictions using the meta model on stacked features
meta_predictions = meta_model.predict(stacked_features_test)

# Evaluate the meta model's performance on the held-out test set
meta_mse = mean_squared_error(y_test, meta_predictions)
meta_r2 = r2_score(y_test, meta_predictions)

print("\nMeta Model:")
print("Mean Squared Error:", meta_mse)
print("R-squared Score:", meta_r2)

# (display name, estimator) pairs handed to the ensemble, in this order
models = list(zip(
    ['Decision Tree', 'Random Forest', 'Gradient Boosting', 'Neural Network'],
    [
        DecisionTreeRegressor(),
        RandomForestRegressor(),
        GradientBoostingRegressor(),
        MLPRegressor(),
    ],
))

# Build the ensemble from the pairs above.
# NOTE(review): StackingEnsemble is neither defined nor imported in the visible
# part of this file — confirm it is provided elsewhere before running.
stacking_ensemble = StackingEnsemble(models)

# Fit on the training split, then predict the held-out split
stacking_ensemble.fit(X_train, y_train)
ensemble_predictions = stacking_ensemble.predict(X_test)

# Score the ensemble's predictions against the held-out targets
ensemble_mse = mean_squared_error(y_test, ensemble_predictions)
ensemble_r2 = r2_score(y_test, ensemble_predictions)

# Report the metrics
print("\nEnsemble Model:")
for metric_label, metric_value in (
    ("Mean Squared Error:", ensemble_mse),
    ("R-squared Score:", ensemble_r2),
):
    print(metric_label, metric_value)
