<a href="https://colab.research.google.com/github/bi24rk/StPetersPOI/blob/main/nutrition_fitness_tracker.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix, ConfusionMatrixDisplay, mean_squared_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, LSTM, Input

# --- Data Loading and Preprocessing ---
# Load the dataset
df = pd.read_csv('nutrition_fitness_data.csv')
print("Dataset Shape:", df.shape)
print("\nDescriptive Statistics:\n", df.describe())
print("Columns in dataset:", df.columns.tolist())

# Calculate BMI and BMI categories
df['bmi'] = df['Weight_kg'] / ((df['Height_cm'] / 100) ** 2)
df['bmi_category'] = pd.cut(df['bmi'], bins=[0, 18.5, 24.9, 29.9, float('inf')],
                            labels=['underweight', 'normal', 'overweight', 'obese'])

# Define features and target for classification
X = df.drop(['bmi', 'bmi_category'], axis=1)
y = df['bmi_category'].cat.codes  # Convert to integers (0-3)

# Create a small dataset (100 rows) for initial testing
df_small = df.iloc[:100]
X_small = df_small.drop(['bmi', 'bmi_category'], axis=1)
y_small = df_small['bmi_category'].cat.codes

# Split the data
X_train_small, X_test_small, y_train_small, y_test_small = train_test_split(X_small, y_small, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define preprocessing pipeline
numeric_features = X.select_dtypes(include=['float64', 'int64']).columns
categorical_features = X.select_dtypes(include=['object']).columns
preprocessor = ColumnTransformer(
    transformers=[
        ('num', Pipeline([('imputer', SimpleImputer(strategy='mean')), ('scaler', StandardScaler())]), numeric_features),
        ('cat', Pipeline([('imputer', SimpleImputer(strategy='most_frequent')), ('encoder', OneHotEncoder(handle_unknown='ignore', sparse_output=False))]), categorical_features)
    ])

# --- Model Evaluation Function ---
def evaluate_model(model, X_train, X_test, y_train, y_test, model_name, save_cm=False):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test) if not isinstance(model, Sequential) else model.predict(X_test, verbose=0).argmax(axis=1)
    f1 = f1_score(y_test, y_pred, average='weighted')
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    print(f"{model_name} (F1: {f1:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f})")
    if save_cm:
        cm = confusion_matrix(y_test, y_pred)
        disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['underweight', 'normal', 'overweight', 'obese'])
        disp.plot(cmap='Blues')
        plt.title(f"Confusion Matrix - {model_name}")
        plt.savefig(f"cm_{model_name.lower().replace(' ', '_')}.png", dpi=300, bbox_inches='tight')
        plt.close()
    return f1

# --- Model Training and Evaluation on Small Dataset (100 rows) ---
print("\nTesting on Small Dataset (100 rows):")
X_train_small_processed = preprocessor.fit_transform(X_train_small)
X_test_small_processed = preprocessor.transform(X_test_small)

# Prepare data for RNN/LSTM
X_train_rnn_small = X_train_small_processed.reshape((X_train_small_processed.shape[0], 1, X_train_small_processed.shape[1]))
X_test_rnn_small = X_test_small_processed.reshape((X_test_small_processed.shape[0], 1, X_test_small_processed.shape[1]))

# Shallow models
pipeline_logreg = Pipeline([('preprocessor', preprocessor), ('classifier', LogisticRegression(max_iter=1000))])
pipeline_svm = Pipeline([('preprocessor', preprocessor), ('classifier', SVC())])
pipeline_rf = Pipeline([('preprocessor', preprocessor), ('classifier', RandomForestClassifier(n_estimators=100))])
pipeline_dt = Pipeline([('preprocessor', preprocessor), ('classifier', DecisionTreeClassifier())])

f1_logreg = evaluate_model(pipeline_logreg, X_train_small, X_test_small, y_train_small, y_test_small, "Logistic Regression")
f1_svm = evaluate_model(pipeline_svm, X_train_small, X_test_small, y_train_small, y_test_small, "SVM")
f1_rf = evaluate_model(pipeline_rf, X_train_small, X_test_small, y_train_small, y_test_small, "Random Forest")
f1_dt = evaluate_model(pipeline_dt, X_train_small, X_test_small, y_train_small, y_test_small, "Decision Tree")

# MLP
mlp = MLPClassifier(hidden_layer_sizes=(200, 100, 50), max_iter=1000, random_state=42)
f1_mlp = evaluate_model(mlp, X_train_small_processed, X_test_small_processed, y_train_small, y_test_small, "MLP")

# RNN
rnn_model_small = Sequential([
    Input(shape=(1, X_train_small_processed.shape[1])),
    SimpleRNN(100, activation='relu', return_sequences=True),
    SimpleRNN(50, activation='relu', return_sequences=False),
    Dense(4, activation='softmax')
])
rnn_model_small.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
rnn_model_small.fit(X_train_rnn_small, y_train_small, epochs=10, batch_size=32, verbose=0)
f1_rnn = evaluate_model(rnn_model_small, X_train_rnn_small, X_test_rnn_small, y_train_small, y_test_small, "RNN")

# LSTM
lstm_model_small = Sequential([
    Input(shape=(1, X_train_small_processed.shape[1])),
    LSTM(128, return_sequences=True),
    LSTM(128, return_sequences=True),
    LSTM(32),
    Dense(4, activation='softmax')
])
lstm_model_small.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
lstm_model_small.fit(X_train_rnn_small, y_train_small, epochs=20, batch_size=32, verbose=0)
f1_lstm = evaluate_model(lstm_model_small, X_train_rnn_small, X_test_rnn_small, y_train_small, y_test_small, "LSTM")

# --- Model Training and Evaluation on Full Dataset ---
print("\nTesting on Full Dataset:")
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed = preprocessor.transform(X_test)

# Prepare data for RNN/LSTM
X_train_rnn_full = X_train_processed.reshape((X_train_processed.shape[0], 1, X_train_processed.shape[1]))
X_test_rnn_full = X_test_processed.reshape((X_test_processed.shape[0], 1, X_test_processed.shape[1]))

# Shallow models
f1_logreg_full = evaluate_model(LogisticRegression(max_iter=1000), X_train_processed, X_test_processed, y_train, y_test, "Logistic Regression (Full)")
f1_svm_full = evaluate_model(SVC(), X_train_processed, X_test_processed, y_train, y_test, "SVM (Full)")
f1_rf_full = evaluate_model(RandomForestClassifier(n_estimators=100), X_train_processed, X_test_processed, y_train, y_test, "Random Forest (Full)", save_cm=True)
f1_dt_full = evaluate_model(DecisionTreeClassifier(), X_train_processed, X_test_processed, y_train, y_test, "Decision Tree (Full)")

# MLP
f1_mlp_full = evaluate_model(MLPClassifier(hidden_layer_sizes=(200, 100, 50), max_iter=1000), X_train_processed, X_test_processed, y_train, y_test, "MLP (Full)")

# RNN
rnn_model_full = Sequential([
    Input(shape=(1, X_train_processed.shape[1])),
    SimpleRNN(100, activation='relu', return_sequences=True),
    SimpleRNN(50, activation='relu', return_sequences=False),
    Dense(4, activation='softmax')
])
rnn_model_full.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
rnn_model_full.fit(X_train_rnn_full, y_train, epochs=10, batch_size=32, verbose=0)
f1_rnn_full = evaluate_model(rnn_model_full, X_train_rnn_full, X_test_rnn_full, y_train, y_test, "RNN (Full)", save_cm=True)

# LSTM
lstm_model_full = Sequential([
    Input(shape=(1, X_train_processed.shape[1])),
    LSTM(128, return_sequences=True),
    LSTM(128, return_sequences=True),
    LSTM(32),
    Dense(4, activation='softmax')
])
lstm_model_full.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
lstm_model_full.fit(X_train_rnn_full, y_train, epochs=20, batch_size=32, verbose=0)
f1_lstm_full = evaluate_model(lstm_model_full, X_train_rnn_full, X_test_rnn_full, y_train, y_test, "LSTM (Full)")

# --- Model Performance Across Data Sizes ---
print("\nTesting Model Performance Across Data Sizes:")
sizes = [500, 1000, 2000, 3000, 4000, 5000]
results = {'size': [], 'svm': [], 'dt': [], 'rf': [], 'mlp': [], 'rnn': [], 'lstm': []}

for size in sizes:
    X_sample = X.sample(n=size, random_state=42)
    y_sample = y[X_sample.index]
    X_train, X_test, y_train, y_test = train_test_split(X_sample, y_sample, test_size=0.2, random_state=42)
    X_train_processed = preprocessor.fit_transform(X_train)
    X_test_processed = preprocessor.transform(X_test)
    X_train_rnn = X_train_processed.reshape((X_train_processed.shape[0], 1, X_train_processed.shape[1]))
    X_test_rnn = X_test_processed.reshape((X_test_processed.shape[0], 1, X_test_processed.shape[1]))

    # Evaluate models
    svm_f1 = evaluate_model(SVC(), X_train_processed, X_test_processed, y_train, y_test, f"SVM ({size} rows)")
    dt_f1 = evaluate_model(DecisionTreeClassifier(), X_train_processed, X_test_processed, y_train, y_test, f"Decision Tree ({size} rows)")
    rf_f1 = evaluate_model(RandomForestClassifier(n_estimators=100), X_train_processed, X_test_processed, y_train, y_test, f"Random Forest ({size} rows)")
    mlp_model = Sequential([
        Input(shape=(X_train_processed.shape[1],)),
        Dense(200, activation='relu'),
        Dense(100, activation='relu'),
        Dense(50, activation='relu'),
        Dense(4, activation='softmax')
    ])
    mlp_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    mlp_model.fit(X_train_processed, y_train, epochs=10, batch_size=32, verbose=0)
    mlp_f1 = evaluate_model(mlp_model, X_train_processed, X_test_processed, y_train, y_test, f"MLP ({size} rows)")
    rnn_model = Sequential([
        Input(shape=(1, X_train_processed.shape[1])),
        SimpleRNN(100, return_sequences=True),
        SimpleRNN(50),
        Dense(4, activation='softmax')
    ])
    rnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    rnn_model.fit(X_train_rnn, y_train, epochs=10, batch_size=32, verbose=0)
    rnn_f1 = evaluate_model(rnn_model, X_train_rnn, X_test_rnn, y_train, y_test, f"RNN ({size} rows)")
    lstm_model = Sequential([
        Input(shape=(1, X_train_processed.shape[1])),
        LSTM(128, return_sequences=True),
        LSTM(128, return_sequences=True),
        LSTM(32),
        Dense(4, activation='softmax')
    ])
    lstm_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    lstm_model.fit(X_train_rnn, y_train, epochs=20, batch_size=32, verbose=0)
    lstm_f1 = evaluate_model(lstm_model, X_train_rnn, X_test_rnn, y_train, y_test, f"LSTM ({size} rows)")

    # Store results
    results['size'].append(size)
    results['svm'].append(svm_f1)
    results['dt'].append(dt_f1)
    results['rf'].append(rf_f1)
    results['mlp'].append(mlp_f1)
    results['rnn'].append(rnn_f1)
    results['lstm'].append(lstm_f1)

# Save and plot results
results_df = pd.DataFrame(results)
results_df.to_csv('results_by_size.csv', index=False)
print("\nResults by Data Size:")
print(results_df)

plt.figure(figsize=(10, 6))
for col in ['svm', 'dt', 'rf', 'mlp', 'rnn', 'lstm']:
    plt.plot(results['size'], results[col], marker='o', label=col.upper())
plt.xlabel('Data Size (Rows)', fontsize=12)
plt.ylabel('F1 Score', fontsize=12)
plt.title('Model Performance vs Data Size', fontsize=14)
plt.legend(fontsize=10)
plt.grid(True)
plt.ylim(0.8, 1.0)
plt.savefig('results_chart.png', dpi=300, bbox_inches='tight')
plt.close()

# --- Feature Impact Analysis (Random Forest) ---
print("\nFeature Impact Analysis (Random Forest):")
initial_features = ['Age', 'Gender', 'Height_cm', 'Weight_kg']
additional_features = ['Daily_Steps', 'Exercise_Frequency', 'Protein_Intake', 'Carbohydrate_Intake', 'Fat_Intake']
feature_subsets = [initial_features]
for feat in additional_features:
    feature_subsets.append(feature_subsets[-1] + [feat])

f1_scores = []
for subset in feature_subsets:
    X_subset = df[subset]
    subset_numeric = X_subset.select_dtypes(include=['float64', 'int64']).columns
    subset_categorical = X_subset.select_dtypes(include=['object']).columns
    subset_preprocessor = ColumnTransformer(
        transformers=[
            ('num', Pipeline([('imputer', SimpleImputer(strategy='mean')), ('scaler', StandardScaler())]), subset_numeric),
            ('cat', Pipeline([('imputer', SimpleImputer(strategy='most_frequent')), ('encoder', OneHotEncoder(handle_unknown='ignore', sparse_output=False))]), subset_categorical)
        ])
    X_train_sub, X_test_sub, y_train_sub, y_test_sub = train_test_split(X_subset, y, test_size=0.2, random_state=42)
    X_train_sub_processed = subset_preprocessor.fit_transform(X_train_sub)
    X_test_sub_processed = subset_preprocessor.transform(X_test_sub)
    rf_subset = RandomForestClassifier(n_estimators=100)
    rf_subset.fit(X_train_sub_processed, y_train_sub)
    y_pred_sub = rf_subset.predict(X_test_sub_processed)
    f1_sub = f1_score(y_test_sub, y_pred_sub, average='weighted')
    f1_scores.append(f1_sub)
    print(f"Features: {subset}, F1: {f1_sub:.4f}")

plt.figure(figsize=(10, 6))
plt.plot(range(len(feature_subsets)), f1_scores, marker='o')
plt.xticks(range(len(feature_subsets)), [str(subset) for subset in feature_subsets], rotation=45)
plt.xlabel('Feature Subset')
plt.ylabel('F1-Score')
plt.title('Feature Impact on Random Forest F1-Score')
plt.tight_layout()
plt.savefig('feature_impact_plot.png', dpi=300, bbox_inches='tight')
plt.close()

# --- Data Visualizations ---
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.hist(df['Age'], bins=20, color='skyblue', edgecolor='black')
plt.xlabel('Age')
plt.ylabel('Frequency')
plt.title('Distribution of Age')
plt.subplot(1, 2, 2)
df['Gender'].value_counts().plot(kind='bar', color='lightgreen')
plt.xlabel('Gender')
plt.ylabel('Count')
plt.title('Distribution of Gender')
plt.tight_layout()
plt.savefig('dist_age_gender.png', dpi=300, bbox_inches='tight')
plt.close()

plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
sns.scatterplot(x='Age', y='bmi', hue='Gender', data=df)
plt.title('BMI vs. Age by Gender')
plt.subplot(1, 2, 2)
sns.boxplot(x='bmi_category', y='Protein_Intake', data=df)
plt.title('Protein Intake by BMI Category')
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig('bmi_age_protein.png', dpi=300, bbox_inches='tight')
plt.close()

plt.figure(figsize=(8, 6))
sns.countplot(x='Chronic_Disease', data=df)
plt.title('Distribution of Chronic Diseases')
plt.xlabel('Chronic Disease')
plt.ylabel('Count')
plt.savefig('dist_chronic_disease.png', dpi=300, bbox_inches='tight')
plt.close()

plt.figure(figsize=(8, 6))
df['Preferred_Cuisine'].value_counts().plot(kind='bar', color='salmon')
plt.xlabel('Preferred Cuisine')
plt.ylabel('Count')
plt.title('Distribution of Preferred Cuisine')
plt.savefig('dist_preferred_cuisine.png', dpi=300, bbox_inches='tight')
plt.close()

corr_matrix = df[numeric_features].corr()
plt.figure(figsize=(8, 6))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', mask=np.triu(corr_matrix))
plt.title('Correlation Matrix')
plt.savefig('corr_matrix.png', dpi=300, bbox_inches='tight')
plt.close()

# --- Random Forest Regression (Excluding Height/Weight) ---
print("\nRandom Forest Regression (Excluding Height/Weight):")
X_reg = df.drop(['bmi', 'bmi_category', 'Height_cm', 'Weight_kg'], axis=1)
y_reg = df['bmi']
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)

numeric_features_reg = X_reg.select_dtypes(include=['float64', 'int64']).columns
categorical_features_reg = X_reg.select_dtypes(include=['object']).columns
preprocessor_reg = ColumnTransformer(
    transformers=[
        ('num', Pipeline([('imputer', SimpleImputer(strategy='mean')), ('scaler', StandardScaler())]), numeric_features_reg),
        ('cat', Pipeline([('imputer', SimpleImputer(strategy='most_frequent')), ('encoder', OneHotEncoder(handle_unknown='ignore', sparse_output=False))]), categorical_features_reg)
    ])

X_train_reg_processed = preprocessor_reg.fit_transform(X_train_reg)
X_test_reg_processed = preprocessor_reg.transform(X_test_reg)

rf_reg = RandomForestRegressor(n_estimators=100)
rf_reg.fit(X_train_reg_processed, y_train_reg)
y_pred_reg = rf_reg.predict(X_test_reg_processed)
mse = mean_squared_error(y_test_reg, y_pred_reg)
r2 = r2_score(y_test_reg, y_pred_reg)
print(f"Random Forest Regression (MSE: {mse:.4f}, R²: {r2:.4f}")

Dataset Shape: (5000, 30)

Descriptive Statistics:
                Age    Height_cm   Weight_kg          BMI  \
count  5000.000000  5000.000000  5000.00000  5000.000000   
mean     48.805600   174.244000    84.36620    28.353134   
std      17.906991    14.229173    20.18103     8.297745   
min      18.000000   150.000000    50.00000    12.630000   
25%      34.000000   162.000000    67.00000    21.850000   
50%      49.000000   174.000000    84.00000    27.640000   
75%      64.000000   186.000000   102.00000    33.812500   
max      79.000000   199.000000   119.00000    52.890000   

       Blood_Pressure_Systolic  Blood_Pressure_Diastolic  Cholesterol_Level  \
count              5000.000000               5000.000000        5000.000000   
mean                133.982400                 89.735800         224.297800   
std                  26.216215                 17.283025          42.918923   
min                  90.000000                 60.000000         150.000000   
25%         

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Random Forest (F1: 0.6221, Precision: 0.5917, Recall: 0.7000)
Decision Tree (F1: 0.9469, Precision: 0.9571, Recall: 0.9500)
MLP (F1: 0.6706, Precision: 0.6571, Recall: 0.7000)
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step - accuracy: 0.8133 - loss: 0.7183


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


RNN (F1: 0.4491, Precision: 0.4217, Recall: 0.5000)
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - accuracy: 0.7828 - loss: 0.5318


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


LSTM (F1: 0.5364, Precision: 0.5167, Recall: 0.6000)

Testing on Full Dataset:
Logistic Regression (Full) (F1: 0.9739, Precision: 0.9748, Recall: 0.9740)
SVM (Full) (F1: 0.9255, Precision: 0.9283, Recall: 0.9260)
Random Forest (Full) (F1: 0.8825, Precision: 0.9190, Recall: 0.8930)
Decision Tree (Full) (F1: 1.0000, Precision: 1.0000, Recall: 1.0000)
MLP (Full) (F1: 0.9819, Precision: 0.9820, Recall: 0.9820)
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - accuracy: 1.0000 - loss: 0.0011
RNN (Full) (F1: 0.9850, Precision: 0.9850, Recall: 0.9850)
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 54ms/step - accuracy: 1.0000 - loss: 4.1035e-04
LSTM (Full) (F1: 0.9789, Precision: 0.9791, Recall: 0.9790)

Testing Model Performance Across Data Sizes:
SVM (500 rows) (F1: 0.8224, Precision: 0.8546, Recall: 0.8200)
Decision Tree (500 rows) (F1: 1.0000, Precision: 1.0000, Recall: 1.0000)
Random Forest (500 rows) (F1: 0.8551, Precision: 0.8944, Recall: 



RNN (500 rows) (F1: 0.8619, Precision: 0.8669, Recall: 0.8600)
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 1.0000 - loss: 0.0063
LSTM (500 rows) (F1: 0.9496, Precision: 0.9496, Recall: 0.9500)
SVM (1000 rows) (F1: 0.8385, Precision: 0.8447, Recall: 0.8400)
Decision Tree (1000 rows) (F1: 0.9901, Precision: 0.9905, Recall: 0.9900)
Random Forest (1000 rows) (F1: 0.8509, Precision: 0.8830, Recall: 0.8550)
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 1.0000 - loss: 0.0030
MLP (1000 rows) (F1: 0.9145, Precision: 0.9160, Recall: 0.9150)
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 1.0000 - loss: 0.0495
RNN (1000 rows) (F1: 0.9098, Precision: 0.9108, Recall: 0.9100)
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 1.0000 - loss: 0.0028
LSTM (1000 rows) (F1: 0.9344, Precision: 0.9370, Recall: 0.9350)
SVM (2000 rows) (F1: 0.9249, Precisio

  plt.tight_layout()



Random Forest Regression (Excluding Height/Weight):
Random Forest Regression (MSE: 0.0013, R²: 1.0000
