![image-2.png](attachment:image-2.png)

In [2]:
!pip install gmdh

Defaulting to user installation because normal site-packages is not writeable
Collecting gmdh
  Using cached gmdh-1.0.3-cp39-cp39-win_amd64.whl (361 kB)
Collecting docstring-inheritance
  Using cached docstring_inheritance-2.2.2-py3-none-any.whl (24 kB)
Installing collected packages: docstring-inheritance, gmdh
Successfully installed docstring-inheritance-2.2.2 gmdh-1.0.3


In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor
# Метрики
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import math
from itertools import combinations
from sklearn.ensemble import StackingRegressor, RandomForestRegressor
from sklearn.linear_model import Ridge
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from gmdh import Combi, Ria, CriterionType, Criterion
import gmdh

In [7]:
# Read the gym-members exercise-tracking CSV and preview the first rows.
df = pd.read_csv(filepath_or_buffer="gym_members_exercise_tracking.csv")
df.head(n=5)

Unnamed: 0,Age,Gender,Weight (kg),Height (m),Max_BPM,Avg_BPM,Resting_BPM,Session_Duration (hours),Calories_Burned,Workout_Type,Fat_Percentage,Water_Intake (liters),Workout_Frequency (days/week),Experience_Level,BMI
0,56,Male,88.3,1.71,180,157,60,1.69,1313.0,Yoga,12.6,3.5,4,3,30.2
1,46,Female,74.9,1.53,179,151,66,1.3,883.0,HIIT,33.9,2.1,4,2,32.0
2,32,Female,68.1,1.66,167,122,54,1.11,677.0,Cardio,33.4,2.3,4,2,24.71
3,25,Male,53.2,1.7,190,164,56,0.59,532.0,Strength,28.8,2.1,3,1,18.41
4,38,Male,46.1,1.79,188,158,68,0.64,556.0,Strength,29.2,2.8,3,1,14.39


In [8]:
# Encode the categorical features as integer codes.
# One LabelEncoder is kept per column so every mapping stays available for
# inverse_transform; a single shared encoder only remembers the classes of
# the column it was fitted on last.
label_encoders = {}
for column in ['Gender', 'Workout_Type', 'Experience_Level']:
    label_encoders[column] = LabelEncoder()
    df[column] = label_encoders[column].fit_transform(df[column])

# Keep the original name bound to the encoder fitted on Experience_Level,
# which is the state a single reused encoder ends up in.
label_encoder = label_encoders['Experience_Level']

### Масштабирование

In [9]:
# Columns that get standardized by the StandardScaler.
# NOTE(review): 'Workout_Type' holds LabelEncoder codes by this point, so a
# categorical column is being treated as numeric here — confirm this is intended.
numerical_features = [
    'Age',
    'Weight (kg)',
    'Height (m)',
    'Max_BPM',
    'Avg_BPM',
    'Resting_BPM',
    'Session_Duration (hours)',
    'Calories_Burned',
    'Workout_Type',
    'Fat_Percentage',
    'Water_Intake (liters)',
    'Workout_Frequency (days/week)',
    'BMI',
]

In [10]:
# Standardize the columns listed in numerical_features, overwriting them in df.
# NOTE(review): the scaler is fitted on the whole DataFrame, before the
# train/test split, so test rows contribute to the scaling statistics —
# confirm this is acceptable for the experiment.
scaler = StandardScaler()
df[numerical_features] = scaler.fit_transform(df[numerical_features])

### Разделяем датасет

In [11]:
# Target: the encoded experience level. Features: every remaining column.
y_experience = df['Experience_Level']
X = df.drop(columns=['Experience_Level'])

In [12]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_experience,
    test_size=0.2,
    random_state=42,
)
print('Train size:', len(X_train))
print('Test_size:', len(X_test))

Train size: 778
Test_size: 195


### Обучение модели группы стекинга

In [28]:
# NumPy versions of the split; the MLP, GMDH and evaluation cells work on these arrays.
X_train_np = X_train.values
X_test_np = X_test.values
y_train_np = y_train.values
y_test_np = y_test.values

# Base learners: a bagging ensemble of 100 estimators and an SVR.
estimators = [
    ('bagging', BaggingRegressor(random_state=52, n_estimators=100, n_jobs=-1)),
    ('svr', SVR()),  # non-linear model
]

# Ridge meta-learner trained on 5-fold cross-validated predictions of the base learners.
stacking_model = StackingRegressor(
    estimators=estimators,
    final_estimator=Ridge(random_state=52),
    cv=5,
    n_jobs=-1
)

# Fit on the NumPy arrays rather than the DataFrame: the model is evaluated on
# X_test_np, and fitting with feature names but predicting without them makes
# scikit-learn emit a "X does not have valid feature names" warning.
stacking_model.fit(X_train_np, y_train_np)

StackingRegressor(cv=5,
                  estimators=[('bagging',
                               BaggingRegressor(n_estimators=100, n_jobs=-1,
                                                random_state=52)),
                              ('svr', SVR())],
                  final_estimator=Ridge(random_state=52), n_jobs=-1)

### Многослойный Перцептрон

In [19]:
# Standardize the inputs for the MLP; statistics come from the training split only.
# Note: this rebinds `scaler` to a new StandardScaler instance.
scaler = StandardScaler()
scaler.fit(X_train_np)
X_train_scaled = scaler.transform(X_train_np)
X_test_scaled = scaler.transform(X_test_np)

mlp_model = MLPRegressor(
    # Architecture: two hidden layers with 100 and 50 units.
    hidden_layer_sizes=(100, 50),
    activation='relu',
    # Optimisation and regularisation.
    solver='adam',
    alpha=0.0001,
    max_iter=1000,
    # Early stopping on a 10% validation hold-out, with a patience of 10 iterations.
    early_stopping=True,
    validation_fraction=0.1,
    n_iter_no_change=10,
    # Reproducibility and logging.
    random_state=42,
    verbose=False,
)
mlp_model.fit(X_train_scaled, y_train_np)

MLPRegressor(early_stopping=True, hidden_layer_sizes=(100, 50), max_iter=1000,
             random_state=42)

### COMBI

In [20]:
# Fit the GMDH COMBI model on the training arrays and print its best polynomial.
combi_model = Combi()
combi_model.fit(X_train_np, y_train_np, verbose=1)
print(f"Лучший полином COMBI: {combi_model.get_best_polynomial()}")

Лучший полином COMBI: y = - 0.1054*x2 + 0.0064*x7 + 0.2382*x8 + 0.0063*x10 - 0.1758*x11 + 0.3812*x13 + 0.007*x14 + 0.8642


### RIA

In [21]:
# Fit the GMDH RIA model with the regularity criterion, then print its best polynomial.
reg_criterion = Criterion(criterion_type=CriterionType.REGULARITY)
ria_model = Ria()
ria_model.fit(
    X_train_np,
    y_train_np,
    criterion=reg_criterion,
    k_best=5,
    p_average=3,
    verbose=1,
)
print(f"Лучший полином RIA: {ria_model.get_best_polynomial()}")

Лучший полином RIA: f1 = 0.3353*x8 + 0.3991*x13 + 0.1454*x8*x13 - 0.0042*x8^2 - 0.0915*x13^2 + 0.8163
f2 = - 0.0811*x11 + 1.1803*f1 - 0.0627*x11*f1 + 0.0323*x11^2 - 0.2003*f1^2 + 0.0115
f3 = 1.003*f2 - 0.1372*x2*f2 + 0.0436*x2^2 + 0.0441*f2^2 - 0.0108
f4 = - 0.2091*x8 + 1.1116*f3 + 0.1895*x8*f3 - 0.1129*x8^2 - 0.0466*f3^2 - 0.0413
f5 = 0.8061*x13 + 0.0444*f4 - 1.1182*x13*f4 + 0.4536*x13^2 + 0.6718*f4^2 + 0.2673
f6 = - 0.4127*x13 + 1.4552*f5 + 0.5035*x13*f5 - 0.2162*x13^2 - 0.2747*f5^2 - 0.156
f7 = 0.0069*x10 + 1.0179*f6 + 0.0015*x10*f6 + 0.0234*x10^2 - 0.0082*f6^2 - 0.0282
f8 = 0.026*x8 + 0.9454*f7 - 0.0881*x8*f7 + 0.0232*x8^2 + 0.0705*f7^2 - 0.0086
f9 = 0.0721*x11 + 1.0089*f8 - 0.0457*x11*f8 - 0.0316*x11^2 + 0.0155*f8^2 - 0.0146
f10 = 0.0006*x7 + 1.0137*f9 + 0.0036*x7*f9 + 0.0082*x7^2 - 0.007*f9^2 - 0.0115
f11 = - 6.18405e-05*x7 + 0.9979*f10 + 3.61154e-05*x7*f10 - 1.57653e-05*x7^2 + 0.0011*f10^2 + 0.0006
f12 = - 0.0006*x8 + 1.0016*f11 - 0.0048*x8*f11 + 0.0019*x8^2 + 0.0027*f11^2 - 0.0

### Оценка качества моделей

In [27]:
# Evaluate every fitted model on the held-out test split and tabulate the metrics.
# mean_squared_error, r2_score and mean_absolute_error are imported in the
# notebook's import cell, so they are not re-imported here.

# 'scaled' marks models that must be fed X_test_scaled instead of X_test_np.
models_to_evaluate = {
    "Stacking": {'model': stacking_model, 'scaled': False},
    "MLP": {'model': mlp_model, 'scaled': True},
    "GMDH_COMBI" : {'model': combi_model, 'scaled': False},
    "GMDH_RIA" : {'model': ria_model, 'scaled': False}
}

metrics_results = {}

for name, info in models_to_evaluate.items():
    model = info['model']
    is_scaled = info['scaled']
    # Pick the test matrix that matches how the model was trained.
    X_test_current = X_test_scaled if is_scaled else X_test_np
    y_pred = model.predict(X_test_current)

    # Compute the metrics
    r2 = r2_score(y_test_np, y_pred)
    mae = mean_absolute_error(y_test_np, y_pred)
    mse = mean_squared_error(y_test_np, y_pred)
    rmse = math.sqrt(mse)

    metrics_results[name] = {
        'R^2': r2,
        'MAE': mae,
        'MSE': mse,
        'RMSE': rmse
    }


# One row per model, metric columns in a fixed order.
metrics_df = pd.DataFrame(metrics_results).T
metrics_df = metrics_df[['R^2', 'MAE', 'MSE', 'RMSE']]
print(metrics_df.round(4))

               R^2     MAE     MSE    RMSE
Stacking    0.9032  0.1254  0.0562  0.2372
MLP         0.8005  0.2788  0.1160  0.3406
GMDH_COMBI  0.8353  0.2559  0.0958  0.3094
GMDH_RIA    0.8720  0.1947  0.0744  0.2728




### Результаты лабораторной работы №5

![image.png](attachment:image.png)

Bagging без стекинга всё равно оказался более подходящей моделью.