In [2]:
import pandas as pd
import numpy as np
import json
from pathlib import Path

# --- –ò–º–ø–æ—Ä—Ç—ã –¥–ª—è –ú–æ–¥–µ–ª–µ–π ---
from sklearn.model_selection import GroupShuffleSplit
from sklearn.metrics import (
    accuracy_score, f1_score, roc_auc_score, precision_score, recall_score
)
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

# --- –ò–º–ø–æ—Ä—Ç—ã –¥–ª—è –≠–∫—Å–ø–æ—Ä—Ç–∞ ---
try:
    import skl2onnx
    from skl2onnx import convert_sklearn
    from skl2onnx.common.data_types import FloatTensorType
except ImportError:
    print("–í–Ω–∏–º–∞–Ω–∏–µ: 'skl2onnx' –Ω–µ —É—Å—Ç–∞–Ω–æ–≤–ª–µ–Ω. –≠–∫—Å–ø–æ—Ä—Ç –≤ ONNX –Ω–µ —Å—Ä–∞–±–æ—Ç–∞–µ—Ç.")
    print("–£—Å—Ç–∞–Ω–æ–≤–∏—Ç–µ: pip install skl2onnx")


# ===================================================================
# --- –®–∞–≥ 1: –§—É–Ω–∫—Ü–∏–∏ –ø–æ–¥–≥–æ—Ç–æ–≤–∫–∏ –¥–∞–Ω–Ω—ã—Ö ---
# ===================================================================

def _zscore(s: pd.Series, window=7, min_periods=3) -> pd.Series:
    """–†–∞—Å—á–µ—Ç Z-–æ—Ü–µ–Ω–∫–∏ –¥–ª—è –≤—Ä–µ–º–µ–Ω–Ω–æ–≥–æ —Ä—è–¥–∞, –∏–≥–Ω–æ—Ä–∏—Ä—É—è std=0."""
    m = s.rolling(window, min_periods=min_periods).mean()
    sd = s.rolling(window, min_periods=min_periods).std()
    sd = sd.replace(0, np.nan) 
    return (s - m) / sd

def _delta_from_mean(s: pd.Series, window=7, min_periods=3) -> pd.Series:
    """–†–∞—Å—á–µ—Ç –¥–µ–ª—å—Ç—ã –æ—Ç —Å–∫–æ–ª—å–∑—è—â–µ–≥–æ —Å—Ä–µ–¥–Ω–µ–≥–æ."""
    m = s.rolling(window, min_periods=min_periods).mean()
    return s - m

def create_fatigue_model_dataset(health_fitness_dataset: str) -> pd.DataFrame:
    """
    –ó–∞–≥—Ä—É–∂–∞–µ—Ç —Å—ã—Ä–æ–π –¥–∞—Ç–∞—Å–µ—Ç, –æ—á–∏—â–∞–µ—Ç –µ–≥–æ, –ò–ì–ù–û–†–ò–†–£–Ø –ü–£–õ–¨–°,
    –∏ —Å–æ–∑–¥–∞–µ—Ç –Ω–æ–≤—É—é, –∏–Ω–∂–µ–Ω–µ—Ä–Ω—É—é —Ü–µ–ª—å 'y_target_fatigue'.
    """
    
    print(f"--- –ó–∞–≥—Ä—É–∑–∫–∞ —Å—ã—Ä—ã—Ö –¥–∞–Ω–Ω—ã—Ö –∏–∑ {health_fitness_dataset} ---")
    try:
        # --- –ò–°–ü–†–ê–í–õ–ï–ù–û: –û—à–∏–±–∫–∞ 1 (—É–±—Ä–∞–Ω–æ .csv) ---
        df = pd.read_csv(health_fitness_dataset)
    except FileNotFoundError:
        print(f"–û–®–ò–ë–ö–ê: –§–∞–π–ª –Ω–µ –Ω–∞–π–¥–µ–Ω –ø–æ –ø—É—Ç–∏ {health_fitness_dataset}")
        return pd.DataFrame()

    # --- 1.1: –ü–µ—Ä–µ–∏–º–µ–Ω–æ–≤–∞–Ω–∏–µ (—Ç–æ–ª—å–∫–æ –Ω—É–∂–Ω—ã—Ö –Ω–∞–º —Ñ–∏—á) ---
    df = df.rename(columns={
        'participant_id': 'user_id',
        'daily_steps': 'steps_total',
        'hours_sleep': 'sleep_hours_total',
        'calories_burned': 'calories_total',
        # –ú—ã –Ω–∞–º–µ—Ä–µ–Ω–Ω–æ –ò–ì–ù–û–†–ò–†–£–ï–ú 'avg_heart_rate'
    })

    # --- 1.2: –ë–∞–∑–æ–≤–∞—è –æ–±—Ä–∞–±–æ—Ç–∫–∞ (—Ç–∏–ø—ã, gender) ---
    df['date'] = pd.to_datetime(df['date'])
    df['gender_numeric'] = df['gender'].map({'M': 1, 'F': 0})

    # --- 1.3: –†–∞—Å—á–µ—Ç –ø—Ä–æ–∏–∑–≤–æ–¥–Ω—ã—Ö —Ñ–∏—á (–ë–ï–ó –ü–£–õ–¨–°–ê) ---
    print("–†–∞—Å—á–µ—Ç Z-scores –∏ Deltas –¥–ª—è —Å–Ω–∞ –∏ —à–∞–≥–æ–≤...")
    df = df.sort_values(['user_id', 'date'])

    # –†–∞—Å—Å—á–∏—Ç—ã–≤–∞–µ–º Z-–æ—Ü–µ–Ω–∫–∏ –∏ –î–µ–ª—å—Ç—ã –∑–∞ 7 –¥–Ω–µ–π
    df['z_sleep_7d'] = df.groupby('user_id', group_keys=False)['sleep_hours_total'].apply(_zscore)
    df['z_steps_7d'] = df.groupby('user_id', group_keys=False)['steps_total'].apply(_zscore)
    df['d_sleep_7d'] = df.groupby('user_id', group_keys=False)['sleep_hours_total'].apply(_delta_from_mean)
    df['d_steps_7d'] = df.groupby('user_id', group_keys=False)['steps_total'].apply(_delta_from_mean)
    
    # --- 1.4: –ö–†–ò–¢–ò–ß–ï–°–ö–ê–Ø –û–ß–ò–°–¢–ö–ê (NaN) ---
    # –ú—ã –¥–æ–ª–∂–Ω—ã —É–¥–∞–ª–∏—Ç—å NaN –î–û —Å–æ–∑–¥–∞–Ω–∏—è —Ü–µ–ª–∏, —á—Ç–æ–±—ã Z-–æ—Ü–µ–Ω–∫–∏ –±—ã–ª–∏ —á–∏—Å—Ç—ã–º–∏
    features_to_check_na = ['z_sleep_7d', 'z_steps_7d', 'd_sleep_7d', 'd_steps_7d']
    start_len = len(df)
    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    df_cleaned = df.dropna(subset=features_to_check_na)
    print(f"–£–¥–∞–ª–µ–Ω–æ {start_len - len(df_cleaned)} —Å—Ç—Ä–æ–∫ (–ø–µ—Ä–∏–æ–¥ '–ø—Ä–æ–≥—Ä–µ–≤–∞' Z-–æ—Ü–µ–Ω–æ–∫).")

    # --- 1.5: –°–æ–∑–¥–∞–Ω–∏–µ –ù–û–í–û–ô —Ü–µ–ª–∏ 'y_target_fatigue' (–ò–Ω–∂–∏–Ω–∏—Ä–∏–Ω–≥) ---
    # –ú—ã –ò–ì–ù–û–†–ò–†–£–ï–ú 'stress_level'. –ú—ã —Å–æ–∑–¥–∞–µ–º –°–í–û–Æ —Ü–µ–ª—å "–£—Å—Ç–∞–ª–æ—Å—Ç—å".
    
    # –ù–ê–®–ï –û–ü–†–ï–î–ï–õ–ï–ù–ò–ï "–£–°–¢–ê–õ–û–°–¢–ò" (1):
    # –î–µ–Ω—å, –∫–æ–≥–¥–∞ —Å–æ–Ω –±—ã–ª –ó–ù–ê–ß–ò–¢–ï–õ–¨–ù–û –ù–ò–ñ–ï –Ω–æ—Ä–º—ã (z < -1.0)
    # –ò–õ–ò –∞–∫—Ç–∏–≤–Ω–æ—Å—Ç—å –±—ã–ª–∞ –ó–ù–ê–ß–ò–¢–ï–õ–¨–ù–û –í–´–®–ï –Ω–æ—Ä–º—ã (z > 1.5)
    
    df_cleaned['y_target_fatigue'] = (
        (df_cleaned['z_sleep_7d'] < -1.0) |  # –°–æ–Ω –Ω–∞ 1 std –Ω–∏–∂–µ –Ω–æ—Ä–º—ã
        (df_cleaned['z_steps_7d'] >  1.5)    # –®–∞–≥–∏ –Ω–∞ 1.5 std –≤—ã—à–µ –Ω–æ—Ä–º—ã
    ).astype(int)

    print("\n--- –ë–∞–ª–∞–Ω—Å –Ω–æ–≤–æ–π —Ü–µ–ª–∏ 'y_target_fatigue' ---")
    print(df_cleaned['y_target_fatigue'].value_counts(normalize=True))
    print("-------------------------------------------------")

    # --- 1.6: –í—ã–±–æ—Ä —Ñ–∏–Ω–∞–ª—å–Ω—ã—Ö —Å—Ç–æ–ª–±—Ü–æ–≤ ---
    FINAL_COLS_NEEDED = [
        'user_id',        # –î–ª—è GroupShuffleSplit
        'y_target_fatigue', # –ù–∞—à–∞ –ù–û–í–ê–Ø —Ü–µ–ª—å
        
        # –§–∏—á–∏, –∫–æ—Ç–æ—Ä—ã–µ –±—É–¥—É—Ç —É –Ω–∞—Å —Å —Ç–µ–ª–µ—Ñ–æ–Ω–∞:
        'steps_total', 
        'calories_total', 
        'sleep_hours_total',
        'age', 
        'gender_numeric', 
        'height_cm', 
        'weight_kg',
        
        # –§–∏—á–∏, –∫–æ—Ç–æ—Ä—ã–µ –º—ã —Ä–∞—Å—Å—á–∏—Ç–∞–ª–∏:
        'z_sleep_7d',
        'z_steps_7d',
        'd_sleep_7d',
        'd_steps_7d'
    ]
    
    cols_exist = [col for col in FINAL_COLS_NEEDED if col in df_cleaned.columns]
    df_final = df_cleaned[cols_exist].copy()
    
    print("--- –û—á–∏—Å—Ç–∫–∞ –∏ —Å–æ–∑–¥–∞–Ω–∏–µ —Ñ–∏—á –∑–∞–≤–µ—Ä—à–µ–Ω—ã ---")
    return df_final


# ===================================================================
# --- –û–°–ù–û–í–ù–û–ô –°–ö–†–ò–ü–¢ (–í–´–ü–û–õ–ù–ï–ù–ò–ï) ---
# ===================================================================

# === –®–∞–≥ 0: –ü–∞—Ä–∞–º–µ—Ç—Ä—ã ===

# --- –ò–°–ü–†–ê–í–¨–¢–ï –≠–¢–£ –°–¢–†–û–ö–£ (–û—à–∏–±–∫–∞ 2) ---
# –£–∫–∞–∂–∏—Ç–µ –ü–û–õ–ù–´–ô –ø—É—Ç—å –∫ –≤–∞—à–µ–º—É —Ñ–∞–π–ª—É, –µ—Å–ª–∏ –æ–Ω –Ω–µ –ª–µ–∂–∏—Ç –≤ —Ç–æ–π –∂–µ –ø–∞–ø–∫–µ
CSV_PATH = "health_fitness_dataset.csv"  
# –ù–∞–ø—Ä–∏–º–µ—Ä: "C:\\Users\\User\\Downloads\\health_fitness_dataset.csv"
# –ò–ª–∏: "/Users/User/Downloads/health_fitness_dataset.csv"

EXPORT_DIR = Path("export")
EXPORT_DIR.mkdir(exist_ok=True)
ONNX_PATH = EXPORT_DIR / "fatigue_model_v1.onnx"
FEATURES_JSON = EXPORT_DIR / "fatigue_model_v1_features.json"
METRICS_JSON = EXPORT_DIR / "fatigue_model_v1_metrics.json"


# === –®–∞–≥ 1: –ó–∞–ø—É—Å–∫ –ø–æ–¥–≥–æ—Ç–æ–≤–∫–∏ –¥–∞–Ω–Ω—ã—Ö ===
print("=== –®–∞–≥ 1: –ó–∞–ø—É—Å–∫ –æ—á–∏—Å—Ç–∫–∏ –∏ –ø–æ–¥–≥–æ—Ç–æ–≤–∫–∏ –¥–∞–Ω–Ω—ã—Ö... ===")
df_ready = create_fatigue_model_dataset(CSV_PATH)


# === –®–∞–≥ 2: –û–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ X, y, groups ===
FEATURES = [] 
if not df_ready.empty:
    print("\n=== –®–∞–≥ 2: –û–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ X, y, –∏ groups... ===")
    
    # –ù–∞—à –ù–û–í–´–ô —Å–ø–∏—Å–æ–∫ —Ñ–∏—á (–ë–ï–ó –ü–£–õ–¨–°–ê)
    FEATURES = [
        # –°—ã—Ä—ã–µ –¥–∞–Ω–Ω—ã–µ
        'steps_total', 
        'calories_total', 
        'sleep_hours_total',
        # –°—Ç–∞—Ç–∏–∫–∞
        'age', 
        'gender_numeric', 
        'height_cm', 
        'weight_kg',
        # –ü—Ä–æ–∏–∑–≤–æ–¥–Ω—ã–µ (–î–µ–ª—å—Ç—ã - —ç—Ç–æ –Ω–æ—Ä–º–∞–ª—å–Ω–æ, –æ–Ω–∏ –Ω–µ –ø—Ä—è–º–∞—è —É—Ç–µ—á–∫–∞)
        'd_sleep_7d',
        'd_steps_7d'
    ]

    # .fillna(0.0) - –±–µ–∑–æ–ø–∞—Å–Ω–æ –¥–ª—è (age, gender, height...)
    X = df_ready[FEATURES].fillna(0.0).values
    y = df_ready['y_target_fatigue'].values # –ù–∞—à–∞ –Ω–æ–≤–∞—è —Ü–µ–ª—å
    groups = df_ready['user_id'].values

    print(f"--- –®–∞–≥ 2: X, y, groups —Å–æ–∑–¥–∞–Ω—ã. {len(FEATURES)} —Ñ–∏—á ---")
else:
    print("\n–û–®–ò–ë–ö–ê: 'df_ready' –ø—É—Å—Ç. –í—ã–ø–æ–ª–Ω–µ–Ω–∏–µ –æ—Å—Ç–∞–Ω–æ–≤–ª–µ–Ω–æ.")


# === –®–∞–≥ 3: –†–∞–∑–¥–µ–ª–µ–Ω–∏–µ –Ω–∞ Train/Test (–ø–æ –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—è–º) ===
if 'X' in locals() and len(X) > 0:
    print(f"\n=== –®–∞–≥ 3: –†–∞–∑–¥–µ–ª–µ–Ω–∏–µ {len(np.unique(groups))} –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª–µ–π... ===")
    
    gss = GroupShuffleSplit(n_splits=1, test_size=0.20, random_state=42)
    train_idx, test_idx = next(gss.split(X, y, groups=groups))

    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]

    print(f"–†–∞–∑–¥–µ–ª–µ–Ω–∏–µ –∑–∞–≤–µ—Ä—à–µ–Ω–æ: {len(X_train)} train, {len(X_test)} test.")
else:
    print("\n–û–®–ò–ë–ö–ê: X –Ω–µ —Å–æ–∑–¥–∞–Ω. –ù–µ –º–æ–≥—É —Ä–∞–∑–¥–µ–ª–∏—Ç—å –¥–∞–Ω–Ω—ã–µ.")


# === –®–∞–≥ 4: –û–±—É—á–µ–Ω–∏–µ –º–æ–¥–µ–ª–µ–π ===
if 'X_train' in locals() and len(X_train) > 0:
    print("\n=== –®–∞–≥ 4: –û–±—É—á–µ–Ω–∏–µ –º–æ–¥–µ–ª–µ–π... ===")
    models = {}

    # --- 4.1 Logistic Regression (Baseline) ---
    print("–û–±—É—á–µ–Ω–∏–µ 1/2: Logistic Regression...")
    logreg = Pipeline(steps=[
        ("scaler", StandardScaler()),
        ("clf", LogisticRegression(
            max_iter=2000,
            class_weight='balanced', # –ò—Å–ø–æ–ª—å–∑—É–µ–º 'balanced', —Ç.–∫. —Ü–µ–ª—å –º–æ–∂–µ—Ç –±—ã—Ç—å (70/30)
            random_state=42
        ))
    ])
    logreg.fit(X_train, y_train)
    proba_lr = logreg.predict_proba(X_test)[:, 1]
    pred_lr  = (proba_lr >= 0.5).astype(int)
    models['logreg'] = {
        "model": logreg,
        "metrics": {
            "acc": accuracy_score(y_test, pred_lr),
            "f1": f1_score(y_test, pred_lr),
            "auc": roc_auc_score(y_test, proba_lr),
            "precision": precision_score(y_test, pred_lr),
            "recall": recall_score(y_test, pred_lr),
        }
    }

    # --- 4.2 Random Forest (Main Model) ---
    print("–û–±—É—á–µ–Ω–∏–µ 2/2: Random Forest...")
    rf = RandomForestClassifier(
        n_estimators=300, # 300 - —Ö–æ—Ä–æ—à–∏–π –∫–æ–º–ø—Ä–æ–º–∏—Å—Å
        max_depth=None,
        min_samples_leaf=5, # 5 - –¥–ª—è –ª—É—á—à–µ–≥–æ –æ–±–æ–±—â–µ–Ω–∏—è
        random_state=42,
        n_jobs=-1,
        class_weight='balanced_subsample' 
    )
    rf.fit(X_train, y_train)
    proba_rf = rf.predict_proba(X_test)[:, 1]
    pred_rf  = (proba_rf >= 0.5).astype(int)
    models['rf'] = {
        "model": rf,
        "metrics": {
            "acc": accuracy_score(y_test, pred_rf),
            "f1": f1_score(y_test, pred_rf),
            "auc": roc_auc_score(y_test, proba_rf),
            "precision": precision_score(y_test, pred_rf),
            "recall": recall_score(y_test, pred_rf),
        }
    }
    print("--- –û–±—É—á–µ–Ω–∏–µ –∑–∞–≤–µ—Ä—à–µ–Ω–æ. ---")
else:
    print("\n–û–®–ò–ë–ö–ê: X_train –Ω–µ –Ω–∞–π–¥–µ–Ω. –û–±—É—á–µ–Ω–∏–µ –Ω–µ–≤–æ–∑–º–æ–∂–Ω–æ.")


# === –®–∞–≥ 5: –í—ã–±–æ—Ä –ª—É—á—à–µ–π –º–æ–¥–µ–ª–∏ –∏ —ç–∫—Å–ø–æ—Ä—Ç ===
if 'models' in locals() and 'FEATURES' in locals():
    
    # --- 5.1 –í—ã–±–æ—Ä –ø–æ–±–µ–¥–∏—Ç–µ–ª—è ---
    best_name = max(models.keys(), key=lambda k: models[k]["metrics"]["auc"])
    best_model_data = models[best_name]

    print("\n=== –®–∞–≥ 5: –†–ï–ó–£–õ–¨–¢–ê–¢–´ ===")
    print(f"üéâ –õ—É—á—à–∞—è –º–æ–¥–µ–ª—å: {best_name.upper()}")
    print("–ú–µ—Ç—Ä–∏–∫–∏ –Ω–∞ —Ç–µ—Å—Ç–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö:")
    print(json.dumps(best_model_data["metrics"], indent=2))

    # --- 5.2 –≠–∫—Å–ø–æ—Ä—Ç –≤ ONNX ---
    print(f"\n–≠–∫—Å–ø–æ—Ä—Ç '{best_name}' –≤ ONNX...")
    try:
        initial_type = [('input', FloatTensorType([None, len(FEATURES)]))]
        onx = convert_sklearn(
            best_model_data["model"], 
            initial_types=initial_type
        )
        with open(ONNX_PATH, "wb") as f:
            f.write(onx.SerializeToString())
        print(f"‚úÖ –ú–æ–¥–µ–ª—å —É—Å–ø–µ—à–Ω–æ —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: {ONNX_PATH.resolve()}")
    except Exception as e:
        print(f"‚ùå –û–®–ò–ë–ö–ê —ç–∫—Å–ø–æ—Ä—Ç–∞ –≤ ONNX: {e}")

    # --- 5.3 –°–æ—Ö—Ä–∞–Ω–µ–Ω–∏–µ JSON-—Ñ–∞–π–ª–æ–≤ –¥–ª—è Backend ---
    try:
        with open(FEATURES_JSON, "w", encoding="utf-8") as f:
            json.dump({
                "name": "fatigue_risk_v1", # –ù–æ–≤–æ–µ –∏–º—è!
                "version": "1.0.0",
                "features": FEATURES
            }, f, ensure_ascii=False, indent=2)
        print(f"‚úÖ –ü–æ—Ä—è–¥–æ–∫ —Ñ–∏—á —Å–æ—Ö—Ä–∞–Ω–µ–Ω: {FEATURES_JSON.resolve()}")

        with open(METRICS_JSON, "w", encoding="utf-8") as f:
            json.dump({
                "best_model": best_name,
                **best_model_data["metrics"]
            }, f, ensure_ascii=False, indent=2)
        print(f"‚úÖ –ú–µ—Ç—Ä–∏–∫–∏ —Å–æ—Ö—Ä–∞–Ω–µ–Ω—ã: {METRICS_JSON.resolve()}")
    except Exception as e:
        print(f"‚ùå –û–®–ò–ë–ö–ê —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∏—è JSON: {e}")
        
    print("\n--- –í–µ—Å—å –ø–∞–π–ø–ª–∞–π–Ω —É—Å–ø–µ—à–Ω–æ –∑–∞–≤–µ—Ä—à–µ–Ω! ---")

else:
    print("\n–û–®–ò–ë–ö–ê: 'models' –∏–ª–∏ 'FEATURES' –Ω–µ –Ω–∞–π–¥–µ–Ω—ã.")


=== –®–∞–≥ 1: –ó–∞–ø—É—Å–∫ –æ—á–∏—Å—Ç–∫–∏ –∏ –ø–æ–¥–≥–æ—Ç–æ–≤–∫–∏ –¥–∞–Ω–Ω—ã—Ö... ===
--- –ó–∞–≥—Ä—É–∑–∫–∞ —Å—ã—Ä—ã—Ö –¥–∞–Ω–Ω—ã—Ö –∏–∑ health_fitness_dataset.csv ---
–†–∞—Å—á–µ—Ç Z-scores –∏ Deltas –¥–ª—è —Å–Ω–∞ –∏ —à–∞–≥–æ–≤...
–£–¥–∞–ª–µ–Ω–æ 6005 —Å—Ç—Ä–æ–∫ (–ø–µ—Ä–∏–æ–¥ '–ø—Ä–æ–≥—Ä–µ–≤–∞' Z-–æ—Ü–µ–Ω–æ–∫).

--- –ë–∞–ª–∞–Ω—Å –Ω–æ–≤–æ–π —Ü–µ–ª–∏ 'y_target_fatigue' ---
y_target_fatigue
0    0.796713
1    0.203287
Name: proportion, dtype: float64
-------------------------------------------------
--- –û—á–∏—Å—Ç–∫–∞ –∏ —Å–æ–∑–¥–∞–Ω–∏–µ —Ñ–∏—á –∑–∞–≤–µ—Ä—à–µ–Ω—ã ---

=== –®–∞–≥ 2: –û–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ X, y, –∏ groups... ===
--- –®–∞–≥ 2: X, y, groups —Å–æ–∑–¥–∞–Ω—ã. 9 —Ñ–∏—á ---

=== –®–∞–≥ 3: –†–∞–∑–¥–µ–ª–µ–Ω–∏–µ 3000 –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª–µ–π... ===
–†–∞–∑–¥–µ–ª–µ–Ω–∏–µ –∑–∞–≤–µ—Ä—à–µ–Ω–æ: 545520 train, 136176 test.

=== –®–∞–≥ 4: –û–±—É—á–µ–Ω–∏–µ –º–æ–¥–µ–ª–µ–π... ===
–û–±—É—á–µ–Ω–∏–µ 1/2: Logistic Regression...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned['y_target_fatigue'] = (


–û–±—É—á–µ–Ω–∏–µ 2/2: Random Forest...
--- –û–±—É—á–µ–Ω–∏–µ –∑–∞–≤–µ—Ä—à–µ–Ω–æ. ---

=== –®–∞–≥ 5: –†–ï–ó–£–õ–¨–¢–ê–¢–´ ===
üéâ –õ—É—á—à–∞—è –º–æ–¥–µ–ª—å: RF
–ú–µ—Ç—Ä–∏–∫–∏ –Ω–∞ —Ç–µ—Å—Ç–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö:
{
  "acc": 0.9020899424274468,
  "f1": 0.7747994257241787,
  "auc": 0.9592008232426553,
  "precision": 0.7270191454291872,
  "recall": 0.8293018042448567
}

–≠–∫—Å–ø–æ—Ä—Ç 'rf' –≤ ONNX...
‚úÖ –ú–æ–¥–µ–ª—å —É—Å–ø–µ—à–Ω–æ —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∞: /Users/aruuketurgunbaeva/health-monitoring-app/ml/export/fatigue_model_v1.onnx
‚úÖ –ü–æ—Ä—è–¥–æ–∫ —Ñ–∏—á —Å–æ—Ö—Ä–∞–Ω–µ–Ω: /Users/aruuketurgunbaeva/health-monitoring-app/ml/export/fatigue_model_v1_features.json
‚úÖ –ú–µ—Ç—Ä–∏–∫–∏ —Å–æ—Ö—Ä–∞–Ω–µ–Ω—ã: /Users/aruuketurgunbaeva/health-monitoring-app/ml/export/fatigue_model_v1_metrics.json

--- –í–µ—Å—å –ø–∞–π–ø–ª–∞–π–Ω —É—Å–ø–µ—à–Ω–æ –∑–∞–≤–µ—Ä—à–µ–Ω! ---
