In [1]:
import pandas as pd
import numpy as np

In [2]:
def generate_realistic_balanced_dataset(size=10000, seed=None):
    """Generate a synthetic, class-balanced heart-risk dataset.

    Rows are produced for three risk classes (0, 1, 2). Every feature is drawn
    from a class-specific distribution, so the features correlate with the
    ``risk`` label.

    Parameters
    ----------
    size : int, default 10000
        Requested number of rows. Each class receives ``size // 3`` rows, so
        the returned frame has ``3 * (size // 3)`` rows (e.g. 9999 for 10000).
    seed : int or None, default None
        If ``None``, values are drawn from NumPy's global random state (seed it
        with ``np.random.seed`` to reproduce a run). If given, a private
        ``np.random.RandomState(seed)`` is used, so the same seed always
        produces the same dataset without touching the global state.

    Returns
    -------
    pandas.DataFrame
        Columns: age, gender, heartRateAvg, restingHeartRate, steps,
        sleepHours, exerciseMinutes, pastAttacks, smoking, familyHistory, risk.
        Rows are shuffled with a fixed ``random_state=42`` and the index is
        reset.
    """
    # Same sampling API either way: the numpy.random module (global state) or
    # a dedicated RandomState when the caller asked for reproducibility.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Per-class sampling parameters. Integer ranges are (low, high) with the
    # upper bound exclusive, as in randint; *_p are probabilities for [0, 1].
    profiles = {
        0: {
            "age": (18, 45),
            "smoking_p": [0.85, 0.15],
            "family_p": [0.8, 0.2],
            "exercise": (30, 90),
            "steps_per_minute": 120,
            "steps_base": 0,
            "steps_sd": 500,
            "sleep": (7, 9),
            "resting_hr": (55, 75),
            "hr_offset": (5, 20),
            "attacks": (0, 2),
        },
        1: {
            "age": (40, 65),
            "smoking_p": [0.6, 0.4],
            "family_p": [0.5, 0.5],
            "exercise": (15, 45),
            "steps_per_minute": 100,
            "steps_base": 0,
            "steps_sd": 1000,
            "sleep": (6, 7),
            "resting_hr": (70, 90),
            "hr_offset": (10, 30),
            "attacks": (1, 3),
        },
        2: {
            "age": (60, 90),
            "smoking_p": [0.3, 0.7],
            "family_p": [0.3, 0.7],
            "exercise": (0, 25),
            "steps_per_minute": 80,
            "steps_base": 2000,
            "steps_sd": 1000,
            "sleep": (4, 6),
            "resting_hr": (85, 120),
            "hr_offset": (10, 40),
            "attacks": (2, 6),
        },
    }

    columns = [
        "age", "gender", "heartRateAvg", "restingHeartRate", "steps", "sleepHours",
        "exerciseMinutes", "pastAttacks", "smoking", "familyHistory", "risk"
    ]

    per_class = size // 3
    data = []

    for risk_label in [0, 1, 2]:
        profile = profiles[risk_label]
        for _ in range(per_class):
            # NOTE: the order of the draws below is kept stable so that a
            # seeded run consumes the random stream in a fixed sequence.

            # Age
            age = rng.randint(*profile["age"])

            # Gender
            gender = rng.choice(["M", "F"])

            # Smoking and family history (probabilistic)
            smoking = rng.choice([0, 1], p=profile["smoking_p"])
            familyHistory = rng.choice([0, 1], p=profile["family_p"])

            # Exercise minutes, with step count correlated to them
            exerciseMinutes = rng.randint(*profile["exercise"])
            steps_mean = exerciseMinutes * profile["steps_per_minute"] + profile["steps_base"]
            # The normal draw can fall below zero (low mean, large sd); a
            # negative step count is meaningless, so clamp it at 0.
            steps = max(0, int(rng.normal(steps_mean, profile["steps_sd"])))

            # Sleep hours, rounded to one decimal
            sleepHours = round(rng.uniform(*profile["sleep"]), 1)

            # Resting heart rate and average heart rate (correlated with risk)
            restingHeartRate = rng.randint(*profile["resting_hr"])
            heartRateAvg = restingHeartRate + rng.randint(*profile["hr_offset"])

            # Past attacks
            pastAttacks = rng.randint(*profile["attacks"])

            data.append([
                age, gender, heartRateAvg, restingHeartRate, steps, sleepHours,
                exerciseMinutes, pastAttacks, smoking, familyHistory, risk_label
            ])

    df = pd.DataFrame(data, columns=columns)
    # Rows were appended class by class; shuffle so the classes are interleaved.
    df = df.sample(frac=1, random_state=42).reset_index(drop=True)
    return df

In [3]:
# Generate the dataset. The function creates size // 3 rows per risk class,
# so a request for 10000 yields 3333 rows per class (9999 rows in total).
df = generate_realistic_balanced_dataset(10000)

In [4]:
# Write the generated dataset to a CSV file (relative path), omitting the index column.
df.to_csv("applewatch_heart_risk_3class.csv", index=False)

In [5]:
# Sanity check: print the number of rows in each risk class.
print(df["risk"].value_counts())

1    3333
2    3333
0    3333
Name: risk, dtype: int64
