In [1]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd
import tensorflow as tf
import joblib
import os

### Data Preprocessing

In [2]:
df = pd.read_csv("applewatch_heart_risk_3class.csv")

- age : Kullanıcının yaşı | Numeric (int)
- gender : Cinsiyet | 0 = Erkek (Male), 1 = Kadın (Female) | Categorical (0/1)
- smoking : Sigara içiyor mu | 0 = Hayır, 1 = Evet | Binary (0/1)
- familyHistory : Ailede kalp hastalığı var mı | 0: Hayır 1: Evet | Binary (0/1)
- pastAttacks : Geçmiş çarpıntı veya atak sayısı | Numeric (int)
- heartRateAvg : Günlük ölçülen ortalama kalp atış hızı (bpm) | Numeric (int)
- restingHeartRate : Dinlenme halindeki kalp atış hızı (bpm) | Numeric (int)
- steps : Günlük adım sayısı | Numeric (int)
- sleepHours : Günlük uyku süresi (saat) | Numeric (float)
- exerciseMinutes : Günlük egzersiz süresi (dakika) | Numeric (int)
- risk : Hedef değişken,kalp riski | 0 = düşük risk, 1 = orta risk, 2 = yüksek risk

In [3]:
print(df.head())

   age gender  heartRateAvg  restingHeartRate  steps  sleepHours  \
0   53      F           107                82   5144         6.7   
1   63      F           112                92   2676         4.4   
2   44      F            61                55   6761         8.7   
3   84      M           139               112   3404         5.4   
4   45      M           105                81   2199         6.9   

   exerciseMinutes  pastAttacks  smoking  familyHistory  risk  
0               44            1        0              0     1  
1               20            3        0              1     2  
2               59            0        0              1     0  
3                9            4        1              0     2  
4               27            2        1              1     1  


In [4]:
# Veri tipi kontrolü
print(df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9999 entries, 0 to 9998
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   age               9999 non-null   int64  
 1   gender            9999 non-null   object 
 2   heartRateAvg      9999 non-null   int64  
 3   restingHeartRate  9999 non-null   int64  
 4   steps             9999 non-null   int64  
 5   sleepHours        9999 non-null   float64
 6   exerciseMinutes   9999 non-null   int64  
 7   pastAttacks       9999 non-null   int64  
 8   smoking           9999 non-null   int64  
 9   familyHistory     9999 non-null   int64  
 10  risk              9999 non-null   int64  
dtypes: float64(1), int64(9), object(1)
memory usage: 859.4+ KB
None


In [5]:
# Eksik değer kontrolü
print(df.isnull().sum())

age                 0
gender              0
heartRateAvg        0
restingHeartRate    0
steps               0
sleepHours          0
exerciseMinutes     0
pastAttacks         0
smoking             0
familyHistory       0
risk                0
dtype: int64


In [6]:
# Encode Categorical Features - Kategorik Verileri Sayısallaştırma
# gender: 0 = Male, 1 = Female
df["gender"] = df["gender"].map({"M": 0, "F": 1})

In [7]:
# Aykırı değerleri sınırlandırır - Clip Outliers
clip_limits = {
    "age": (18, 90),
    "heartRateAvg": (50, 180),
    "restingHeartRate": (40, 140),
    "steps": (0, 20000),
    "exerciseMinutes": (0, 120),
    "sleepHours": (3, 12),
    "pastAttacks": (0, 10)
}

for col, (min_val, max_val) in clip_limits.items():
    df[col] = df[col].clip(lower=min_val, upper=max_val)

In [8]:
X = df.drop("risk", axis=1)   # features
y = df["risk"]                # label

In [9]:
print("Risk sınıfları dağılımı:\n", y.value_counts())

Risk sınıfları dağılımı:
 1    3333
2    3333
0    3333
Name: risk, dtype: int64


In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [11]:
# Standard Scaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [12]:
# Scaler'ı kaydeder
joblib.dump(scaler, "heart_risk_scaler.pkl")

['heart_risk_scaler.pkl']

In [13]:
# Save processed dataset to CSV
pd.DataFrame(X_train_scaled, columns=X_train.columns).to_csv("X_train_scaled.csv", index=False)
pd.DataFrame(X_test_scaled, columns=X_test.columns).to_csv("X_test_scaled.csv", index=False)

In [15]:
print("Eğitim verisi boyutu:", X_train_scaled.shape)
print("Test verisi boyutu:", X_test_scaled.shape)
print("Eğitim hedef sınıf dağılımı:", y_train.value_counts().to_dict())
print("Test hedef sınıf dağılımı:", y_test.value_counts().to_dict())

Eğitim verisi boyutu: (7999, 10)
Test verisi boyutu: (2000, 10)
Eğitim hedef sınıf dağılımı: {0: 2667, 2: 2666, 1: 2666}
Test hedef sınıf dağılımı: {2: 667, 1: 667, 0: 666}


### Train TensorFlow Model

In [16]:
# TensorFlow Model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(32, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(3, activation='softmax')  # 3 sınıf: düşük/orta/yüksek
])

In [17]:
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

In [18]:
callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=10, restore_best_weights=True
)

In [19]:
history = model.fit(
    X_train_scaled, y_train,
    validation_data=(X_test_scaled, y_test),
    epochs=100,
    batch_size=32,
    callbacks=[callback]
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100


In [20]:
# Model Evaluation
loss, acc = model.evaluate(X_test_scaled, y_test)
print(f"Test Loss: {loss:.4f}, Test Accuracy: {acc:.4f}")

Test Loss: 0.0006, Test Accuracy: 1.0000


- Klasör yoksa oluştururuz ve modeli oraya kaydediriz.

In [21]:
os.makedirs(r"C:\models", exist_ok=True)

In [22]:
model.save(r"C:\models\heart_risk_model_tf3.h5")
print("TensorFlow model saved!")

TensorFlow model saved!


In [26]:
print("SWIFT İÇİN SCALER DEĞERLERİ:")
print("="*40)

print("let scalerMean: [Float] = [", end="")
for i, mean_val in enumerate(scaler.mean_):
    if i > 0:
        print(", ", end="")
    print(f"{mean_val:.6f}", end="")
print("]")

print("let scalerStd: [Float] = [", end="")
for i, std_val in enumerate(scaler.scale_):
    if i > 0:
        print(", ", end="")
    print(f"{std_val:.6f}", end="")
print("]")

print("\n\nKONTROL İÇİN AYNI VERİYLE TEST:")
print("="*40)

# Aynı veriyle test (Swift'te girdiğimiz değerlerle)
test_input = input("Test için Swift'te girdiğiniz değerleri virgülle ayırarak girin (örn: 75,0,120,95,2000,5.0,10,4,1,1): ")
values = [float(x.strip()) for x in test_input.split(',')]

test_df = pd.DataFrame([values], columns=["age","gender","heartRateAvg","restingHeartRate",
                                        "steps","sleepHours","exerciseMinutes",
                                        "pastAttacks","smoking","familyHistory"])

print(f"\nGirdi değerleri: {values}")

# Scale ederiz
test_scaled = scaler.transform(test_df)
print(f"Scaled değerler: {test_scaled[0]}")

# Predict ederiz
pred_prob = model.predict(test_scaled)[0]
pred_class = np.argmax(pred_prob)

print(f"Prediction probabilities: {pred_prob}")
print(f"Predicted class: {pred_class} -> {['Low Risk', 'Medium Risk', 'High Risk'][pred_class]}")

# Swift için scaled değerleri de veririz
print(f"\nSwift için manuel scale kontrolü:")
for i, (orig, scaled) in enumerate(zip(values, test_scaled[0])):
    print(f"Feature {i}: {orig} -> (({orig} - {scaler.mean_[i]:.6f}) / {scaler.scale_[i]:.6f}) = {scaled:.6f}")

SWIFT İÇİN SCALER DEĞERLERİ:
let scalerMean: [Float] = [52.437305, 0.493687, 100.737717, 82.024503, 4341.052507, 6.494724, 33.539567, 1.823103, 0.411551, 0.469559]
let scalerStd: [Float] = [19.475188, 0.499960, 22.777984, 17.156535, 2526.834910, 1.319431, 22.937767, 1.453479, 0.492115, 0.499072]


KONTROL İÇİN AYNI VERİYLE TEST:


Test için Swift'te girdiğiniz değerleri virgülle ayırarak girin (örn: 75,0,120,95,2000,5.0,10,4,1,1):  75,0,120,95,2000,5.0,10,4,1,1



Girdi değerleri: [75.0, 0.0, 120.0, 95.0, 2000.0, 5.0, 10.0, 4.0, 1.0, 1.0]
Scaled değerler: [ 1.15853543 -0.98745214  0.84565353  0.75630055 -0.92647624 -1.13285554
 -1.02623621  1.49771516  1.19575495  1.06285428]
Prediction probabilities: [9.1210510e-18 8.7246776e-10 1.0000000e+00]
Predicted class: 2 -> High Risk

Swift için manuel scale kontrolü:
Feature 0: 75.0 -> ((75.0 - 52.437305) / 19.475188) = 1.158535
Feature 1: 0.0 -> ((0.0 - 0.493687) / 0.499960) = -0.987452
Feature 2: 120.0 -> ((120.0 - 100.737717) / 22.777984) = 0.845654
Feature 3: 95.0 -> ((95.0 - 82.024503) / 17.156535) = 0.756301
Feature 4: 2000.0 -> ((2000.0 - 4341.052507) / 2526.834910) = -0.926476
Feature 5: 5.0 -> ((5.0 - 6.494724) / 1.319431) = -1.132856
Feature 6: 10.0 -> ((10.0 - 33.539567) / 22.937767) = -1.026236
Feature 7: 4.0 -> ((4.0 - 1.823103) / 1.453479) = 1.497715
Feature 8: 1.0 -> ((1.0 - 0.411551) / 0.492115) = 1.195755
Feature 9: 1.0 -> ((1.0 - 0.469559) / 0.499072) = 1.062854
