In [1]:
import numpy as np
import pandas as pd


In [2]:
# Read the raw smartwatch export into `df`.
# NOTE(review): a later cell reads this same file into `df` again, which
# replaces the frame created here.
df = pd.read_csv(
    filepath_or_buffer=r"C:\Users\aniru\Desktop\fogcomputing_project\unclean_smartwatch_health_data.csv"
)


In [3]:
# Keep only the four signal columns, in this order.
# NOTE(review): `df` is reloaded and re-selected by later cells, so this
# selection does not affect the final result.
df = df.loc[:, ['Heart Rate (BPM)', 'Blood Oxygen Level (%)', 'Stress Level', 'Activity Level']]


In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, IsolationForest
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
import joblib, os

In [7]:
# Load the raw smartwatch export. The location can be overridden with the
# SMARTWATCH_DATA_CSV environment variable so the notebook is not tied to one
# machine; when the variable is unset the original hard-coded path is used.
data_csv_path = os.environ.get(
    "SMARTWATCH_DATA_CSV",
    r"C:\Users\aniru\Desktop\fogcomputing_project\unclean_smartwatch_health_data.csv",
)
df = pd.read_csv(data_csv_path)

In [8]:
# Quick look at the raw frame: column names and per-column missing counts.
raw_column_names = list(df.columns)
raw_missing_counts = df.isna().sum()
print("Initial Columns:", raw_column_names)
print("\nMissing values before cleaning:\n", raw_missing_counts)

Initial Columns: ['User ID', 'Heart Rate (BPM)', 'Blood Oxygen Level (%)', 'Step Count', 'Sleep Duration (hours)', 'Activity Level', 'Stress Level']

Missing values before cleaning:
 User ID                   201
Heart Rate (BPM)          400
Blood Oxygen Level (%)    300
Step Count                100
Sleep Duration (hours)    150
Activity Level            200
Stress Level              200
dtype: int64


In [9]:
# Restrict the frame to the four signals used downstream and give them short
# snake_case names (positional pairing: required_cols[i] -> short_names[i]).
required_cols = ['Heart Rate (BPM)', 'Blood Oxygen Level (%)', 'Stress Level', 'Activity Level']
short_names = ['heart_rate', 'spo2', 'stress_level', 'activity_level']
df = df.loc[:, required_cols].rename(columns=dict(zip(required_cols, short_names)))

In [10]:
# Force every column to a numeric dtype; any entry that cannot be parsed as a
# number becomes NaN instead of raising.
# NOTE(review): the preview printed a few cells later shows activity_level as
# NaN in every displayed row, so that column may hold text labels in the raw
# file that are all discarded here -- confirm against the CSV.
df = df.apply(pd.to_numeric, errors='coerce')

In [11]:
# Replace missing vitals with the median of their own column.
for vital in ('heart_rate', 'spo2', 'stress_level'):
    df[vital] = df[vital].fillna(df[vital].median())

In [12]:
# Saturate each vital at fixed lower/upper bounds; values outside a range are
# pulled to the nearest bound rather than dropped.
clip_bounds = {
    'heart_rate': (40, 200),
    'spo2': (85, 100),
    'stress_level': (0, 10),
}
for vital, (lower, upper) in clip_bounds.items():
    df[vital] = df[vital].clip(lower, upper)

In [14]:
# Preview the first ten rows after cleaning. A cell renders only its last
# expression, so the bare `df.columns` statement that used to precede this
# line produced no output and has been removed.
df.head(10)

Unnamed: 0,heart_rate,spo2,stress_level,activity_level
0,58.939776,98.80965,1.0,
1,75.220601,98.532195,5.0,
2,200.0,97.052954,5.0,
3,40.0,96.894213,3.0,
4,61.950165,98.583797,6.0,
5,96.285938,94.20291,10.0,
6,47.272257,95.38976,2.0,
7,81.733497,95.981343,1.0,
8,93.340338,97.218945,1.0,
9,51.409967,97.272787,8.0,


In [15]:
def simulate_activity(row):
    """Derive a synthetic activity score (0-100) from one row of vitals.

    The score starts at ``100 - 8 * stress_level`` and is then adjusted by at
    most one of two rules:

    * +20 when heart rate is above 120, SpO2 is below 94 and stress is at most 3;
    * -20 when heart rate is below 70, SpO2 is at least 97 and stress is at least 7.

    Args:
        row: Object indexable by 'heart_rate', 'spo2' and 'stress_level'
            (for example a DataFrame row supplied by ``df.apply(..., axis=1)``).

    Returns:
        int: The adjusted score, clipped to [0, 100] and truncated to an integer.
    """
    pulse, oxygen, tension = row['heart_rate'], row['spo2'], row['stress_level']

    boost_rule = (pulse > 120) and (oxygen < 94) and (tension <= 3)
    penalty_rule = (pulse < 70) and (oxygen >= 97) and (tension >= 7)

    # The boost takes precedence; the two rules cannot both hold anyway because
    # they require disjoint heart-rate ranges.
    if boost_rule:
        adjustment = 20
    elif penalty_rule:
        adjustment = -20
    else:
        adjustment = 0

    score = 100 - (tension * 8) + adjustment
    return int(np.clip(score, 0, 100))

In [16]:
# Simulate an activity score only for rows where it is actually missing.
# Previously a single missing value caused the whole column to be overwritten,
# which discarded every activity value that was present.
missing_activity = df['activity_level'].isnull()
if missing_activity.any():
    df.loc[missing_activity, 'activity_level'] = (
        df.loc[missing_activity].apply(simulate_activity, axis=1)
    )

In [17]:
# Fill any activity value that is still missing with the column median, then
# keep every value inside the 0-100 range.
activity_median = df['activity_level'].median()
activity_filled = df['activity_level'].fillna(activity_median)
df['activity_level'] = activity_filled.clip(0, 100)

# Report the cleaned frame and confirm that no missing values remain.
cleaned_preview = df.head(10)
remaining_missing = df.isnull().sum()
print("\n✅ Data after cleaning:\n", cleaned_preview)
print("\nMissing values after cleaning:\n", remaining_missing)


✅ Data after cleaning:
    heart_rate       spo2  stress_level  activity_level
0   58.939776  98.809650           1.0              92
1   75.220601  98.532195           5.0              60
2  200.000000  97.052954           5.0              60
3   40.000000  96.894213           3.0              76
4   61.950165  98.583797           6.0              52
5   96.285938  94.202910          10.0              20
6   47.272257  95.389760           2.0              84
7   81.733497  95.981343           1.0              92
8   93.340338  97.218945           1.0              92
9   51.409967  97.272787           8.0              16

Missing values after cleaning:
 heart_rate        0
spo2              0
stress_level      0
activity_level    0
dtype: int64


In [18]:
# Number of consecutive rows averaged by the rolling trend features.
window = 6

In [19]:
# For each signal, take the row-to-row difference and average it over the last
# `window` rows. Rows that do not yet have a full window of differences get NaN.
# NOTE(review): this treats row order as time order -- confirm that the rows
# are consecutive readings of the same subject.
trend_sources = {
    'hr_trend': 'heart_rate',
    'stress_trend': 'stress_level',
    'spo2_trend': 'spo2',
    'activity_trend': 'activity_level',
}
for trend_name, signal_name in trend_sources.items():
    df[trend_name] = df[signal_name].diff().rolling(window).mean()

In [20]:
# Rule-based labelling: a row is marked 1 when at least one rule fires and 0
# otherwise. Comparisons against NaN trends evaluate to False, so rows without
# a full trend window can only be flagged by the two absolute rules.
# NOTE(review): the two absolute rules depend only on columns that are later
# used as model features, so the classifier partly re-learns these thresholds.

# Heart rate and stress trending up while activity trends down.
rising_strain = (
    (df['hr_trend'] > 0.5)
    & (df['stress_trend'] > 0.3)
    & (df['activity_trend'] < -0.5)
)

# SpO2, heart rate and activity all trending down.
falling_vitals = (
    (df['spo2_trend'] < -0.5)
    & (df['hr_trend'] < -0.3)
    & (df['activity_trend'] < -0.5)
)

# High stress together with low activity (absolute values).
stressed_and_idle = (df['stress_level'] > 7) & (df['activity_level'] < 25)

# Absolute SpO2 below 92.
low_oxygen = df['spo2'] < 92

any_rule = rising_strain | falling_vitals | stressed_and_idle | low_oxygen
df['label'] = any_rule.astype(int)

In [21]:
# Remove the four trend columns from the frame now that labelling is done.
trend_cols = ['hr_trend', 'stress_trend', 'spo2_trend', 'activity_trend']
df = df.drop(columns=trend_cols)

# Share of rows in each class.
class_shares = df['label'].value_counts(normalize=True)
print("\n✅ Labeling complete. Class distribution:")
print(class_shares)


✅ Labeling complete. Class distribution:
label
0    0.7277
1    0.2723
Name: proportion, dtype: float64


In [22]:
# Feature matrix (the four signals) and binary target.
feature_cols = ['heart_rate', 'spo2', 'stress_level', 'activity_level']
X = df.loc[:, feature_cols]
y = df.loc[:, 'label']

In [23]:
# 80/20 split with a fixed seed; stratifying on y keeps the class ratio the
# same in both partitions.
split_parts = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [24]:
# Learn the scaling statistics from the training rows only, then apply the
# same transform to both partitions so no test information leaks into the fit.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [25]:
# Random forest on the scaled training features; class_weight='balanced'
# reweights classes inversely to their frequency in y_train.
clf = RandomForestClassifier(
    n_estimators=200,
    class_weight='balanced',
    random_state=42,
)
clf.fit(X_train_scaled, y_train)

In [26]:
# Unsupervised outlier detector fitted on the same scaled training features;
# contamination=0.02 sets the expected share of outliers to 2%.
iso = IsolationForest(
    random_state=42,
    contamination=0.02,
)
iso.fit(X_train_scaled)

In [27]:
# Score the classifier on the held-out partition.
y_pred = clf.predict(X_test_scaled)
report_text = classification_report(y_test, y_pred)
matrix = confusion_matrix(y_test, y_pred)

print("\n📊 Classification Report:")
print(report_text)
print("\nConfusion Matrix:\n", matrix)


📊 Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.93      0.90      1455
           1       0.77      0.61      0.69       545

    accuracy                           0.85      2000
   macro avg       0.82      0.77      0.79      2000
weighted avg       0.84      0.85      0.84      2000


Confusion Matrix:
 [[1357   98]
 [ 210  335]]


In [28]:
# Bundle the fitted scaler and both models into one joblib artifact so they
# can be loaded together.
model_bundle = {'scaler': scaler, 'clf': clf, 'iso': iso}
os.makedirs("models", exist_ok=True)
joblib.dump(model_bundle, "models/fog_model_final.joblib")

print("\n✅ Model training complete and saved to models/fog_model_final.joblib")


✅ Model training complete and saved to models/fog_model_final.joblib
