Importing libaries

In [128]:

import pandas as pd
import glob
import re
import os
import sys
import pickle
import datetime
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn import tree, metrics
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, ConfusionMatrixDisplay
from scipy.signal import butter, filtfilt, find_peaks
from sklearn.tree import DecisionTreeClassifier,export_graphviz
from sklearn.model_selection import train_test_split

Adding helpers:

In [113]:
def calc_magnitude(data):

    # Calculate magnitude
    data['accel_mag'] = np.sqrt(data['x']**2 + data['y']**2 + data['z']**2) # absolute accel magnitude
    data['accel_mag'] = data['accel_mag'] - data['accel_mag'].mean() # detrend: "remove gravity"

    return data

In [114]:
def remove_noise(data,sampling_rate):
    from scipy.signal import butter, filtfilt, find_peaks

    # Low pass filter
    cutoff = 5 # Hz
    order = 2
    b, a = butter(order, cutoff/(sampling_rate/2), btype='lowpass')
    data['filtered_accel_mag'] = filtfilt(b, a, data['accel_mag'])

    return data

In [115]:
def add_features(window):
    features = {}
    features['avg'] = window['filtered_accel_mag'].mean()
    features['max'] = window['filtered_accel_mag'].quantile(1)
    features['med'] = window['filtered_accel_mag'].quantile(0.5)
    features['min'] = window['filtered_accel_mag'].quantile(0)
    features['q25'] = window['filtered_accel_mag'].quantile(0.25)
    features['q75'] = window['filtered_accel_mag'].quantile(0.75)
    features['std'] = window['filtered_accel_mag'].std()
    df = pd.DataFrame()
    df = df._append(features,ignore_index=True)
    return df

In [116]:
def train_decision_tree(frames):
    # Extract feature columns
    X = frames[['avg', 'max', 'med', 'min', 'q25', 'q75', 'std']]

    # Extract target column
    y = frames['activity']

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # Create model
    dt_model = DecisionTreeClassifier(criterion='entropy',max_depth=5).fit(X_train, y_train)
    dt_pred = dt_model.predict(X_test)

    # Evaluate on test set
    acc = dt_model.score(X_test, y_test)
    dt_cm = confusion_matrix(y_test, dt_pred, labels=dt_model.classes_)
    print(classification_report(y_test, dt_pred))
    print("Accuracy on test set:", acc)

    return dt_model,dt_cm,acc

Calulating the orientation for sleep tracking:

In [117]:
def calc_orientation(data):
    data['pitch'] = np.arctan2(data['y'], np.sqrt(data['x']**2 + data['z']**2))
    data['roll'] = np.arctan2(-data['x'], data['z'])    
    data['pitch'] = np.degrees(data['pitch'])
    data['roll'] = np.degrees(data['roll'])


    
    return data

Adding extra feauters for sleep position:

In [126]:
def add_position_features(window):
    features = {}
    
    base_features = add_features(window)
    
    features['pitch_mean'] = window['pitch'].mean()
    features['pitch_std'] = window['pitch'].std()
    features['roll_mean'] = window['roll'].mean()
    features['roll_std'] = window['roll'].std()
    
    features['stability'] = window['filtered_accel_mag'].std() / window['filtered_accel_mag'].mean()
    features['movement_intensity'] = np.sum(np.abs(np.diff(window['filtered_accel_mag'])))
    
    df = pd.DataFrame()
    df = df._append({**base_features.iloc[0], **features}, ignore_index=True)
    df= df._append()
    return df

In [119]:
def extract_position_features(data, window_sec, sample_rate, position):
    data = pd.read_csv('your_file.csv')
    data['timestamp'] = pd.to_datetime(data.index, unit='ns')
    data.set_index('timestamp', inplace=True)
    
    data = calc_orientation(data)
    
    window_size = f'{window_sec}s'
    
    resampled_data = data.resample(window_size)
    
    features_df = pd.DataFrame()
    
    for timestamp, window in resampled_data:
        if not window.empty:
            features = add_position_features(window)
            
            features['position'] = position
            
            features_df = pd.concat([features_df, features], ignore_index=True)
    
    return features_df

In [120]:
def train_position_classifier(frames):
    features = ['avg', 'max', 'med', 'min', 'q25', 'q75', 'std',
                'pitch_mean', 'pitch_std', 'roll_mean', 'roll_std',
                'stability', 'movement_intensity']
    
    X = frames[features]
    y = frames['position']

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42, stratify=y
    )

    dt_model = DecisionTreeClassifier(
        criterion='entropy',
        max_depth=6,  
        min_samples_leaf=5  
    ).fit(X_train, y_train)
    
    dt_pred = dt_model.predict(X_test)

    acc = dt_model.score(X_test, y_test)
    dt_cm = confusion_matrix(y_test, dt_pred, labels=dt_model.classes_)
    print(classification_report(y_test, dt_pred))
    print("Accuracy on test set:", acc)

    return dt_model, dt_cm, acc

Burdan sonrasi komple GPT!

In [121]:
def load_and_merge_data(accel_file, gyro_file, sample_rate):
    # Load accelerometer data
    accel_data = pd.read_csv(accel_file)
    gyro_data = pd.read_csv(gyro_file)

    # Assuming columns are: time, x, y, z
    # Rename for clarity: ax, ay, az for accel; gx, gy, gz for gyro
    # Convert time to datetime if needed
    accel_data['timestamp'] = pd.to_datetime(accel_data['time'])
    gyro_data['timestamp'] = pd.to_datetime(gyro_data['time'])

    accel_data.set_index('timestamp', inplace=True)
    gyro_data.set_index('timestamp', inplace=True)

    # Resample both to ensure same frequency and align times
    accel_data = accel_data.resample(f'{1/sample_rate}S').mean().interpolate()
    gyro_data = gyro_data.resample(f'{1/sample_rate}S').mean().interpolate()

    # Merge on nearest timestamps
    merged = pd.merge_asof(accel_data.sort_index(), gyro_data.sort_index(),
                           left_index=True, right_index=True, direction='nearest',
                           suffixes=('_accel','_gyro'))

    # Rename columns to standard names
    merged.rename(columns={
        'x_accel': 'ax', 'y_accel': 'ay', 'z_accel': 'az',
        'x_gyro': 'gx', 'y_gyro': 'gy', 'z_gyro': 'gz'
    }, inplace=True)

    return merged

In [122]:
def extract_combined_features(data, window_sec, sample_rate, position):
    # Compute accel magnitude and remove noise
    data = calc_magnitude(data)
    data = remove_noise(data, sample_rate)

    # Compute orientation from accelerometer
    # The calc_orientation expects 'x','y','z' columns, so map ax,ay,az
    data['x'] = data['ax']
    data['y'] = data['ay']
    data['z'] = data['az']
    data = calc_orientation(data)

    # Now we have accel_mag, filtered_accel_mag, pitch, roll
    # Also have gyro data: gx, gy, gz

    # Add gyro magnitude if needed
    data['gyro_mag'] = np.sqrt(data['gx']**2 + data['gy']**2 + data['gz']**2)

    # Resample into windows
    data['timestamp'] = data.index
    data.set_index('timestamp', inplace=True)

    window_size = f'{window_sec}s'
    resampled_data = data.resample(window_size)

    features_df = pd.DataFrame()

    for timestamp, window in resampled_data:
        if not window.empty:
            # Basic accel features
            base_features = add_features(window)  # avg, max, med, min, q25, q75, std (from filtered_accel_mag)

            # Orientation features
            orientation_feats = {
                'pitch_mean': window['pitch'].mean(),
                'pitch_std': window['pitch'].std(),
                'roll_mean': window['roll'].mean(),
                'roll_std': window['roll'].std()
            }

            # Stability and movement intensity
            # stability = std/mean of filtered_accel_mag
            mean_val = window['filtered_accel_mag'].mean() if window['filtered_accel_mag'].mean() != 0 else 1e-9
            stability = window['filtered_accel_mag'].std() / mean_val
            movement_intensity = np.sum(np.abs(np.diff(window['filtered_accel_mag'])))

            extra_feats = {
                'stability': stability,
                'movement_intensity': movement_intensity
            }

            combined_features = {**base_features.iloc[0], **orientation_feats, **extra_feats}
            combined_features['position'] = position

            features_df = pd.concat([features_df, pd.DataFrame([combined_features])], ignore_index=True)

    return features_df


In [123]:
def process_combined_data(accel_root, gyro_root, output_filename="combined_sleep_data.csv", window_sec=5, sample_rate=100):
    all_data = pd.DataFrame()

    for position in ['back', 'side', 'stomach']:
        accel_files = glob.glob(os.path.join(accel_root, position, '*.csv'))

        for accel_file in accel_files:
            filename = os.path.basename(accel_file)
            gyro_file = os.path.join(gyro_root, position, filename)

            if not os.path.exists(gyro_file):
                print(f"No matching gyro file for {accel_file}, skipping.")
                continue

            merged_data = load_and_merge_data(accel_file, gyro_file, sample_rate)

            features_df = extract_combined_features(merged_data, window_sec, sample_rate, position)
            all_data = pd.concat([all_data, features_df], ignore_index=True)

    all_data.to_csv(output_filename, index=False)
    return all_data


In [124]:
def train_position_classifier(frames):
    # Ensure the needed features are present
    features = [
        'avg', 'max', 'med', 'min', 'q25', 'q75', 'std',
        'pitch_mean', 'pitch_std', 'roll_mean', 'roll_std',
        'stability', 'movement_intensity'
    ]

    X = frames[features]
    y = frames['position']

    # Stratify to maintain position distribution
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42, stratify=y
    )

    dt_model = DecisionTreeClassifier(
        criterion='entropy',
        max_depth=6,
        min_samples_leaf=5
    ).fit(X_train, y_train)

    dt_pred = dt_model.predict(X_test)

    acc = accuracy_score(y_test, dt_pred)
    print(classification_report(y_test, dt_pred))
    print("Accuracy on test set:", acc)

    dt_cm = confusion_matrix(y_test, dt_pred, labels=dt_model.classes_)

    return dt_model, dt_cm, acc


In [125]:
accel_root = './data/acceloremeter'
gyro_root = './data/gyroscope'
output_file = "combined_sleep_data.csv"

# Process data and extract features
all_data = process_combined_data(accel_root, gyro_root, output_file, window_sec=5, sample_rate=100)

# Train the classifier
dt_model, dt_cm, acc = train_position_classifier(all_data)

# Print confusion matrix
print("Confusion Matrix:\n", dt_cm)

# Visualize confusion matrix
position_labels = all_data['position'].unique()
plt.figure(figsize=(6,4))
sns.heatmap(dt_cm, annot=True, fmt='d', cmap='Blues', xticklabels=position_labels, yticklabels=position_labels)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix for Sleep Position Classification')
plt.show()

# Visualize feature importance
features = [
    'avg', 'max', 'med', 'min', 'q25', 'q75', 'std',
    'pitch_mean', 'pitch_std', 'roll_mean', 'roll_std',
    'stability', 'movement_intensity'
]
importances = dt_model.feature_importances_
fi_df = pd.DataFrame({'Feature': features, 'Importance': importances}).sort_values('Importance', ascending=False)

plt.figure(figsize=(10,6))
sns.barplot(x='Importance', y='Feature', data=fi_df)
plt.title('Feature Importance')
plt.show()

No matching gyro file for ./data/acceloremeter/back/CerenBackAcc.csv, skipping.
No matching gyro file for ./data/acceloremeter/back/CerenBackAcc8.csv, skipping.
No matching gyro file for ./data/acceloremeter/back/CerenBackAcc9.csv, skipping.


  accel_data = accel_data.resample(f'{1/sample_rate}S').mean().interpolate()
  gyro_data = gyro_data.resample(f'{1/sample_rate}S').mean().interpolate()


KeyError: 'x'