In [35]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from pathlib import Path


In [9]:
# Project Paths 

PROJECT_ROOT = Path("/Users/vineeth/Desktop/RingCon/RingConn-Analysis")
DATA_RAW = PROJECT_ROOT / "data" / "raw"
DATA_PROCESSED = PROJECT_ROOT / "data" / "processed"


In [36]:
# Loading the data
df_activity = pd.read_csv(DATA_RAW/"V_activity.csv")
df_vitals = pd.read_csv(DATA_RAW/"V_vital_signs.csv")
df_sleep = pd.read_csv(DATA_RAW/"V_sleep.csv")

In [37]:
def clean_columns(df):
    df.columns = (
        df.columns
        .str.strip()
        .str.lower()
        .str.replace(" ", "_")
        .str.replace(r"[()/%.-]", "", regex=True)
    )
    return df

df_activity = clean_columns(df_activity)
df_vitals = clean_columns(df_vitals)
df_sleep = clean_columns(df_sleep)







In [32]:
df_sleep.columns

Index(['start_time', 'end_time', 'falling_asleep_time', 'wakeup_time',
       'sleep_time_ratio', 'time_asleepmin', 'sleep_stages__awakemin',
       'sleep_stages__remmin', 'sleep_stages__light_sleepmin',
       'sleep_stages__deep_sleepmin'],
      dtype='str')

In [None]:
# Correct Data Types
columns = ["avg_spo2", "min_spo2", "max_spo2"]
for col in columns:
    df_vitals[col] = df_vitals[col].str.rstrip("%").astype(float)

df_vitals["date"] = pd.to_datetime(df_vitals["date"], errors="coerce")
df_activity["date"] = pd.to_datetime(df_activity["date"], errors="coerce")

# Correct format for df_sleep
date_columns = ["start_time", "end_time" , "falling_asleep_time", "wakeup_time"]
for col in date_columns:
    df_sleep[col] = pd.to_datetime(df_sleep[col], errors="coerce")

df_sleep["sleep_time_ratio"] = df_sleep["sleep_time_ratio"].str.rstrip("%").astype(float)


In [68]:
# Feature Engineering

#a) time to fall asleep
df_sleep["time_to_fall_asleep"] = df_sleep["falling_asleep_time"] - df_sleep["start_time"]

#b) time to wake up
df_sleep["time_to_wake_up"] = df_sleep["end_time"] - df_sleep["wakeup_time"]

#c) time asleep hour
df_sleep["time_asleep_hours"] = df_sleep['time_asleepmin'] / 60

#d) REM cycle percentage (Sleep efficiency)
df_sleep["rem_pct"] = df_sleep["sleep_stages__remmin"] / df_sleep["time_asleepmin"]

#e) Deep Sleep percentage
df_sleep["deep_sleep_pct"] = df_sleep["sleep_stages__deep_sleepmin"] / df_sleep["time_asleepmin"]

#f) Light Sleep percentage
df_sleep["light_sleep_pct"] = df_sleep["sleep_stages__light_sleepmin"] / df_sleep["time_asleepmin"]

#g) Sleep midpoint 
df_sleep["sleep_midpoint"] = (
    df_sleep["start_time"] +
    (df_sleep["end_time"] - df_sleep["start_time"]) / 2
)

#h) Time in bed
df_sleep["time_in_bed"] = df_sleep["end_time"] - df_sleep["start_time"]

#i) Date of sleep 
df_sleep["date_of_sleep"] = df_sleep["wakeup_time"].dt.date








In [None]:
# Correcting the data types of the enigeered features
df_sleep["date_of_sleep"] = pd.to_datetime(df_sleep["date_of_sleep"], errors="coerce")

In [72]:
df_sleep.head()

Unnamed: 0,start_time,end_time,falling_asleep_time,wakeup_time,sleep_time_ratio,time_asleepmin,sleep_stages__awakemin,sleep_stages__remmin,sleep_stages__light_sleepmin,sleep_stages__deep_sleepmin,time_to_fall_asleep,time_to_wake_up,time_asleep_hours,rem_pct,deep_sleep_pct,light_sleep_pct,sleep_midpoint,time_in_bed,date_of_sleep
0,2026-01-01 05:59:27,2026-01-01 12:47:01,2026-01-01 06:14:28,2026-01-01 12:44:27,91.0,372,18,82,230,60,0 days 00:15:01,0 days 00:02:34,6.2,0.22043,0.16129,0.61828,2026-01-01 09:23:14.000,0 days 06:47:34,2026-01-01
1,2026-01-01 23:37:49,2026-01-02 09:05:19,2026-01-02 00:00:19,2026-01-02 08:55:19,88.0,502,33,120,300,82,0 days 00:22:30,0 days 00:10:00,8.366667,0.239044,0.163347,0.59761,2026-01-02 04:21:34.000,0 days 09:27:30,2026-01-02
2,2026-01-03 01:47:44,2026-01-03 11:30:14,2026-01-03 02:02:44,2026-01-03 11:17:44,91.0,531,24,108,328,95,0 days 00:15:00,0 days 00:12:30,8.85,0.20339,0.178908,0.617702,2026-01-03 06:38:59.000,0 days 09:42:30,2026-01-03
3,2026-01-03 22:55:19,2026-01-04 09:02:49,2026-01-03 23:15:19,2026-01-04 08:55:19,87.0,531,49,115,338,78,0 days 00:20:00,0 days 00:07:30,8.85,0.216573,0.146893,0.636535,2026-01-04 03:59:04.000,0 days 10:07:30,2026-01-04
4,2026-01-04 23:35:11,2026-01-05 08:27:50,2026-01-05 00:02:42,2026-01-05 08:17:50,90.0,480,16,100,285,95,0 days 00:27:31,0 days 00:10:00,8.0,0.208333,0.197917,0.59375,2026-01-05 04:01:30.500,0 days 08:52:39,2026-01-05


#### It's important to check how many days are there that have more than 1 sleep session in the same day

In [79]:
# Merging the data sets
# Create a new dataframe if you want to only select a few columns. I'll be using all the columns hence no temp df's

daily_df = pd.merge(df_activity, df_vitals, on="date", how = "inner")
daily_df = pd.merge(daily_df, df_sleep, left_on="date", right_on="date_of_sleep", how = "inner")

In [88]:
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.max_rows', None)  # Show all rows

In [89]:
daily_df.info()

<class 'pandas.DataFrame'>
RangeIndex: 33 entries, 0 to 32
Data columns (total 31 columns):
 #   Column                        Non-Null Count  Dtype          
---  ------                        --------------  -----          
 0   date                          33 non-null     datetime64[us] 
 1   steps                         33 non-null     int64          
 2   calorieskcal                  33 non-null     int64          
 3   avg_heart_ratebpm             33 non-null     int64          
 4   min_heart_ratebpm             33 non-null     int64          
 5   max_heart_ratebpm             33 non-null     int64          
 6   avg_spo2                      33 non-null     float64        
 7   min_spo2                      33 non-null     float64        
 8   max_spo2                      33 non-null     float64        
 9   avg_hrvms                     33 non-null     int64          
 10  min_hrvms                     33 non-null     int64          
 11  max_hrvms                     33

In [95]:
daily_df

Unnamed: 0,date,steps,calorieskcal,avg_heart_ratebpm,min_heart_ratebpm,max_heart_ratebpm,avg_spo2,min_spo2,max_spo2,avg_hrvms,min_hrvms,max_hrvms,start_time,end_time,falling_asleep_time,wakeup_time,sleep_time_ratio,time_asleepmin,sleep_stages__awakemin,sleep_stages__remmin,sleep_stages__light_sleepmin,sleep_stages__deep_sleepmin,time_to_fall_asleep,time_to_wake_up,time_asleep_hours,rem_pct,deep_sleep_pct,light_sleep_pct,sleep_midpoint,time_in_bed,date_of_sleep
0,2026-01-01,6639,2592,83,61,131,97.0,90.0,100.0,37,11,144,2026-01-01 05:59:27,2026-01-01 12:47:01,2026-01-01 06:14:28,2026-01-01 12:44:27,91.0,372,18,82,230,60,0 days 00:15:01,0 days 00:02:34,6.2,0.22043,0.16129,0.61828,2026-01-01 09:23:14.000,0 days 06:47:34,2026-01-01
1,2026-01-02,1323,2018,75,52,132,96.0,92.0,99.0,59,11,113,2026-01-01 23:37:49,2026-01-02 09:05:19,2026-01-02 00:00:19,2026-01-02 08:55:19,88.0,502,33,120,300,82,0 days 00:22:30,0 days 00:10:00,8.366667,0.239044,0.163347,0.59761,2026-01-02 04:21:34.000,0 days 09:27:30,2026-01-02
2,2026-01-03,1777,2025,72,49,132,96.0,91.0,100.0,52,11,109,2026-01-03 01:47:44,2026-01-03 11:30:14,2026-01-03 02:02:44,2026-01-03 11:17:44,91.0,531,24,108,328,95,0 days 00:15:00,0 days 00:12:30,8.85,0.20339,0.178908,0.617702,2026-01-03 06:38:59.000,0 days 09:42:30,2026-01-03
3,2026-01-04,1720,2044,70,47,130,96.0,87.0,99.0,59,12,130,2026-01-03 22:55:19,2026-01-04 09:02:49,2026-01-03 23:15:19,2026-01-04 08:55:19,87.0,531,49,115,338,78,0 days 00:20:00,0 days 00:07:30,8.85,0.216573,0.146893,0.636535,2026-01-04 03:59:04.000,0 days 10:07:30,2026-01-04
4,2026-01-05,1355,2015,73,47,145,96.0,87.0,100.0,60,12,193,2026-01-04 23:35:11,2026-01-05 08:27:50,2026-01-05 00:02:42,2026-01-05 08:17:50,90.0,480,16,100,285,95,0 days 00:27:31,0 days 00:10:00,8.0,0.208333,0.197917,0.59375,2026-01-05 04:01:30.500,0 days 08:52:39,2026-01-05
5,2026-01-06,2075,1965,74,49,132,96.0,88.0,100.0,56,12,139,2026-01-06 00:25:19,2026-01-06 09:05:19,2026-01-06 00:52:49,2026-01-06 08:12:49,78.0,403,37,100,228,75,0 days 00:27:30,0 days 00:52:30,6.716667,0.248139,0.186104,0.565757,2026-01-06 04:45:19.000,0 days 08:40:00,2026-01-06
6,2026-01-07,2159,2096,79,55,134,96.0,85.0,99.0,53,11,162,2026-01-07 01:52:30,2026-01-07 09:07:30,2026-01-07 02:10:00,2026-01-07 08:55:00,87.0,380,25,100,180,100,0 days 00:17:30,0 days 00:12:30,6.333333,0.263158,0.263158,0.473684,2026-01-07 05:30:00.000,0 days 07:15:00,2026-01-07
7,2026-01-08,2825,2198,79,54,126,97.0,89.0,99.0,48,12,143,2026-01-08 02:53:17,2026-01-08 10:20:51,2026-01-08 03:13:17,2026-01-08 09:48:21,83.0,372,24,60,222,90,0 days 00:20:00,0 days 00:32:30,6.2,0.16129,0.241935,0.596774,2026-01-08 06:37:04.000,0 days 07:27:34,2026-01-08
8,2026-01-09,1885,2102,81,56,146,96.0,89.0,99.0,33,11,83,2026-01-09 01:45:34,2026-01-09 08:55:34,2026-01-09 02:23:04,2026-01-09 08:08:04,76.0,326,19,90,178,58,0 days 00:37:30,0 days 00:47:30,5.433333,0.276074,0.177914,0.546012,2026-01-09 05:20:34.000,0 days 07:10:00,2026-01-09
9,2026-01-10,18955,3373,80,56,116,97.0,92.0,99.0,41,13,104,2026-01-10 02:19:09,2026-01-10 05:06:39,2026-01-10 02:51:39,2026-01-10 05:01:39,70.0,118,12,45,48,25,0 days 00:32:30,0 days 00:05:00,1.966667,0.381356,0.211864,0.40678,2026-01-10 03:42:54.000,0 days 02:47:30,2026-01-10
