Here we add a column to the `Processed` table, indicating that relevant HRV indices are available for certain participants.

+ Create `HRV_available` column in `Processed` table to record if the HRV indices for the subject are available.

In [6]:
import sqlite3
import pandas as pd
from tqdm import tqdm
import sys

sys.path.append("../..")

from utils.constants import DatabaseConfig, TableNames

In [2]:
conn = sqlite3.connect(DatabaseConfig.DB_PATH)
cursor = conn.cursor()
primary_key = "eid"

In [3]:
cursor.execute(f"""
ALTER TABLE {TableNames.PROCESSED} ADD COLUMN HRV_available INTEGER DEFAULT 0;
""")

# We will use the eid in the `HRV_time` table
query_sql = f"""
SELECT {primary_key} FROM {TableNames.HRV_TIME}
"""
cursor.execute(query_sql)
eids = [row[0] for row in cursor.fetchall()]

# Update the column
for eid in tqdm(eids):
    update_sql = f"UPDATE {TableNames.PROCESSED} SET HRV_available = 1 WHERE eid = ?;"
    cursor.execute(update_sql, (eid,))
conn.commit()

100%|██████████| 42216/42216 [00:00<00:00, 100447.38it/s]


In [7]:
count_sql = f"""
SELECT test_status, HRV_available, COUNT(*) FROM {TableNames.PROCESSED} 
GROUP BY test_status, HRV_available
ORDER BY test_status, HRV_available;
"""
cursor.execute(count_sql)
df = pd.DataFrame(cursor.fetchall(), columns=["test_status", "HRV_available", "count"])
print(df)

   test_status  HRV_available  count
0            0              0  14102
1            0              1   1350
2            1              0  21570
3            1              1  40866


After investigation, we find that those who report "Participant wanted to stop early" or "Heart rate reached safety level" still may have HRV indices available.

**Therefore, we decide to only use `HRV_available` rather than `test_status` for inclusion criteria**.