Count the number of eligible participants in the database.

In [23]:
import sqlite3
import pandas as pd
import sys

sys.path.append("../..")

from utils.constants import DatabaseConfig, TableNames

In [2]:
# Open the SQLite database at the project-configured path and create the
# cursor object used to run queries.
conn = sqlite3.connect(DatabaseConfig.DB_PATH)
cursor = conn.cursor()
# NOTE(review): presumably the participant-identifier column name; it is not
# referenced by any cell visible here -- confirm it is still needed.
primary_key = "eid"

## Count Number of Eligible Participants

In [12]:
# Baseline: every row of the processed table, before any filter is applied.
# COUNT(*) without GROUP BY yields exactly one single-column row, so the
# result can be unpacked directly.
(num1,) = cursor.execute(f"""
    SELECT COUNT(*) AS Total_count
    FROM {TableNames.PROCESSED}
""").fetchone()
print(num1)

77888


In [13]:
# Rows that have an ECG date recorded (ECG_date IS NOT NULL).
# The aggregate returns a single one-column row, unpacked straight into num2.
(num2,) = cursor.execute(f"""
    SELECT COUNT(*) AS Total_count 
    FROM {TableNames.PROCESSED} 
    WHERE ECG_date IS NOT NULL;
""").fetchone()
print(num2)

61927


In [14]:
# Rows dropped at this step: total rows minus rows with a non-NULL ECG_date.
num1 - num2

15961

Participants whose `ECG_date` is NULL have empty or corrupted ECG XML files.

In [15]:
# Rows with HRV_available = 1 (test completed) that also have an ECG date.
# Single-row aggregate, unpacked straight into num3.
(num3,) = cursor.execute(f"""
    SELECT COUNT(*) AS Total_count 
    FROM {TableNames.PROCESSED} 
    WHERE HRV_available = 1  AND ECG_date IS NOT NULL;
""").fetchone()
print(num3)

42216


In [17]:
# Rows dropped at this step: non-NULL ECG_date but not matching HRV_available = 1.
num2 - num3

19711

In [19]:
# Rows with HRV_available = 1 and an ECG date that are also statin-free
# (statins = 0). Single-row aggregate, unpacked straight into num4.
(num4,) = cursor.execute(f"""
    SELECT COUNT(*) AS Total_count
    FROM {TableNames.PROCESSED} 
    WHERE statins = 0 AND HRV_available = 1 AND ECG_date IS NOT NULL;
""").fetchone()
print(num4)

35891


In [21]:
# Rows dropped at this step: HRV available and ECG dated, but not matching statins = 0.
num3 - num4

6325

In [33]:
# Break the statin-free, HRV-available, ECG-dated cohort down by CVD status
# and, within each group, count CVD dates falling before vs. after the ECG date.
#
# CVD is selected and the result is ordered by it so that row labels come from
# the value each row actually carries. Without ORDER BY the order of grouped
# rows is unspecified, and positional labels could silently be swapped.
cursor.execute(f"""
    SELECT CVD,
           COUNT(*) AS Total_count,
           SUM(CASE WHEN ECG_date > CVD_date THEN 1 ELSE 0 END) AS CVD_before_ECG,
           SUM(CASE WHEN ECG_date < CVD_date THEN 1 ELSE 0 END) AS CVD_after_ECG
    FROM {TableNames.PROCESSED}
    WHERE statins = 0 AND HRV_available = 1 AND ECG_date IS NOT NULL
    GROUP BY CVD
    ORDER BY CVD;
""")
# NOTE(review): assumes CVD is coded 0 / 1 -- confirm against the schema.
# Any other value (e.g. NULL) keeps its raw label instead of raising on an
# index-length mismatch, so unexpected groups stay visible in the table.
df = (
    pd.DataFrame(
        cursor.fetchall(),
        columns=["CVD", "Total_count", "CVD_before_ECG", "CVD_after_ECG"],
    )
    .set_index("CVD")
    .rename(index={0: "no CVD", 1: "CVD"})
    .rename_axis(None)  # drop the index name so the printed table keeps its layout
)
print(df)
num5 = df["Total_count"].sum()
num6 = df["CVD_before_ECG"].sum()
num7 = df["CVD_after_ECG"].sum()
# Prevalent cases (CVD dated before the ECG) are excluded from the study
# population; cases dated after the ECG are the incident events.
# NOTE(review): rows where ECG_date equals CVD_date fall in neither column, so
# they stay in the population as non-incident -- confirm this is intended.
# NOTE(review): the date comparison relies on how the dates are stored
# (e.g. ISO-8601 text sorts chronologically) -- confirm the column format.
print(f"The final study population has {num5 - num6} participants, out of which {num7} have incident CVD")

        Total_count  CVD_before_ECG  CVD_after_ECG
no CVD        30719               0              0
CVD            5172             732           4440
The final study population has 35159 participants, out of which 4440 have incident CVD


12.6% of eligible participants (4,440 of 35,159) have incident CVD; the remaining participants have not experienced CVD as of the censoring date.

The censoring date is 2022-10-31.