We analyze two types of variables here:
+ Confounding variables (such as age and BMI)
+ Process variables (such as maximum load and maximum heart rate during the fitness test)

In [1]:
import sqlite3
from tqdm import tqdm
import sys

sys.path.append("../..")

from utils.constants import DatabaseConfig, TableNames

In [2]:
# Name of the subject-id column used in the SQL queries of the cells below.
primary_key = "eid"

# A single connection and cursor are opened here and reused by the later cells.
conn = sqlite3.connect(DatabaseConfig.DB_PATH)
cursor = conn.cursor()

# Confounding variables 

+ Create the `hypertension_treatment` column in the `Processed` table to record whether the subject self-reports high blood pressure diagnosed by a doctor or reports taking blood pressure medication.

In [20]:
# Flag subjects with diagnosed or treated hypertension.
#
# `hypertension_treatment` in the Processed table is set to 1 when a subject
# reports blood pressure medication (data fields 6177 / 6153) or reports high
# blood pressure (data field 6150). Every other subject keeps the default 0.

# Coding of 6177 (medication):
# 1	Cholesterol lowering medication
# 2	Blood pressure medication
# 3	Insulin
# -7	None of the above
# -1	Do not know
# -3	Prefer not to answer
#
# Coding of 6153 (medication):
# 1	Cholesterol lowering medication
# 2	Blood pressure medication
# 3	Insulin
# 4	Hormone replacement therapy
# 5	Oral contraceptive pill or minipill
# -7	None of the above
# -1	Do not know
# -3	Prefer not to answer
BLOOD_PRESSURE_MEDICATION = 2

# Coding of 6150 (reported diseases):
# 1	Heart attack
# 2	Angina
# 3	Stroke
# 4	High blood pressure
# -7	None of the above
# -3	Prefer not to answer
HIGH_BLOOD_PRESSURE = 4

# Create the column only when it is missing; otherwise reset it. A plain
# ALTER TABLE fails on the second run, so this keeps the cell re-runnable and
# guarantees that flags from an earlier run never survive.
cursor.execute(f"PRAGMA table_info({TableNames.PROCESSED});")
processed_columns = {row[1] for row in cursor.fetchall()}
if "hypertension_treatment" in processed_columns:
    cursor.execute(f"UPDATE {TableNames.PROCESSED} SET hypertension_treatment = 0;")
else:
    cursor.execute(f"""
ALTER TABLE {TableNames.PROCESSED} ADD COLUMN hypertension_treatment INTEGER DEFAULT 0;
""")

# Collect the instance-0 columns of each data field by name. The groups are
# built per field rather than by fixed positions, so the result does not
# depend on the column order of the Confounders table or on how many array
# entries each field has.
cursor.execute(f"PRAGMA table_info({TableNames.CONFOUNDERS});")
confounder_columns = [row[1] for row in cursor.fetchall()]
medication_columns = [
    col
    for col in confounder_columns
    if col.startswith("6177-0") or col.startswith("6153-0")
]
disease_columns = [col for col in confounder_columns if col.startswith("6150-0")]

if not medication_columns and not disease_columns:
    raise ValueError(
        f"No columns for data fields 6177, 6153 or 6150 found in {TableNames.CONFOUNDERS}"
    )

# Medication columns come first in the SELECT, followed by the disease
# columns, so each row can be split at a known offset.
selected_columns = ", ".join(f"`{col}`" for col in medication_columns + disease_columns)
cursor.execute(f"SELECT {primary_key}, {selected_columns} FROM {TableNames.CONFOUNDERS};")

disease_offset = 1 + len(medication_columns)
treated_eids = []
for row in tqdm(cursor.fetchall()):
    medication = row[1:disease_offset]  # 6177 and 6153
    disease_reported = row[disease_offset:]  # 6150

    if BLOOD_PRESSURE_MEDICATION in medication or HIGH_BLOOD_PRESSURE in disease_reported:
        treated_eids.append((row[0],))

# Only flagged subjects need a write; everyone else already holds 0.
cursor.executemany(
    f"UPDATE {TableNames.PROCESSED} SET hypertension_treatment = 1 WHERE {primary_key} = ?;",
    treated_eids,
)

conn.commit()

<sqlite3.Cursor at 0x7f27b0f937a0>

# Process variables 

+ Create the `test_status` column in the `Processed` table to record the status of the fitness test. It is 1 (True) if the subject completed the test using the bike, and 0 (False) otherwise.

In [15]:
# Flag subjects whose fitness test is usable.
#
# `test_status` in the Processed table defaults to 1 and is set to 0 for every
# subject in the ECG table who did not use the bike (data field 6019) or did
# not complete the test (data field 6020).
#
# NOTE(review): subjects in Processed without a row in the ECG table are never
# updated and therefore keep the default 1 - confirm that this is intended.

# Create the column only when it is missing; otherwise reset it to the
# default. A plain ALTER TABLE fails on the second run, so this keeps the cell
# re-runnable and discards flags left over from an earlier run.
cursor.execute(f"PRAGMA table_info({TableNames.PROCESSED});")
processed_columns = {row[1] for row in cursor.fetchall()}
if "test_status" in processed_columns:
    cursor.execute(f"UPDATE {TableNames.PROCESSED} SET test_status = 1;")
else:
    cursor.execute(f"""
ALTER TABLE {TableNames.PROCESSED} ADD COLUMN test_status INTEGER DEFAULT 1;
""")

# We will use data field 6019 and 6020
cursor.execute(f"SELECT {primary_key}, `6019-0.0`, `6020-0.0` FROM {TableNames.ECG};")

# 6019 - ECG/Bike method for fitness test; any value other than 1.0 excludes
#        the subject (e.g. 2.0: resting only).
# 6020 - Completion status of fitness test; any value other than 1.0 excludes
#        the subject, e.g.
#        31.0: participant wanted to stop early
#        32.0: participant reported chest-pain or other discomfort
#        33.0: heart rate too high
# A NULL in either field is read as None, which compares unequal to 1.0 and so
# also excludes the subject.
excluded_eids = [
    (eid,)
    for eid, bike_method, completion_status in tqdm(cursor.fetchall())
    if bike_method != 1.0 or completion_status != 1.0
]

# Only excluded subjects need a write; everyone else already holds 1.
cursor.executemany(
    f"UPDATE {TableNames.PROCESSED} SET test_status = 0 WHERE {primary_key} = ?;",
    excluded_eids,
)

conn.commit()

100%|██████████| 77888/77888 [00:00<00:00, 245921.61it/s]
