In [1]:
# Import libraries
import os
import pandas as pd
import sqlite3

In [2]:
# Set working directory
desired_dir = 'C:/Users/idris/Downloads/Datasets'
os.chdir(desired_dir)

In [3]:
# Read SAS files
ghb = pd.read_sas('P_GHB.XPT')
fast_gluc = pd.read_sas('P_GLU.XPT')

In [4]:
# Establish a connection to SQLite database
conn = sqlite3.connect('Lab Records.db')

In [5]:
# Move tables to Lab Records database
ghb.to_sql('ghb',conn, if_exists='replace',index=False)
fast_gluc.to_sql('fast_gluc',conn, if_exists='replace',index=False)

5090

In [6]:
# Full join the fasting glucose and ghb table
lab_records = '''
CREATE TABLE lab_records AS 
    SELECT * FROM fast_gluc
        INNER JOIN ghb ON fast_gluc.SEQN = ghb.SEQN
'''
cursor = conn.cursor()
cursor.execute(lab_records)

<sqlite3.Cursor at 0x224891e91f0>

In [7]:
# Create new column for the associated blood glucose and doctor follow-up
diabetes_diagnosis = '''
CREATE TABLE diabetes AS 
    SELECT SEQN, LBXGLU, LBXGH,
        CASE
            WHEN LBXGLU < 100 AND LBXGH < 5.7 THEN 'Non-Diabetic' 
            WHEN (LBXGLU >= 100 AND LBXGLU < 126) OR (LBXGH >= 5.7 AND LBXGH < 6.5) THEN 'Pre-Diabetic'
            WHEN (LBXGLU >= 100 AND LBXGLU < 126 AND LBXGH IS NULL) OR (LBXGLU IS NULL AND LBXGH >= 5.7 AND LBXGH < 6.5) THEN 'Pre-Diabetic'
            WHEN LBXGLU >= 126 OR LBXGH >= 6.5 THEN 'Diabetic'
            WHEN (LBXGLU >= 126 AND LBXGH IS NULL) OR (LBXGLU IS NULL AND LBXGH >= 6.5) THEN 'Diabetic'
            ELSE 'Inconclusive'
        END AS Diabetes_Status,
        CASE
            WHEN (LBXGLU < 100 AND LBXGH < 5.7) OR (LBXGLU >= 100 AND LBXGLU < 126) OR (LBXGH >= 5.7 AND LBXGH < 6.5) THEN "No"
            ELSE 'Yes'
        END AS Doctor_Followup
    FROM lab_records;
'''
cursor = conn.cursor()

cursor.execute(diabetes_diagnosis)

<sqlite3.Cursor at 0x224891e93b0>

In [8]:
# Print first 25 rows
cursor.execute('SELECT * FROM diabetes')
rows = cursor.fetchmany(25)
for row in rows:
    print(row)

(109264.0, 97.0, 5.3, 'Non-Diabetic', 'No')
(109271.0, 103.0, 5.6, 'Pre-Diabetic', 'No')
(109274.0, 154.0, 5.7, 'Pre-Diabetic', 'No')
(109277.0, 92.0, 5.3, 'Non-Diabetic', 'No')
(109282.0, 95.0, 5.5, 'Non-Diabetic', 'No')
(109286.0, 92.0, 5.7, 'Pre-Diabetic', 'No')
(109290.0, 106.0, 8.4, 'Pre-Diabetic', 'No')
(109292.0, 181.0, 6.4, 'Pre-Diabetic', 'No')
(109297.0, 92.0, 5.1, 'Non-Diabetic', 'No')
(109300.0, 103.0, 5.4, 'Pre-Diabetic', 'No')
(109305.0, 105.0, 5.2, 'Pre-Diabetic', 'No')
(109307.0, 103.0, 5.3, 'Pre-Diabetic', 'No')
(109313.0, 97.0, 5.9, 'Pre-Diabetic', 'No')
(109317.0, 88.0, 5.0, 'Non-Diabetic', 'No')
(109322.0, 89.0, 5.4, 'Non-Diabetic', 'No')
(109323.0, 104.0, 5.2, 'Pre-Diabetic', 'No')
(109324.0, 86.0, None, 'Inconclusive', 'Yes')
(109326.0, 95.0, 5.1, 'Non-Diabetic', 'No')
(109327.0, 98.0, 5.6, 'Non-Diabetic', 'No')
(109330.0, 109.0, 5.6, 'Pre-Diabetic', 'No')
(109331.0, 96.0, 5.3, 'Non-Diabetic', 'No')
(109332.0, 88.0, 5.0, 'Non-Diabetic', 'No')
(109335.0, 98.0, 6.3,

In [9]:
# SQL query to retrieve data from the 'diabetes' table
query = "SELECT * FROM diabetes;"

# Fetch data from SQL into a pandas DataFrame
diabetes_data = pd.read_sql_query(query, conn)

# Export the pandas DataFrame to a CSV file
diabetes_data.to_csv('diabetes_data.csv', index=False)  # Change 'diabetes_data.csv' to your desired file name

In [10]:
# Commit changes and close the connection
conn.commit()
conn.close()