In [1]:
import os
import sqlite3

In [5]:
# Schema reminder for the combined research database: each list holds the
# column names of the table it is named after.

# One row per patient (demographics).
patients = [
    'patient_id',
    'dob',
    'gender',
    'race',
    'postal_code',
    'occupation',
]
# Hospital admissions; `hadm_id` is the key shared with the procedures,
# diagnoses and chartevents tables below.
admissions = [
    'patient_id',
    'hadm_id',
    'intime',
    'outtime',
]
procedures = [
    'hadm_id',
    'icd9_code',
]
diagnoses = [
    'hadm_id',
    'icd9_code',
]
chartevents = [
    'hadm_id',
    'item_id',
    'value',
    'datetime',
]
# The next two tables are keyed by `patient_id` rather than `hadm_id`.
bpstream = [
    'patient_id',
    'datetime',
    'bp_min',
    'bp_max',
]
medications = [
    'patient_id',
    'datetime',
    'drug',
]

# Lookup dictionaries: numeric code -> human-readable label.
d_procedures = {
    3000: 'Carotid endarterectomy',
}
d_diagnoses = {
    10000: 'diabetes',
    10001: 'hiv',
    10002: 'cancer',
}
d_chartevents = {
    2000: 'weight',
    2001: 'abp',
}

In [None]:
# We want to end up with one table of features. Each row contains all the
# features for one patient, followed by the outcome label.

# Final desired features (and outcome) for each patient.
# Every entry carries its own trailing comma: Python silently concatenates
# adjacent string literals, so a missing comma merges two column names into one.

features = [
    # demographics
    'age', 'gender', 'race', 'postal_code', 'occupation',
    # diagnoses
    'diabetes', 'hiv', 'cancer',
    # chartevents
    'weight_hosp', 'bp_hosp_min', 'bp_hosp_max',
    # bp numerics from wearables
    'bp_disch_min', 'bp_disch_max',
    # medications
    'dexamethasone', 'erlotinib',
    # outcome (kept last)
    'readmission',
]


In [None]:
# Data Extraction
#
# Cohort: patients who received a carotid endarterectomy (procedure code 3000
# in d_procedures).
#
# The SQL is held in a Python string so this cell is valid Python on a fresh
# kernel; run it against the research database with a sqlite3 connection,
# e.g. `conn.execute(COHORT_QUERY).fetchall()`.
#
# TODO: the final SELECT still only previews the wearable blood-pressure CTE.
# The CTEs are not yet joined into the one-row-per-patient feature table, and
# the `readmission` outcome is not derived anywhere yet.

COHORT_QUERY = """
with
-- Get the hadms with the endarterectomy procedure
t0 as (
    select pr.hadm_id, a.intime, a.outtime, a.patient_id
    from procedures pr
    left join admissions a on pr.hadm_id = a.hadm_id
    where pr.icd9_code = 3000
),
-- Combine with the demographic information of the patients.
-- age is in years at discharge (outtime), counting 365 days per year.
t1 as (
    select t0.patient_id, t0.hadm_id, t0.intime, t0.outtime,
        p.gender, p.race, p.postal_code, p.occupation,
        cast((julianday(t0.outtime) - julianday(p.dob)) / 365 as float) age
    from t0
    left join patients p on t0.patient_id = p.patient_id
),
-- Get the weights (item_id 2000) per admission
weights as (
    select hadm_id, max(value) weight
    from chartevents
    where item_id = 2000
    group by hadm_id
),
-- Get the max and min bps (item_id 2001) during the admission
bphospital as (
    select hadm_id, max(value) bp_hosp_max, min(value) bp_hosp_min
    from chartevents
    where item_id = 2001
    group by hadm_id
),
-- For each hadm_id, get whether diagnoses were made (1) or not (0)
comorbidities as (
    select hadm_id,
        max(case icd9_code when 10000 then 1 else 0 end) diabetes,
        max(case icd9_code when 10001 then 1 else 0 end) hiv,
        max(case icd9_code when 10002 then 1 else 0 end) cancer
    from diagnoses
    group by hadm_id
),
-- Get the desired medications: one row per patient, 1 if ever given.
-- The per-row datetime is deliberately not selected: with GROUP BY patient_id
-- it would be a bare column taken from an arbitrary row of the group.
drugs as (
    select patient_id,
        max(case drug when 'Dexamethasone' then 1 else 0 end) dexamethasone,
        max(case drug when 'Erlotinib' then 1 else 0 end) erlotinib
    from medications
    group by patient_id
),
-- Blood-pressure extremes from the wearable stream, one row per patient.
-- TODO(review): the bp_disch_* feature names suggest only readings after
-- discharge should count; no time filter is applied yet -- confirm.
bpwearable as (
    select patient_id,
        min(bp_min) bp_disch_min,
        max(bp_max) bp_disch_max
    from bpstream
    group by patient_id
)
select * from bpwearable;
"""





    
    

In [4]:
from sklearn import svm, neighbors
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 25% of the records for testing; the remaining 75% are used for
# training. random_state is fixed so the split is reproducible.
#
# The last column is treated as the outcome and every other column as a
# predictor.
# NOTE(review): `features` is defined in an earlier cell as a list of column
# names; the 2-D slicing below needs the extracted feature matrix (e.g. a
# NumPy array) to be bound to `features` first -- confirm before running.
x_train, x_test, y_train, y_test = train_test_split(
    features[:, :-1],
    features[:, -1],
    test_size=0.25,
    train_size=0.75,
    random_state=0,
)
print('Number of training records: %d' % len(x_train))
print('Number of testing records: %d' % len(x_test))
