# Cohorts from RCTs 

In [23]:
from __future__ import division
import numpy as np
import pandas as pd
from os import walk
from os import listdir
from os.path import isfile, join 
import itertools 

In [2]:
#path to the files with admissions for each component
path = "../../../patient-similarity/temp/cohort-components/"
files = [f for f in listdir(path) if isfile(join(path, f))]

In [3]:
#get height weights
height_weight = pd.read_csv(join(path, 'height_weight.csv'));

In [4]:
# get ages and determine whose over 18
demo = pd.read_csv(join(path, "demographics.csv"))
adult_ids = (demo[demo['age_group'] == 'adult']['hadm_id']).values.ravel().tolist()
pd.DataFrame(adult_ids).head()

Unnamed: 0,0
0,145834
1,185777
2,107064
3,150750
4,194540


In [5]:
#Go through and check if they were empty, if not, then create a dictionary where the keys are truncated 
#filenames and values are the list of admission ids
a_ids = {}
for fname in files:
    try:
        d_i = pd.read_csv(join(path, fname))
        a_ids[fname[0 : -4]] = d_i.values.ravel().tolist()
        length = len(d_i)

    except:
        length = 0
    
    print "There are {0} admissions for {1}".format(length, fname[0:-4])

There are 0 admissions for extracorporeal_membrane_oxygenation
There are 1286 admissions for cancers
There are 114 admissions for bleeding
There are 28945 admissions for height_weight
There are 5 admissions for mechanical_ventilation
There are 0 admissions for pregnancy
There are 3064 admissions for thrombocytopenia
There are 3206 admissions for Hemodialysis
There are 0 admissions for low_bmi
There are 0 admissions for rheumatoid_arthritis
There are 576 admissions for sedated
There are 1852 admissions for brain_injury
There are 1270 admissions for sepsis
There are 0 admissions for vasopressor_GENERIC_drugs
There are 23 admissions for skin_lesions
There are 829 admissions for vasopressor_infusion
There are 0 admissions for epidural_catheter
There are 0 admissions for high_bmi
There are 280 admissions for intestinal_problems
There are 2994 admissions for cancer_cohort_ids
There are 6 admissions for antiplatlet
There are 31402 admissions for vasopressor_POE_drugs
There are 2950 admissions

In [6]:
#list the keys of the dictionary
a_ids.keys()

['bleeding',
 'antiplatlet',
 'deep_vein_thrombosis',
 'brain_injury',
 'liver_cirrhosis',
 'height_weight',
 'thrombocytopenia',
 'Hemodialysis',
 'sedated',
 'tracheal_intubation',
 'sepsis',
 'tracheostomy',
 'Intracerebral hemorrhage ',
 'traumatic_brain_injury',
 'demographics',
 'hemorrhagic_brain_injury',
 'intestinal_problems',
 'mechanical_ventilation',
 'vasopressor_infusion',
 'skin_lesions',
 'septic_shock',
 'vasopressor_POE_drugs',
 'central_venous_catheter',
 'cancers',
 'cancer_cohort_ids']

# NCT02659839  
### Mortality in Cancer Patients Admitted to the Intensive Care Unit in a Resource-limited Setting

In [7]:
#Create a cohort by finding intersections of sets of ids for AND conditions and unions for OR conditions
res_1 = set(a_ids['cancers']).intersection(
    #Union of a few components
    (a_ids['mechanical_ventilation'] + a_ids['vasopressor_POE_drugs'] + a_ids['Hemodialysis'] ) ,
    adult_ids
)
print(len(res_1))

1076


# NCT02872792, 
### Early Mobilisation in Intensive Care Unit : Interest of Cyclo-ergometry in Patients With Septic Chock


In [8]:
res_2 = set(a_ids['septic_shock']).intersection(
        (a_ids['mechanical_ventilation'] + a_ids['tracheal_intubation']),
        adult_ids
       )



In [9]:
#Remove everything in the set of IDs belonging to the set of IDs with properties that exclude them
exclusion_2 = (
    a_ids['central_venous_catheter'] + 
    a_ids['skin_lesions'] + 
    a_ids['deep_vein_thrombosis'] + 
    a_ids['brain_injury'] )

res_2 = [item for item in res_2 if item not in exclusion_2]
print len(res_2)

1637


# NCT01793363, 
### Tracheostomy and Weaning From Mechanical Ventilation : Evaluation of the Lung Ultrasound Score
    

In [10]:
res_3 = a_ids['tracheostomy']

In [11]:
print len(res_3)

579


# NCT01784159 
### Aspirin for Treatment of Severe Sepsis


In [12]:
res_4 = a_ids['sepsis'] + a_ids['septic_shock']

exclusion_4 =  (
    a_ids['bleeding']+ 
    a_ids['hemorrhagic_brain_injury'] + 
    a_ids['liver_cirrhosis'] + 
    a_ids['traumatic_brain_injury']  +
    a_ids['antiplatlet']  + 
    a_ids['intestinal_problems'] +
    a_ids['thrombocytopenia']
    
)

res_4 = [item for item in res_4 if item not in exclusion_4]
print len(res_4)

2994


In [46]:
lengths = map(len, [res_1, res_2, res_3, res_4])
total_len = sum(lengths)
#make the intersection matrix
intersection_ratio_mat = np.zeros([4,4])
intersection_mat = np.zeros([4,4])
for p in product(enumerate([res_1,res_2, res_3, res_4]) , enumerate([res_1,res_2, res_3, res_4])):
    intsct = len(set(p[0][1]).intersection(p[1][1]))
    intersection_mat[p[0][0], p[1][0]] = intsct
    intersection_ratio_mat[p[0][0], p[1][0]] = intsct/total_len
    
    

In [47]:

list(product(enumerate(["res_1","res_2", "res_3", "res_4"]), enumerate(["res_1","res_2", "res_3", "res_4"])))

[((0, 'res_1'), (0, 'res_1')),
 ((0, 'res_1'), (1, 'res_2')),
 ((0, 'res_1'), (2, 'res_3')),
 ((0, 'res_1'), (3, 'res_4')),
 ((1, 'res_2'), (0, 'res_1')),
 ((1, 'res_2'), (1, 'res_2')),
 ((1, 'res_2'), (2, 'res_3')),
 ((1, 'res_2'), (3, 'res_4')),
 ((2, 'res_3'), (0, 'res_1')),
 ((2, 'res_3'), (1, 'res_2')),
 ((2, 'res_3'), (2, 'res_3')),
 ((2, 'res_3'), (3, 'res_4')),
 ((3, 'res_4'), (0, 'res_1')),
 ((3, 'res_4'), (1, 'res_2')),
 ((3, 'res_4'), (2, 'res_3')),
 ((3, 'res_4'), (3, 'res_4'))]

In [48]:
#Take a look at the cohorts
labels = ["cancer patients","Early Mobilisation in Intensive Care Unit : Interest of Cyclo-ergometry in Patients With Septic Chock",
            "Tracheostomy and Weaning ",  "Aspirin for Treatment " ]

#Normalized cohorts
pd.DataFrame(intersection_ratio_mat,  columns = labels, index =labels)


Unnamed: 0,cancer patients,Early Mobilisation in Intensive Care Unit : Interest of Cyclo-ergometry in Patients With Septic Chock,Tracheostomy and Weaning,Aspirin for Treatment
cancer patients,0.171174,0.000955,0.000795,0.001432
Early Mobilisation in Intensive Care Unit : Interest of Cyclo-ergometry in Patients With Septic Chock,0.000955,0.26042,0.009386,0.185332
Tracheostomy and Weaning,0.000795,0.009386,0.092109,0.014795
Aspirin for Treatment,0.001432,0.185332,0.014795,0.471524


In [49]:
#Unnormalized 
pd.DataFrame(intersection_mat, columns=labels, index = labels)

Unnamed: 0,cancer patients,Early Mobilisation in Intensive Care Unit : Interest of Cyclo-ergometry in Patients With Septic Chock,Tracheostomy and Weaning,Aspirin for Treatment
cancer patients,1076.0,6.0,5.0,9.0
Early Mobilisation in Intensive Care Unit : Interest of Cyclo-ergometry in Patients With Septic Chock,6.0,1637.0,59.0,1165.0
Tracheostomy and Weaning,5.0,59.0,579.0,93.0
Aspirin for Treatment,9.0,1165.0,93.0,2964.0


In [17]:
#Write_to_file
(pd.DataFrame(list(res_1))).to_csv(join(path, "cancer_cohort_ids.csv"))
(pd.DataFrame(list(res_4))).to_csv(join(path, "aspirin_cohort_ids.csv"))

In [None]:
from itertools import permutations_

In [None]:
iproduct