Skip to content

Commit

Permalink
During preprocessing, lowercase variable name keys in itemid map …
Browse files (browse the repository at this point in the history)
… and reference ranges to make sure we don't fall victim to inconsistent
cases in variable names.
  • Loading branch information
turambar committed Jan 11, 2018
1 parent 4459f04 commit 33513fa
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions mimic3benchmark/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,8 @@ def make_phenotype_label_matrix(phenotypes, stays=None):

def read_itemid_to_variable_map(fn, variable_column='LEVEL2'):
var_map = DataFrame.from_csv(fn, index_col=None).fillna('').astype(str)

var_map[variable_column] = var_map[variable_column].apply(lambda s: s.lower())
var_map.COUNT = var_map.COUNT.astype(int)

var_map = var_map.ix[(var_map[variable_column] != '') & (var_map.COUNT>0)]
var_map = var_map.ix[(var_map.STATUS == 'ready')]
var_map.ITEMID = var_map.ITEMID.astype(int)
Expand All @@ -96,6 +95,7 @@ def read_variable_ranges(fn, variable_column='LEVEL2'):
to_rename = dict(zip(columns, [ c.replace(' ', '_') for c in columns ]))
to_rename[variable_column] = 'VARIABLE'
var_ranges = DataFrame.from_csv(fn, index_col=None)
var_ranges = var_ranges[variable_column].apply(lambda s: s.lower())
var_ranges = var_ranges[columns]
var_ranges.rename_axis(to_rename, axis=1, inplace=True)
var_ranges = var_ranges.drop_duplicates(subset='VARIABLE', keep='first')
Expand Down

0 comments on commit 33513fa

Please sign in to comment.