Skip to content

Commit

Permalink
Add updated decompensation/utils.py
Browse files Browse the repository at this point in the history
  • Loading branch information
hrayrhar committed Jan 18, 2018
1 parent 3007e9a commit f8e4c5f
Showing 1 changed file with 71 additions and 41 deletions.
112 changes: 71 additions & 41 deletions mimic3models/decompensation/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,6 @@
import random


def read_chunk(reader, chunk_size):
    """Read `chunk_size` consecutive examples from `reader`.

    Calls ``reader.read_next()`` exactly `chunk_size` times and gathers
    the per-example pieces into parallel lists.

    Args:
        reader: object exposing ``read_next() -> (X, t, y, header)``.
        chunk_size: number of examples to pull from the reader.

    Returns:
        Tuple ``(data, ts, labels, header)`` where the first three are
        parallel lists of length `chunk_size`, and `header` is the one
        returned by the last ``read_next()`` call (``None`` when
        `chunk_size` is 0).
    """
    data, ts, labels = [], [], []
    header = None
    for _ in range(chunk_size):
        X, t, y, header = reader.read_next()
        data.append(X)
        ts.append(t)
        labels.append(y)
    return (data, ts, labels, header)


def preprocess_chunk(data, ts, discretizer, normalizer=None):
data = [discretizer.transform(X, end=t)[0] for (X, t) in zip(data, ts)]
if (normalizer is not None):
Expand All @@ -29,11 +16,13 @@ def preprocess_chunk(data, ts, discretizer, normalizer=None):
class BatchGen(object):

def __init__(self, reader, discretizer, normalizer,
batch_size, steps, shuffle):
batch_size, steps, shuffle, return_names=False):
self.reader = reader
self.discretizer = discretizer
self.normalizer = normalizer
self.batch_size = batch_size
self.shuffle = shuffle
self.return_names = return_names

if steps is None:
self.n_examples = reader.get_number_of_examples()
Expand All @@ -42,8 +31,7 @@ def __init__(self, reader, discretizer, normalizer,
self.n_examples = steps * batch_size
self.steps = steps

self.shuffle = shuffle
self.chunk_size = min(1024, steps) * batch_size
self.chunk_size = min(1024, self.steps) * batch_size
self.lock = threading.Lock()
self.generator = self._generator()

Expand All @@ -56,14 +44,25 @@ def _generator(self):
while remaining > 0:
current_size = min(self.chunk_size, remaining)
remaining -= current_size
(data, ts, labels, header) = read_chunk(self.reader, current_size)

ret = common_utils.read_chunk(self.reader, current_size)
data = ret["X"]
ts = ret["t"]
labels = ret["y"]
names = ret["name"]

data = preprocess_chunk(data, ts, self.discretizer, self.normalizer)
data = (data, labels)
data = common_utils.sort_and_shuffle(data, B)

for i in range(0, current_size, B):
yield (nn_utils.pad_zeros(data[0][i:i + B]),
np.array(data[1][i:i + B]))
X = nn_utils.pad_zeros(data[0][i:i + B])
y = np.array(data[1][i:i + B])
batch_data = (X, y)
if not self.return_names:
yield batch_data
else:
yield {"data": batch_data, "names": names, "ts": ts}

# Make the batch generator directly iterable: iteration is delegated to
# the single generator object created in __init__ (self.generator), so
# all iterators over this instance share one stream of batches.
def __iter__(self):
return self.generator
Expand All @@ -78,12 +77,15 @@ def __next__(self):

class BatchGenDeepSupervisoin(object):

def __init__(self, dataloader, discretizer, normalizer, batch_size, shuffle):
self.data = self._load_per_patient_data(dataloader, discretizer,
normalizer)
def __init__(self, dataloader, discretizer, normalizer,
batch_size, shuffle, return_names=False):
self.batch_size = batch_size
self.steps = (len(self.data[1]) + batch_size - 1) // batch_size
self.shuffle = shuffle
self.return_names = return_names

self._load_per_patient_data(dataloader, discretizer, normalizer)

self.steps = (len(self.data[1]) + batch_size - 1) // batch_size
self.lock = threading.Lock()
self.generator = self._generator()

Expand All @@ -94,32 +96,46 @@ def get_bin(t):
eps = 1e-6
return int(t / timestep - eps)

N = len(dataloader._data)
N = len(dataloader._data["X"])
Xs = []
ts = []
masks = []
ys = []
names = []

for i in range(N):
(X, positions, labels) = dataloader._data[i]
labels = [int(x) for x in labels]
X = dataloader._data["X"][i]
cur_ts = dataloader._data["ts"][i]
cur_ys = dataloader._data["ys"][i]
name = dataloader._data["name"][i]

cur_ys = [int(x) for x in cur_ys]

T = max(cur_ts)
nsteps = get_bin(T) + 1
mask = [0] * nsteps
y = [0] * nsteps

T = max(positions)
mask = [0] * (get_bin(T) + 1)
y = [0] * (get_bin(T) + 1)
for pos, z in zip(positions, labels):
for pos, z in zip(cur_ts, cur_ys):
mask[get_bin(pos)] = 1
y[get_bin(pos)] = z

X = discretizer.transform(X, end=T)[0]
if (normalizer is not None):
X = normalizer.transform(X)

Xs.append(X)
masks.append(np.array(mask))
ys.append(np.array(y))
names.append(name)
ts.append(cur_ts)

assert np.sum(mask) > 0
assert len(X) == len(mask) and len(X) == len(y)

return [[Xs, masks], ys]
self.data = [[Xs, masks], ys]
self.names = names
self.ts = ts

def _generator(self):
B = self.batch_size
Expand All @@ -128,29 +144,43 @@ def _generator(self):
N = len(self.data[1])
order = range(N)
random.shuffle(order)
tmp = [[[None]*N, [None]*N], [None]*N]
tmp_data = [[[None]*N, [None]*N], [None]*N]
tmp_names = [None] * N
tmp_ts = [None] * N
for i in range(N):
tmp[0][0][i] = self.data[0][0][order[i]]
tmp[0][1][i] = self.data[0][1][order[i]]
tmp[1][i] = self.data[1][order[i]]
self.data = tmp
tmp_data[0][0][i] = self.data[0][0][order[i]]
tmp_data[0][1][i] = self.data[0][1][order[i]]
tmp_data[1][i] = self.data[1][order[i]]
tmp_names[i] = self.names[order[i]]
tmp_ts[i] = self.ts[order[i]]
self.data = tmp_data
self.names = tmp_names
self.ts = tmp_ts
else:
# sort entirely
Xs = self.data[0][0]
masks = self.data[0][1]
ys = self.data[1]
(Xs, masks, ys) = common_utils.sort_and_shuffle([Xs, masks, ys], B)
(Xs, masks, ys, self.names, self.ts) = common_utils.sort_and_shuffle([Xs, masks, ys,
self.names, self.ts], B)
self.data = [[Xs, masks], ys]

for i in range(0, len(self.data[1]), B):
X = self.data[0][0][i:i+B]
mask = self.data[0][1][i:i+B]
y = self.data[1][i:i+B]
X = self.data[0][0][i:i + B]
mask = self.data[0][1][i:i + B]
y = self.data[1][i:i + B]
names = self.names[i:i + B]
ts = self.ts[i:i + B]

X = nn_utils.pad_zeros(X) # (B, T, D)
mask = nn_utils.pad_zeros(mask) # (B, T)
y = nn_utils.pad_zeros(y)
y = np.expand_dims(y, axis=-1) # (B, T, 1)
yield ([X, mask], y)
batch_data = ([X, mask], y)
if not self.return_names:
yield batch_data
else:
yield {"data": batch_data, "names": names, "ts": ts}

# Iterating the object yields batches from the one shared generator
# built in __init__; a second iterator resumes the same stream rather
# than restarting it.
def __iter__(self):
return self.generator
Expand Down

0 comments on commit f8e4c5f

Please sign in to comment.