# In [12]:
import torch.utils.data as data
import os
import torch
import random
import pandas as pd
import numpy as np

# Root folder of the dataset; it holds four sub-folders part1..part4, each
# with its own label CSV (label1.csv..label4.csv).
_MIMIC_ROOT = '/Users/yeseullee/Documents/ECE271B/MIMICii'

# One directory per dataset part, in part order.
data_path = ['{}/part{}'.format(_MIMIC_ROOT, part_no) for part_no in range(1, 5)]

# Label CSV of each part, aligned index-by-index with data_path.
label_path = [
    '{}/label{}.csv'.format(part_dir, part_no)
    for part_no, part_dir in enumerate(data_path, start=1)
]


class MIMICDataset(data.Dataset):
    """Dataset of fixed-length signal segments labelled by hypertension level.

    Every label CSV must provide the columns ``subject_id`` and
    ``hypertension_level``.  For each listed subject the file
    ``<data_path[k]>/<subject_id>.txt`` is read: its first line is split on
    tabs, the last field is discarded and the remaining fields are parsed as
    floats.  Only segments with exactly ``SAMPLES_PER_SECOND *
    SEGMENT_SECONDS`` values are kept; missing or wrong-sized files are
    reported with ``print`` and skipped.

    Construction side effects: writes ``./subjectid_NT_PHT_HT.txt``,
    ``dataset_NT_PHT_HT.pt`` and ``label_NT_PHT_HT.pt`` into the current
    working directory.

    Args:
        data_path: Sequence of directories, one per dataset part.
        label_path: Sequence of label CSV paths aligned with ``data_path``.
        normalize: Optional mapping with keys ``'mean'`` and ``'std'``; when
            truthy each returned segment is standardised with these values.
        preprocessing: When true, ``__getitem__`` applies the median filter
            followed by the rolling-mean filter.
        choose_class: Class indices to load (0 = Normal, 1 = Prehypertension,
            2 = Hypertension).  Defaults to all three.

    Raises:
        ValueError: If ``data_path`` and ``label_path`` differ in length.
    """

    # Value of the ``hypertension_level`` column -> integer class index.
    LEVEL_TO_CLASS = {'Normal': 0, 'Prehypertension': 1, 'Hypertension': 2}
    # Expected segment length is 125 * 5 values (presumably 125 Hz x 5 s).
    SAMPLES_PER_SECOND = 125
    SEGMENT_SECONDS = 5
    # Maximum number of neighbours on each side used by the smoothing filters.
    FILTER_HALF_WINDOW = 11

    def __init__(self, data_path, label_path, normalize=None, preprocessing=False, choose_class=None):
        super().__init__()
        if len(data_path) != len(label_path):
            raise ValueError('data_path and label_path must have the same length')

        self.data_path = data_path
        self.label_path = label_path
        self.normalize = normalize
        self.preprocessing = preprocessing
        # A fresh list per instance avoids sharing a mutable default argument.
        self.choose_class = [0, 1, 2] if choose_class is None else list(choose_class)

        self.data = []
        self.label = []
        self.subjectid = []

        subjects_by_class = self._collect_subjects(label_path)
        expected_len = self.SAMPLES_PER_SECOND * self.SEGMENT_SECONDS

        for c in self.choose_class:
            for part_idx, subject in subjects_by_class[c]:
                # Stored identifier keeps the historical "<part>_<subject>" form.
                sub = '{}_{}'.format(part_idx + 1, subject)
                seg_path = os.path.join(self.data_path[part_idx], '{}.txt'.format(subject))

                fields = self._read_fields(seg_path)
                if fields is None:
                    # Skip explicitly: never reuse values from a previous subject.
                    print(sub, seg_path, 'missing segment file')
                    continue
                if len(fields) != expected_len:
                    print(sub, seg_path, len(fields))
                    continue

                seg = torch.tensor([float(x) for x in fields]).reshape(1, expected_len)
                self.data.append(seg)
                self.label.append(c)
                self.subjectid.append(sub)

        self.label = torch.tensor(self.label, dtype=torch.long)

        with open(r'./subjectid_NT_PHT_HT.txt', 'w') as fp:
            # One subject identifier per line.
            fp.writelines('%s\n' % item for item in self.subjectid)

        torch.save(self.data, 'dataset_NT_PHT_HT.pt')
        torch.save(self.label, 'label_NT_PHT_HT.pt')

    @classmethod
    def _collect_subjects(cls, label_path):
        """Group ``(part index, subject id)`` pairs by class index.

        Pairs are appended in label-file order, then row order.  Rows whose
        ``hypertension_level`` is not a key of ``LEVEL_TO_CLASS`` are ignored.
        """
        subjects_by_class = [[] for _ in cls.LEVEL_TO_CLASS]
        for part_idx, csv_path in enumerate(label_path):
            label_df = pd.read_csv(csv_path)
            for subject, level in zip(label_df['subject_id'], label_df['hypertension_level']):
                class_idx = cls.LEVEL_TO_CLASS.get(level)
                if class_idx is not None:
                    # str() keeps this working when pandas parses ids as numbers.
                    subjects_by_class[class_idx].append((part_idx, str(subject)))
        return subjects_by_class

    @staticmethod
    def _read_fields(seg_path):
        """Return the tab-separated fields of the first line of ``seg_path``.

        The last field is dropped.  Returns ``None`` when the file does not
        exist and an empty list when the file is empty.
        """
        try:
            with open(seg_path) as f:
                first_line = f.readline()
        except FileNotFoundError:
            return None
        return first_line.split('\t')[:-1]

    @staticmethod
    def _centered_filter(tensor_data, reduce_rows, half_window):
        """Apply ``reduce_rows`` over a centred window at every column.

        The window around column ``j`` spans ``radius`` neighbours on each
        side, where ``radius`` is ``half_window`` clipped so the window stays
        inside the row; the first and last columns are therefore copied
        unchanged.  ``reduce_rows`` maps a ``(rows, width)`` tensor to a
        ``(rows,)`` tensor.
        """
        rows, cols = tensor_data.size()
        filtered = torch.zeros(rows, cols)
        for j in range(cols):
            radius = min(j, cols - 1 - j, half_window)
            filtered[:, j] = reduce_rows(tensor_data[:, j - radius: j + radius + 1])
        return filtered

    @staticmethod
    def median_filter(tensor_data, half_window=FILTER_HALF_WINDOW):
        """Return ``tensor_data`` smoothed row-wise by a centred median.

        Args:
            tensor_data: 2-D tensor; each row is filtered independently.
            half_window: Maximum neighbours on each side of a sample.

        Returns:
            A new tensor of the same shape created with ``torch.zeros``.
        """
        return MIMICDataset._centered_filter(
            tensor_data, lambda window: window.median(dim=1).values, half_window)

    @staticmethod
    def roll_filter(tensor_data, half_window=FILTER_HALF_WINDOW):
        """Return ``tensor_data`` smoothed row-wise by a centred moving mean.

        Args:
            tensor_data: 2-D tensor; each row is filtered independently.
            half_window: Maximum neighbours on each side of a sample.

        Returns:
            A new tensor of the same shape created with ``torch.zeros``.
        """
        return MIMICDataset._centered_filter(
            tensor_data, lambda window: window.mean(dim=1), half_window)

    def __getitem__(self, index):
        """Return ``(segment, label, subjectid)`` for ``index``.

        The segment is optionally filtered and then standardised.  When the
        dataset was built for classes ``[0, 2]``, label 2 is remapped to 1 so
        the two classes are numbered 0 and 1.
        """
        segment = self.data[index]
        if self.preprocessing:
            segment = self.roll_filter(self.median_filter(segment))
        if self.normalize:
            segment = (segment - self.normalize['mean']) / self.normalize['std']
        label = self.label[index]
        subjectid = self.subjectid[index]

        if list(self.choose_class) == [0, 2] and label == 2:
            label = torch.tensor(1)

        return segment, label, subjectid

    def __len__(self):
        """Return the number of loaded segments."""
        return len(self.data)

# In [9]:
# Build the dataset restricted to classes 0 and 1, standardising each
# segment with the fixed mean/std below.
data_mean, data_std = 1.59, 0.73
data_normalization = dict(mean=data_mean, std=data_std)
dataset = MIMICDataset(
    data_path,
    label_path,
    normalize=data_normalization,
    preprocessing=False,
    choose_class=[0, 1],
)

# In [10]:
# Build the dataset restricted to classes 0 and 2, standardising each
# segment with the fixed mean/std below.
data_mean, data_std = 1.59, 0.73
data_normalization = dict(mean=data_mean, std=data_std)
dataset = MIMICDataset(
    data_path,
    label_path,
    normalize=data_normalization,
    preprocessing=False,
    choose_class=[0, 2],
)

# In [13]:
# Build the dataset with all three classes, standardising each segment
# with the fixed mean/std below.
data_mean, data_std = 1.59, 0.73
data_normalization = dict(mean=data_mean, std=data_std)
dataset = MIMICDataset(
    data_path,
    label_path,
    normalize=data_normalization,
    preprocessing=False,
    choose_class=[0, 1, 2],
)