In [None]:
# Colab-only cell: the google.colab package is imported here and Google Drive is
# mounted at /content/gdrive. NOTE(review): presumably this cell is skipped when
# the notebook runs outside Colab - confirm.
from google.colab import drive
drive.mount('/content/gdrive')

In [1]:
import os
import numpy as np

from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA

#from google.colab import file


In [2]:
# Directory with the per-patient recordings (loaded below as "<data_path>/p<ID>.npy").
# The components are joined separately: the previous literal "ICHI14_dataset\data"
# contained the invalid escape "\d" and a separator that only works on Windows.
data_path = os.path.join("ICHI14_dataset", "data")

# Recording IDs available in the dataset (45 entries).
patient_list = ['002','003','005','007','08a','08b','09a','09b', '10a','011','013','014','15a','15b','016',
            '017','018','019','020','021','022','023','025','026','027','028','029','030','031','032',
            '033','034','035','036','037','038','040','042','043','044','045','047','048','049','051']

In [18]:
# 70 % of the recordings go to training; the remaining 30 % is halved into a
# test set and a validation set. The fixed random_state keeps the split stable.
# NOTE(review): IDs such as '08a'/'08b' look like repeated recordings of one
# subject; if so, this per-recording split can place the same subject in more
# than one set - confirm.
train_patient_list, holdout_patient_list = train_test_split(
    patient_list, test_size=0.3, random_state=100
)
test_patient_list, valid_patient_list = train_test_split(
    holdout_patient_list, test_size=0.5, random_state=100
)

In [19]:
# Sizes of: all recordings, train, validation, test (in that order).
for patients in (patient_list, train_patient_list, valid_patient_list, test_patient_list):
    print(len(patients))

45
31
7
7


In [20]:
# Members of the train, validation and test sets (in that order).
for patients in (train_patient_list, valid_patient_list, test_patient_list):
    print(patients)

['022', '09b', '048', '020', '023', '15b', '003', '042', '15a', '038', '025', '011', '018', '029', '031', '014', '08a', '047', '049', '016', '040', '005', '037', '033', '013', '017', '026', '044', '007', '027', '10a']
['035', '034', '051', '019', '045', '043', '08b']
['021', '09a', '002', '028', '032', '036', '030']


In [21]:
def change_labels(sample):
    """
    Collapse the polysomnography codes in sample.gt to a binary label, in place.

    Codes 1, 2, 3 and 5 become 0 (sleep); codes 0, 6, 7 and 8 become 1 (awake).
    Any other code is left unchanged.

    Returns:
    sample - the same (mutated) record array, gt contains label 1(awake) and 0(sleep)
    """
    stages = sample.gt

    # Both masks are computed from the original codes before anything is
    # overwritten, so the original code 0 ends up as "awake" and is not mixed
    # up with the new "sleep" value 0.
    is_sleep = np.isin(stages, (1, 2, 3, 5))
    is_awake = np.isin(stages, (0, 6, 7, 8))

    stages[is_sleep] = 0
    stages[is_awake] = 1

    return sample

#-------------------------------------------------------------------------

def decoder(sample):
    '''
    Expand a recording so that it holds one row per sensor reading.

    Every record is repeated sample.d times, then the x, y, z and gt fields
    are copied into the columns of a plain float array.

    Returns:
    decoded_sample - contains accelerometer and ps data for each sensor record, ndarray of shape (n_records, 4)

    '''
    expanded = np.repeat(sample, sample.d, axis=0)

    # Every column is overwritten below, so the buffer needs no initial values.
    decoded_sample = np.empty((expanded.shape[0], 4))
    for column, field in enumerate(("x", "y", "z", "gt")):
        decoded_sample[:, column] = expanded[field]

    return decoded_sample

#-------------------------------------------------------------------------

def divide_by_windows(decoded_sample, window_len=60, sampling_rate=100):
    """
    Cut a decoded recording into consecutive, non-overlapping windows.

    Rows that do not fill a complete window at the end are dropped.

    Parameters:
    decoded_sample - ndarray of shape (n_records, 4); columns 0-2 are the accelerometer axes, column 3 the label
    window_len - length of each window in seconds, int
    sampling_rate - number of rows per second in decoded_sample, int (default 100, the value that was hard-coded before)
    Returns:
    X - accelerometer data, ndarray of shape (n_windows, window_len * sampling_rate, 3)
    y - polisomnography data, ndarray of shape (n_windows, ); 1 when at least half
        of the labels in the window are non-zero, otherwise 0
    Raises:
    ValueError - if window_len or sampling_rate is not positive
    """
    if window_len <= 0 or sampling_rate <= 0:
        raise ValueError("window_len and sampling_rate must be positive, got %r and %r"
                         % (window_len, sampling_rate))

    samples_per_window = window_len * sampling_rate
    n_windows = decoded_sample.shape[0] // samples_per_window

    # Keep only the rows that fill complete windows.
    usable = decoded_sample[: n_windows * samples_per_window]

    # np.array(...) copies, so X never aliases the caller's buffer.
    X = np.array(usable[:, 0:3], dtype=float).reshape(n_windows, samples_per_window, 3)

    labels = usable[:, 3].reshape(n_windows, samples_per_window)
    nonzero_per_window = (labels != 0).sum(axis=1)
    y = (nonzero_per_window >= samples_per_window / 2).astype(float)

    return X, y

#-------------------------------------------------------------------------

def get_one_patient_data(data_path, patient, window_len=60):
    """
    Load one recording and turn it into labelled windows.

    The file "<data_path>/p<patient>.npy" is read as a record array, its labels
    are binarised, the records are decoded and the result is cut into windows.

    Returns:
    X, y - for one patient
    """
    recording = np.load("%s/p%s.npy" % (data_path, patient)).view(np.recarray)
    decoded = decoder(change_labels(recording))

    return divide_by_windows(decoded, window_len)

#-------------------------------------------------------------------------

def get_data_for_model(data_path, patient_list, window_len=60):
    """
    Load every patient in patient_list and concatenate their windows.

    Returns:
    X, y - for all patient list, ndarray of shape (n_records, n_features, n_channels=3)
    """
    per_patient = [
        get_one_patient_data(data_path, patient, window_len)
        for patient in patient_list
    ]

    # Patients are appended one after another along the window axis.
    X_all_data = np.concatenate([features for features, _labels in per_patient], axis=0)
    y_all_data = np.concatenate([labels for _features, labels in per_patient], axis=0)

    return X_all_data, y_all_data
  
#-------------------------------------------------------------------------

def _stack_following_windows(X, y, n_windows):
    """
    Join every window with the n_windows windows that follow it.

    Parameters:
    X - windows of one patient, ndarray of shape (n, n_features, n_channels)
    y - one label per window, ndarray of shape (n, )
    n_windows - number of following windows appended to each window, int >= 0
    Returns:
    X_new - ndarray of shape (max(n - n_windows, 0), n_features * (n_windows + 1), n_channels)
    y_new - label of the middle window of every stack, same length as X_new
    """
    n_stacked = max(X.shape[0] - n_windows, 0)
    stacked_len = X.shape[1] * (n_windows + 1)

    X_new = np.zeros((n_stacked, stacked_len, X.shape[2]))
    for i in range(n_stacked):
        # Reshaping (n_windows + 1, n_features, C) to (stacked_len, C) is the
        # same as concatenating the windows one after another along axis 0.
        X_new[i] = X[i: i + n_windows + 1].reshape(stacked_len, X.shape[2])

    # Stack i covers windows i .. i + n_windows; its label is the one of the
    # middle window. For even n_windows this equals the former
    # y[n_windows//2: -(n_windows//2)]; unlike that slice it also stays aligned
    # with X_new for odd n_windows and is not empty for n_windows == 1.
    first_label = n_windows // 2
    y_new = y[first_label: first_label + n_stacked]

    return X_new, y_new


def get_dawnsampled_data(data_path, patient_list, window_len=60, dawnsample="pca", n_components=10, n_windows=10):

    """
    Load all patients, reduce every window and stack neighbouring windows.

    Parameters:
    dawnsample - "pca", "mean", "max", "mode", "simple" or None - determine the type of data reducing;
                 None keeps the windows unreduced. Any other value also keeps them
                 unreduced (as before) but now emits a warning.
    n_components - number of values kept per window and channel by the reducer, int
    n_windows - number of following windows joined to each window, int >= 0
    Returns:
    X, y - reduced data for all patient list and combine several windows data,
           ndarray of shape (n_records, n_components * (n_windows + 1), n_channels=3)
    Raises:
    ValueError - if n_windows is negative
    """
    import warnings

    if n_windows < 0:
        raise ValueError("n_windows must be >= 0, got %r" % (n_windows,))

    reducers = {
        "pca": reduce_data_pca,
        "mean": reduce_data_mean,
        "max": reduce_data_max,
        "mode": reduce_data_mode,
        "simple": reduce_data_simple,
    }

    # None is a documented value; calling .lower() on it used to raise.
    method = None if dawnsample is None else dawnsample.lower()
    reducer = reducers.get(method)
    if method is not None and reducer is None:
        warnings.warn("Unknown dawnsample method %r: the data is left unreduced" % (dawnsample,))

    X_all_data = []
    y_all_data = []
    for patient in patient_list:
        X, y = get_one_patient_data(data_path, patient, window_len)

        # The reducer runs once per patient, so a fitted reducer such as PCA is
        # fitted on each patient's windows separately.
        if reducer is not None:
            X = reducer(X, n_components=n_components)

        # A patient with n_windows windows or fewer contributes zero rows
        # instead of failing on a negative array dimension.
        X_new, y_new = _stack_following_windows(X, y, n_windows)

        X_all_data.append(X_new)
        y_all_data.append(y_new)

    X_all_data = np.concatenate(X_all_data, axis=0)
    y_all_data = np.concatenate(y_all_data, axis=0)

    return X_all_data, y_all_data
  
def reduce_data_pca(X, n_components=300):
    """
    Reduce every channel of X to n_components values with its own PCA.

    A separate PCA is fitted for each channel on the data passed in and then
    applied to that same data.

    Parameters:
    X - ndarray of shape (n_records, n_features, n_channels)
    n_components - number of principal components kept per channel, int

    Returns:
    X_reduced - reduced data, ndarray of shape (n_records, n_components, n_channels)
    """
    reduced_channels = []
    for channel in range(X.shape[2]):
        channel_data = X[:, :, channel]
        pca = PCA(n_components)
        pca.fit(channel_data)
        reduced_channels.append(pca.transform(channel_data))

    # Stack on a new last axis so that X_reduced[:, :, c] holds the components
    # of channel c. The former concatenate(axis=1).reshape(n, n_components, 3)
    # interleaved the values and mixed the channels with each other.
    return np.stack(reduced_channels, axis=2)


def reduce_data_max(X, n_components=600):
    """
    Down-sample every window by taking the maximum of consecutive blocks.

    The feature axis is cut into n_components blocks of
    n_features // n_components values; values left over at the end are ignored.

    Parameters:
    X - ndarray of shape (n_records, n_features, n_channels)
    n_components - number of blocks (output values per channel), int

    Returns:
    X_reduced - reduced data, float ndarray of shape (n_records, n_components, n_channels)

    Raises:
    ValueError - if n_components is not in 1..n_features
    """
    n_records, n_features, n_channels = X.shape
    if not 0 < n_components <= n_features:
        raise ValueError("n_components must be in 1..%d, got %r" % (n_features, n_components))

    window_len = n_features // n_components

    # View the used part of the feature axis as (block, position-in-block).
    blocks = X[:, :n_components * window_len, :].reshape(
        n_records, n_components, window_len, n_channels)

    return blocks.max(axis=2).astype(float)
  

def reduce_data_mean(X, n_components=600):
    """
    Down-sample every window by averaging consecutive blocks.

    The feature axis is cut into n_components blocks of
    n_features // n_components values; values left over at the end are ignored.

    Parameters:
    X - ndarray of shape (n_records, n_features, n_channels)
    n_components - number of blocks (output values per channel), int

    Returns:
    X_reduced - reduced data, float ndarray of shape (n_records, n_components, n_channels)

    Raises:
    ValueError - if n_components is not in 1..n_features (previously a too
                 large value produced empty blocks and an all-NaN result)
    """
    n_records, n_features, n_channels = X.shape
    if not 0 < n_components <= n_features:
        raise ValueError("n_components must be in 1..%d, got %r" % (n_features, n_components))

    window_len = n_features // n_components

    # View the used part of the feature axis as (block, position-in-block).
    blocks = X[:, :n_components * window_len, :].reshape(
        n_records, n_components, window_len, n_channels)

    return blocks.mean(axis=2).astype(float)
  
    
def reduce_data_mode(X, n_components=600):
    """
    Down-sample every window by taking the most frequent value of consecutive blocks.

    The feature axis is cut into n_components blocks of
    n_features // n_components values; values left over at the end are ignored.

    Parameters:
    X - ndarray of shape (n_records, n_features, n_channels)
    n_components - number of blocks (output values per channel), int

    Returns:
    X_reduced - reduced data, float ndarray of shape (n_records, n_components, n_channels)

    Raises:
    ValueError - if n_components is not in 1..n_features
    """

    from scipy.stats import mode

    n_records, n_features, n_channels = X.shape
    if not 0 < n_components <= n_features:
        raise ValueError("n_components must be in 1..%d, got %r" % (n_features, n_components))

    window_len = n_features // n_components
    X_reduced = np.zeros((n_records, n_components, n_channels))

    for i in range(n_components):
        block = X[:, i * window_len: (i + 1) * window_len, :]
        # mode() returns a (mode, count) pair; only the mode array is wanted.
        # Depending on the SciPy version the reduced axis is kept with length 1
        # or dropped, so the result is reshaped explicitly.
        block_mode = np.asarray(mode(block, axis=1)[0])
        X_reduced[:, i, :] = block_mode.reshape(n_records, n_channels)

    return X_reduced
  
def reduce_data_simple(X, n_components=600):
    """
    Down-sample every window by keeping the first value of consecutive blocks.

    The feature axis is cut into n_components blocks of
    n_features // n_components values and the first value of each block is kept.

    Parameters:
    X - ndarray of shape (n_records, n_features, n_channels)
    n_components - number of blocks (output values per channel), int

    Returns:
    X_reduced - reduced data, float ndarray of shape (n_records, n_components, n_channels)

    Raises:
    ValueError - if n_components is not in 1..n_features (previously a too
                 large value repeated the first sample n_components times)
    """
    n_records, n_features, n_channels = X.shape
    if not 0 < n_components <= n_features:
        raise ValueError("n_components must be in 1..%d, got %r" % (n_features, n_components))

    window_len = n_features // n_components

    # A strided slice picks positions 0, window_len, 2 * window_len, ...;
    # astype() returns an independent float copy.
    return X[:, :n_components * window_len:window_len, :].astype(float)

In [104]:
# Unreduced 240-second windows for the three patient sets.
# NOTE: the get_dawnsampled_data cell further down assigns the same six names
# again, so these arrays are replaced once that cell runs.
raw_window_len = 240
X_train, y_train = get_data_for_model(data_path, train_patient_list, window_len=raw_window_len)
X_valid, y_valid = get_data_for_model(data_path, valid_patient_list, window_len=raw_window_len)
X_test, y_test = get_data_for_model(data_path, test_patient_list, window_len=raw_window_len)

In [105]:
# Shapes of the train, validation and test features, then the smallest train value.
for features in (X_train, X_valid, X_test):
    print(features.shape)
np.min(X_train)

(3949, 24000, 3)
(890, 24000, 3)
(948, 24000, 3)


0.0

In [22]:
%%time
# One shared set of options so the three patient sets are processed identically.
dawnsample_options = dict(window_len=60, dawnsample="pca", n_components=60, n_windows=12)
X_train, y_train = get_dawnsampled_data(data_path, train_patient_list, **dawnsample_options)
X_valid, y_valid = get_dawnsampled_data(data_path, valid_patient_list, **dawnsample_options)
X_test, y_test = get_dawnsampled_data(data_path, test_patient_list, **dawnsample_options)

Wall time: 1min 13s


In [23]:
# Shapes of: train features, train labels, validation features, test features.
for array in (X_train, y_train, X_valid, X_test):
    print(array.shape)

(15463, 780, 3)
(15463,)
(3481, 780, 3)
(3721, 780, 3)


In [33]:
def _to_square_images(X, side):
    """Keep the first side*side values of axis 1 and fold them into a (side, side, 3) image."""
    return X[:, :side**2, :].reshape(X.shape[0], side, side, 3)


# Largest square that fits into the feature axis; values beyond size**2 are dropped.
size = int(np.sqrt(X_train.shape[1]))
print(size**2)
X_train_new = _to_square_images(X_train, size)
X_test_new = _to_square_images(X_test, size)
X_valid_new = _to_square_images(X_valid, size)

5329


In [34]:
# Shapes of: train images, train labels, validation images, test images.
for array in (X_train_new, y_train, X_valid_new, X_test_new):
    print(array.shape)

(15587, 73, 73, 3)
(15587,)
(3509, 73, 73, 3)
(3749, 73, 73, 3)


In [13]:
from keras.applications import VGG16, VGG19, Xception

Using TensorFlow backend.


In [10]:
from keras.layers import Dense, Flatten, Dropout
from keras.layers import Conv1D, MaxPooling1D, Activation
from keras.models import Sequential
from keras.optimizers import SGD, adam
from keras.layers.normalization import BatchNormalization
from keras.regularizers import l2

from keras.callbacks import ModelCheckpoint, EarlyStopping

Using TensorFlow backend.


In [36]:
# NOTE: despite its name, vgg16_net holds a VGG19 network, built without its
# top layers (include_top=False) and with 'imagenet' weights.
vgg16_net = VGG19(include_top=False, weights='imagenet', input_shape=(size, size, 3))

# Mark the pretrained network as non-trainable.
vgg16_net.trainable = False

In [44]:
# Binary sleep/awake classifier: VGG19 features followed by a small dense head.
model = Sequential()

model.add(vgg16_net)
model.add(Flatten())

#model.add(Dense(256))
#model.add(Activation('relu'))
#model.add(Dropout(0.5))

model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))

# A single output unit needs a sigmoid. Softmax normalises across the units of
# the layer, so on one unit it always outputs 1.0 and binary_crossentropy gets
# no usable gradient.
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer=adam(lr=1e-5),
              metrics=['accuracy'])

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg19 (Model)                (None, 2, 2, 512)         20024384  
_________________________________________________________________
flatten_4 (Flatten)          (None, 2048)              0         
_________________________________________________________________
dense_6 (Dense)              (None, 64)                131136    
_________________________________________________________________
activation_5 (Activation)    (None, 64)                0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 65        
_________________________________________________________________
activation_6 (Activation)    (None, 1)                 0         
Total para

In [45]:
# Save the weights only when val_acc improves; stop training after 5 epochs
# without an improvement of val_loss.
checkpoint = ModelCheckpoint('CNN2_model_raw_data_weights.hdf5', monitor='val_acc', save_best_only=True)
early_stopping = EarlyStopping(monitor='val_loss', patience=5)
callbacks = [checkpoint, early_stopping]

In [None]:
%%time

# Train the VGG-based model on the square "images"; the validation set drives the callbacks.
model.fit(
    X_train_new,
    y_train,
    validation_data=(X_valid_new, y_valid),
    epochs=30,
    batch_size=64,
    callbacks=callbacks,
    verbose=1,
)

In [24]:
def _conv_stage(filters, pool=False, **conv_options):
    """Return the layers Conv1D -> BatchNormalization -> (optional MaxPooling1D) -> Dropout."""
    stage = [
        Conv1D(filters, 3, activation="relu", kernel_initializer="he_uniform",
               kernel_regularizer=l2(0.1), **conv_options),
        BatchNormalization(),
    ]
    if pool:
        stage.append(MaxPooling1D(pool_size=2))
    stage.append(Dropout(0.5))
    return stage


# (filters, pool after this stage?, extra Conv1D options) for every convolutional stage.
conv_plan = [
    (64, False, {"input_shape": (780, 3)}),
    (64, True, {}),
    (128, False, {}),
    (128, True, {}),
    (256, False, {}),
    (256, False, {}),
    (256, True, {}),
]
# A further group of three Conv1D(512) stages with l2(0.01) was tried and is disabled.

NN = Sequential()

for stage_filters, stage_pool, stage_options in conv_plan:
    for stage_layer in _conv_stage(stage_filters, stage_pool, **stage_options):
        NN.add(stage_layer)

NN.add(Flatten())

# Two identical dense stages in front of the single sigmoid output unit.
for dense_index in range(2):
    NN.add(Dense(16, activation="relu", kernel_initializer="he_uniform", kernel_regularizer=l2(0.1)))
    NN.add(BatchNormalization(axis=1))
    NN.add(Dropout(0.5))

NN.add(Dense(1, activation="sigmoid", kernel_initializer="glorot_uniform", kernel_regularizer=l2(0.1)))

NN.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

print(NN.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_8 (Conv1D)            (None, 778, 64)           640       
_________________________________________________________________
batch_normalization_10 (Batc (None, 778, 64)           256       
_________________________________________________________________
dropout_10 (Dropout)         (None, 778, 64)           0         
_________________________________________________________________
conv1d_9 (Conv1D)            (None, 776, 64)           12352     
_________________________________________________________________
batch_normalization_11 (Batc (None, 776, 64)           256       
_________________________________________________________________
max_pooling1d_4 (MaxPooling1 (None, 388, 64)           0         
_________________________________________________________________
dropout_11 (Dropout)         (None, 388, 64)           0         
__________

In [25]:
# Save the weights only when val_acc improves; stop training after 5 epochs
# without an improvement of val_loss.
checkpoint = ModelCheckpoint('CNN_model_raw_data_weights.hdf5', monitor='val_acc', save_best_only=True)
early_stopping = EarlyStopping(monitor='val_loss', patience=5)
callbacks = [checkpoint, early_stopping]

In [None]:
%%time

# Train the Conv1D model; samples are shuffled and the validation set drives the callbacks.
NN.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=30,
    batch_size=64,
    shuffle=True,
    callbacks=callbacks,
    verbose=1,
)

Train on 15463 samples, validate on 3481 samples
Epoch 1/30

In [None]:
# scores[0] is the loss; scores[1] is the accuracy requested via metrics=["accuracy"] at compile time.
scores = NN.evaluate(X_test, y_test)
test_accuracy = scores[1]
print("Test accuracy =", test_accuracy)

In [None]:
# `files` was never imported in this notebook, so the bare call raised NameError.
# The import is kept local and optional because google.colab only exists on Colab.
try:
    from google.colab import files as colab_files
except ImportError:
    colab_files = None

if colab_files is not None:
    colab_files.download('CNN_model_raw_data_weights.hdf5')

In [28]:
# Load best model: this is the file name given to the ModelCheckpoint callback
# above, which is configured with save_best_only=True and monitor='val_acc'.
NN.load_weights("CNN_model_raw_data_weights.hdf5")

In [29]:
# Test-set score after the checkpoint weights were loaded; scores[1] is the accuracy metric.
scores = NN.evaluate(X_test, y_test)
test_accuracy = scores[1]
print("Test accuracy =", test_accuracy)

Test accuracy = 0.7127116366568127


In [30]:
# Validation-set score after the checkpoint weights were loaded; scores[1] is the accuracy metric.
scores = NN.evaluate(X_valid, y_valid)
valid_accuracy = scores[1]
print("Valid accuracy =", valid_accuracy)

Valid accuracy = 0.7411663314625643


0.7275  1w 600s 600c s

0.7222  1w 600s 600c b

0.7248  1w 600s 300c s

0.7152 3w 240s 240c s

0.7011 13w 60s 60c s



In [25]:
from sklearn import metrics

In [None]:
# Report precision/recall, the confusion matrix and accuracy for the train and
# the test set. The second section used to be labelled "Train"/"train" as well.
for split_title, split_name, X_split, y_split in (
    ("Train", "train", X_train, y_train),
    ("Test", "test", X_test, y_test),
):
    # predict() returns the sigmoid output as floats of shape (n, 1). The
    # sklearn classification metrics need hard 0/1 labels, so threshold at 0.5
    # and flatten to shape (n, ).
    y_predict = (NN.predict(X_split) > 0.5).astype(int).ravel()

    print("\n%s set result: " % split_title)
    print(metrics.classification_report(y_split, y_predict))
    print("Confussion matrix: \n", metrics.confusion_matrix(y_split, y_predict))

    accuracy = metrics.accuracy_score(y_split, y_predict)
    print("\nAccuracy on %s set: " % split_name, accuracy)

In [None]:
# Write the model architecture (no weights) as JSON next to the notebook.
saved_model = NN.to_json()
with open("CNN_model_raw_data.json", "w") as json_file:
    json_file.write(saved_model)

# `files` was never imported in this notebook, so the bare call raised NameError.
# The import is kept local and optional because google.colab only exists on Colab.
try:
    from google.colab import files as colab_files
except ImportError:
    colab_files = None

if colab_files is not None:
    colab_files.download('CNN_model_raw_data.json')