## Load dependencies

In [1]:
from keras.applications.vgg16 import VGG16
from keras.models import Model
from keras.layers import Flatten, Dense, Dropout, Activation, Input
from keras.optimizers import Adam
from keras import regularizers
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau, CSVLogger, EarlyStopping
import pickle
# The star import stays at this position on purpose: names imported below it
# keep taking precedence over anything ourUtils happens to export.
from ourUtils import *
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from datetime import datetime

# `np` is used throughout the notebook; import it explicitly rather than
# relying on it leaking in through `from ourUtils import *`.
import numpy as np
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session

Using TensorFlow backend.
The minimum supported version is 2.4.6



### Configure the TensorFlow GPU session

In [2]:
# Limit this process to 90% of the GPU memory and register the resulting
# TensorFlow session as the one Keras uses.
config = tf.ConfigProto(
    gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.9))
session = tf.Session(config=config)
set_session(session)

### Define network

In [3]:
# Convolutional base: VGG16 with ImageNet weights and without its classifier head
vggConv = VGG16(weights='imagenet', include_top=False)
# NOTE: this flag is not read anywhere in this cell; the layer-freezing code
# further down is commented out.
vgg_train = False


def _relu_l2_dense(units):
    """Return a ReLU Dense layer with Glorot-normal init and an L2(0.01) kernel penalty."""
    return Dense(units,
                 activation='relu',
                 kernel_initializer='glorot_normal',
                 kernel_regularizer=regularizers.l2(0.01))


# 224x224 three-channel image input, passed through the convolutional base
vggInput = Input(shape=(224, 224, 3), name='image_input')
output_vggConv = vggConv(vggInput)

# Label-predicting head:
#   flatten -> dense(4096) -> dropout(0.5) -> dense(4096) -> dense(5) -> softmax
lpmF = Flatten()(output_vggConv)
lpm1 = _relu_l2_dense(4096)(lpmF)
lpm1Dr = Dropout(0.5)(lpm1)
lpm2 = _relu_l2_dense(4096)(lpm1Dr)
# A second dropout stage is left disabled:
#lpm2Dr = Dropout(0.5)(lpm2)
logits_layer = Dense(5, activation=None, kernel_initializer='glorot_normal')
lpm3 = logits_layer(lpm2)
lpmS = Activation('softmax')(lpm3)

# Join base and head into one end-to-end model
vggConvSleep = Model(inputs=vggInput, outputs=lpmS)

# Disabled: freezing of the convolutional layers (kept for reference).
#for layer in vggConvSleep.layers[:2]:
#    layer.trainable = False
#else:
#    for layer in vggConvSleep.layers[1].layers[:-2]:
#        layer.trainable = False

# Adam with a small learning rate (1e-5) and no learning-rate decay
optimize = Adam(lr=1e-5, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)

# Categorical cross-entropy loss, tracking categorical accuracy
vggConvSleep.compile(optimizer=optimize,
                     loss='categorical_crossentropy',
                     metrics=['categorical_accuracy'])

# Print the layer-by-layer summary
vggConvSleep.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
image_input (InputLayer)     (None, 224, 224, 3)       0         
_________________________________________________________________
vgg16 (Model)                multiple                  14714688  
_________________________________________________________________
flatten_1 (Flatten)          (None, 25088)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 4096)              102764544 
_________________________________________________________________
dropout_1 (Dropout)          (None, 4096)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 4096)              16781312  
_________________________________________________________________
dense_3 (Dense)              (None, 5)                 20485     
__________

## Define training mode

In [4]:
# Selects which dataset the network is trained on. With 'source' the
# data-loading cell trains on taperImages/pysNetData; any other value makes it
# train on taperImages/hData.
# NOTE(review): the training cell only has branches for 'source' and 'target'
# -- confirm how 'dann' is meant to be handled.
training_mode = 'source'
# training_mode = 'target'
# training_mode = 'dann'

## Create data generators

In [5]:
# Both generators only multiply pixel values by 1/255; no augmentation is configured.
pixel_scale = 1. / 255
train_datagen = ImageDataGenerator(rescale=pixel_scale)
test_datagen = ImageDataGenerator(rescale=pixel_scale)


### Build directory iterators (train / validation / test)

In [6]:
path = '/home/jaskmo/Documents/programering/02456DomainAdaptation/'
pysnet_dir = path + 'taperImages/pysNetData'
hospital_dir = path + 'taperImages/hData'

# `data_path` is the domain trained on; `OC_path` is the other domain, used
# only for the cross-domain test iterator.
if training_mode == 'source':
    data_path, OC_path = pysnet_dir, hospital_dir
else:
    data_path, OC_path = hospital_dir, pysnet_dir

# Number of images per batch
batchSize = 32

# Settings shared by every iterator below. The optional
# color_mode='grayscale' / save_to_dir arguments are left disabled.
flow_kwargs = dict(target_size=(224, 224),
                   batch_size=batchSize,
                   class_mode='categorical')

train_generator = train_datagen.flow_from_directory(
    data_path + '/train', shuffle=True, **flow_kwargs)

validation_generator = test_datagen.flow_from_directory(
    data_path + '/validation', shuffle=True, **flow_kwargs)

# The two test iterators do not pass `shuffle` and so use the library default.
test_generator = test_datagen.flow_from_directory(
    data_path + '/test', **flow_kwargs)

# Test iterator for the other dataset (cross-domain evaluation)
OC_test_generator = test_datagen.flow_from_directory(
    OC_path + '/test', **flow_kwargs)

Found 29772 images belonging to 5 classes.
Found 4807 images belonging to 5 classes.
Found 3862 images belonging to 5 classes.
Found 2722 images belonging to 5 classes.


In [7]:
# Display the class-name -> integer-label mapping of the test iterator
test_generator.class_indices

{'N1': 0, 'N2': 1, 'N3': 2, 'REM': 3, 'wake': 4}

In [8]:
# Whole batches per pass over each iterator. Floor division means a trailing
# partial batch is not counted.
trainStepEpoch, valStepEpoch, testStepEpoch, OC_testStepEpoch = (
    np.floor_divide(gen.n, batchSize)
    for gen in (train_generator, validation_generator,
                test_generator, OC_test_generator)
)

In [9]:
# # Number of subjects for each group
# num_subjects_physionet = 20
# num_subjects_hospital = 17

# # Load all data into memory
# # Change data path before running !!
# data_physionet = pickle.load(open('/home/jaskmo/Documents/DataCollection/sleep-edfx/PickleJar/dataOut.pkl','rb'))
# data_hospital = pickle.load(open('/home/jaskmo/Documents/DataCollection/sleep-edfx/PickleJar/hDataOut.pkl', 'rb'))
# random_perm_physionet = np.random.permutation(num_subjects_physionet)
# random_perm_hospital = np.random.permutation(num_subjects_hospital)
# idx_tmp_physionet = random_perm_physionet[range(num_subjects_physionet - 3)]
# idx_test_physionet = random_perm_physionet[(num_subjects_physionet - 3):num_subjects_physionet]
# idx_tmp_hospital = random_perm_hospital[range(num_subjects_hospital - 3)]
# idx_test_hospital = random_perm_hospital[(num_subjects_hospital- 3) : num_subjects_hospital]
# inputs_train_phys, targets_train_phys, inputs_val_phys, targets_val_phys, inputs_test_phys, targets_test_phys = get_data_complete(
#     idx_tmp_physionet, idx_test_physionet, data_physionet, 'physionet')
# inputs_train_hosp, targets_train_hosp, inputs_val_hosp, targets_val_hosp, inputs_test_hosp, targets_test_hosp = get_data_complete(
#     idx_tmp_hospital, idx_test_hospital, data_hospital, 'hospital')

## Learning rate adaptation

In [10]:
# Multiply the learning rate by 0.3 when val_loss has not improved by more
# than 0.1 for 4 epochs, never going below 1e-6.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    epsilon=0.1,
    factor=0.3,
    patience=4,
    min_lr=0.000001,
    verbose=1,
)

# Stop training when val_loss has not dropped by at least 0.05 for 7 epochs.
# NOTE(review): this callback is created here but is not in the `callbacks`
# list of the training cell -- confirm whether that is intended.
erl_stop = EarlyStopping(
    monitor='val_loss',
    min_delta=0.05,
    patience=7,
    verbose=1,
    mode='min',
)

In [11]:
# Per-class weights for imbalanced training: each class is weighted by how many
# times (floored) the largest class outnumbers it, so the largest class gets 1.
num_class = train_generator.num_class

# Count training samples per class label in one pass.
class_counts = np.bincount(train_generator.classes, minlength=num_class)

# Fraction of the training set that belongs to each class, indexed by label.
class_fraq = [count / float(train_generator.n) for count in class_counts]
big_class = max(class_fraq)

# NOTE(review): a class with zero training samples would divide by zero here.
class_weights = [np.floor_divide(big_class, fraq) for fraq in class_fraq]

# Key the weights by class index directly. The previous
# dict(zip(class_indices.values(), class_weights)) was only correct when the
# dict happened to iterate in label order.
weights_dic = dict(enumerate(class_weights))

## Fit model and save

In [12]:
# Fit model
# Change Save paths before running !!
now = datetime.now()
if training_mode == 'source': # Training on source data from physionet
    csv_logger = CSVLogger('/media/jaskmo/ELEK/bme/Project02456/trainingLog/sourceModel' + 
                           str(now.day) + '-' + str(now.month) + '-' + str(now.year) + '_' + 
                           str(now.hour) + str(now.minute) + '.log')
    # Train the model
    # fit_generator(train_generator, steps_per_epoch=trainStepEpoch, validation_data=validation_generator,
    #                  validation_steps=valStepEpoch, epochs=50, verbose=1, callbacks=[csv_logger, reduce_lr])

    vggConvSleep.fit_generator(train_generator, steps_per_epoch=trainStepEpoch, validation_data=validation_generator, 
                               validation_steps=valStepEpoch, epochs=80, verbose=1, callbacks=[reduce_lr, csv_logger], 
                              )#class_weight=weights_dic)
    
    # save model
    vggConvSleep.save(filepath='/home/jaskmo/Documents/programering/02456DomainAdaptation/models/kerasSource' + 
                  str(now.day) + '-' + str(now.month) + '-' + str(now.year) + '_' + 
                  str(now.hour) + str(now.minute) + '.h5')

elif training_mode == 'target': # Training on target data from hospital
    csv_logger = CSVLogger('/media/jaskmo/ELEK/bme/Project02456/trainingLog/targetModel' + 
                           str(now.day) + '-' + str(now.month) + '-' + str(now.year) + '_' + 
                           str(now.hour) + str(now.minute) + '.log')
    # Train the model
    vggConvSleep.fit_generator(train_generator, steps_per_epoch=trainStepEpoch, validation_data=validation_generator,
                               validation_steps=valStepEpoch, epochs=80, verbose=1, callbacks=[reduce_lr, csv_logger],
                              )#class_weight=weights_dic)
    
    # save model
    vggConvSleep.save(filepath='/home/jaskmo/Documents/programering/02456DomainAdaptation/models/kerasTarget' + 
                  str(now.day) + '-' + str(now.month) + '-' + str(now.year) + '_' + 
                  str(now.hour) + str(now.minute) + '.h5')

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 00017: reducing learning rate to 2.9999999242136253e-06.
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 00021: reducing learning rate to 1e-06.
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80


## Evaluate model on both source and target

In [13]:
# Load exactly one pass of the in-domain test set into memory.
# ceil(n / batchSize) batches cover every image once. The previous
# "1 + floor(n / batchSize)" count pulled an extra batch from the next pass
# whenever n was an exact multiple of the batch size.
num_test_batches = int(np.ceil(test_generator.n / float(batchSize)))

# Start from the beginning of a pass so re-running this cell does not mix
# images from two passes.
test_generator.reset()

# Collect all batches first and concatenate once; concatenating inside the
# loop re-copied the growing array on every iteration.
test_batches = [test_generator.next() for _ in range(num_test_batches)]
test_img = np.concatenate([images for images, _ in test_batches], axis=0)
test_lable = np.concatenate([labels for _, labels in test_batches], axis=0)

In [14]:
#loss, metric = vggConvSleep.evaluate(x=inputs_test_phys, y=targets_test_phys, batch_size=50)
inv_map = {v: k for k, v in test_generator.class_indices.items()}
target_names = list(inv_map.values())

targets_test_int = [np.where(r == 1)[0][0] for r in test_lable]
y_pred = vggConvSleep.predict(test_img)
y_pred2 = np.argmax(y_pred, axis = 1)
# Test accuracy:
acc = accuracy_score(targets_test_int, y_pred2)
print('Accuracy in this domain = ', acc)

conf_mat = confusion_matrix(targets_test_int, y_pred2)
print(conf_mat)
# Per class metrics
class_report = classification_report(targets_test_int, y_pred2, target_names=target_names)
print(class_report)

# Evaluate error on target data
_, metric = vggConvSleep.evaluate_generator(OC_test_generator, steps=OC_testStepEpoch)
print('Accuracy on other domain = ', metric)

    
# elif training_mode == 'target': # Training on target data from hospital
#     # Convert from onehot
#     targets_test_int = [np.where(r == 1)[0][0] for r in targets_test_hosp]
#     y_pred = vggConvSleep.predict(inputs_test_hosp)
#     y_pred2 = np.argmax(y_pred, axis = 1)
#     # Test accuracy:
#     acc = accuracy_score(targets_test_int, y_pred2)
#     print('Accuracy in this domain = ', acc)
#     # Confusion matrix for target
#     conf_mat = confusion_matrix(targets_test_int, y_pred2)
#     print(conf_mat)
#     # Per class metrics
#     class_report = classification_report(targets_test_int, y_pred2, target_names=target_names)
#     print(class_report)
    
#     # Evaluate error on source data
#     _, metric = vggConvSleep.evaluate(x=inputs_test_phys, y=targets_test_phys, batch_size=50)
#     print('Accuracy on other domain = ', metric)

Accuracy in this domain =  0.803469704816
[[ 118   53    0   20   20]
 [ 155 1484  111   98    9]
 [   8   52  441    7    1]
 [  42  118    0  688    3]
 [  36   14    0   12  372]]
             precision    recall  f1-score   support

         N1       0.33      0.56      0.41       211
         N2       0.86      0.80      0.83      1857
         N3       0.80      0.87      0.83       509
        REM       0.83      0.81      0.82       851
       wake       0.92      0.86      0.89       434

avg / total       0.82      0.80      0.81      3862

Accuracy on other domain =  0.829411764706


In [15]:
#reverse_data_split(path + 'taperImages/pysNetData/')

In [16]:
#create_data_split(path + 'taperImages/pysNetData/','pys', 2, 2)