In [None]:
from __future__ import print_function

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from os import walk

from sklearn.model_selection import train_test_split
#from sklearn import preprocessing

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop, SGD
from keras.backend.tensorflow_backend import set_session

import tensorflow as tf

import custom_loss_functions as cust_loss
from load_data import get_subset_ids, get_subset_sample_idx, DataGenerator_raw

In [None]:
# Set up the TensorFlow session used by the Keras backend.
# The device counts passed to the session config are 2 for CPU and 0 for GPU.
device_limits = dict(CPU=2, GPU=0)
config = tf.ConfigProto(device_count=device_limits)
sess = tf.Session(config=config)
set_session(sess)

## Model

In [None]:
def simple_model(in_dim):
    """Build and compile a small fully connected regression network.

    The network has two hidden ReLU layers with 128 units each, followed by
    a single linear output unit. A layer summary is printed before the model
    is compiled.

    Parameters
    ----------
    in_dim : int
        Number of input features per sample (used as ``input_shape=(in_dim,)``).

    Returns
    -------
    Sequential
        The compiled model. It is trained with ``cust_loss.mse_wrap_angle``
        as loss, reports ``cust_loss.mae_wrap_angle`` as metric and uses
        RMSprop(0.001) as optimizer.
    """
    # assemble the layer stack first, then hand it to the model in one go
    layer_stack = [
        Dense(128, activation='relu', input_shape=(in_dim,)),
        Dense(128, activation='relu'),
        Dense(1, activation='linear'),
    ]
    network = Sequential(layer_stack)
    # print an overview of the layers
    network.summary()
    # attach optimizer, loss and metric
    network.compile(optimizer=RMSprop(0.001),
                    loss=cust_loss.mse_wrap_angle,
                    metrics=[cust_loss.mae_wrap_angle])
    return network

## Training

In [None]:
# path of the HDF5 file containing the dataset
filename = '../generated/database_raw_redframes.h5'

# training hyper-parameters
num_epochs = 10
batch_size = 1024

# Substrings of parameters that select the held-out samples,
# e.g. 'NFCHOA', 'pos10', 'R006'.
# Alternative selection kept for reference:
#   ['LWFS','R006','M006','M027','R027','M013','R013']
# NOTE(review): validation and test currently select the same substrings --
# confirm that evaluating on the validation data is intended.
subset_test = ['NFCHOA_M027', 'NFCHOA_R027']
subset_valid = ['NFCHOA_M027', 'NFCHOA_R027']

In [None]:
# load data indices; the global sample id becomes the index of the table
ID_ref = pd.read_hdf(filename, key='ID_reference_table')
ID_ref = ID_ref.set_index('global_id', drop=True)
list_IDs = ID_ref.index.values.astype(np.uint32)
n_samples = list_IDs.shape[0]
n_frames = 10

# load metadata
pos_table = pd.read_hdf(filename, key='position_table')
pos_table = pos_table.set_index('pos_id', drop=True)
cond_table = pd.read_hdf(filename, key='condition_table')
cond_table = cond_table.set_index('cond_id', drop=True)

# only validate/test on samples specified by the subset lists
cond_ids_valid, pos_ids_valid, subject_ids_valid = get_subset_ids(subset_valid, cond_table)
cond_ids_test, pos_ids_test, subject_ids_test = get_subset_ids(subset_test, cond_table)
# extract the respective samples from the ID_ref table
list_IDs_valid = get_subset_sample_idx(ID_ref, cond_ids_valid, pos_ids_valid, subject_ids_valid)
list_IDs_test = get_subset_sample_idx(ID_ref, cond_ids_test, pos_ids_test, subject_ids_test)

# Everything that is neither validation nor test data is used for training.
# The held-out IDs are removed by VALUE: np.delete() would interpret them as
# positions into list_IDs, which only yields the right training set if every
# global_id happens to equal its row position. The boolean mask keeps the
# original order of the remaining IDs.
idx_list = np.unique(np.concatenate((list_IDs_valid, list_IDs_test)))
list_IDs_train = list_IDs[~np.isin(list_IDs, idx_list)]

# dictionary containing train, validation and test subset IDs
# (a random split via train_test_split is not used here)
partition = {'train': list_IDs_train,
             'validation': list_IDs_valid,
             'test': list_IDs_test}
del list_IDs

# load data
target_data = pd.read_hdf(filename, key='target_data')
feature_data = pd.read_hdf(filename, key='feature_data')
feature_labels = feature_data.columns.tolist()

# NOTE: feature normalization (min-max scaling) is currently not applied.

In [None]:
# Train the model batch-wise via model.fit_generator

# settings shared by the batch generators
n_features = feature_data.shape[1]
params = dict(dim=n_features,
              batch_size=batch_size,
              feature_data=feature_data.values,
              target_data=target_data.values,
              shuffle=True,
              n_frames=n_frames)
train_batch_generator = DataGenerator_raw(partition['train'], **params)
valid_batch_generator = DataGenerator_raw(partition['validation'], **params)

model = simple_model(n_features)

# the logger is created here but not handed to fit_generator
# (no callbacks are passed below)
csv_logger = keras.callbacks.CSVLogger('log.csv')

# steps_per_epoch / validation_steps are not set explicitly
fit_options = dict(epochs=num_epochs,
                   verbose=1,
                   validation_data=valid_batch_generator,
                   use_multiprocessing=True,
                   workers=4,
                   max_queue_size=1000)
history = model.fit_generator(generator=train_batch_generator, **fit_options)

In [None]:
# show the configuration of the model as the output of this cell
model.get_config()

## Evaluation

In [None]:
# Plot the training history: one figure per logged quantity, each showing the
# training and the validation curve over the epochs.
# The model is compiled with mse_wrap_angle as loss and mae_wrap_angle as
# metric, so the first figure is labelled as MSE-based loss and the second
# one as MAE.
curve_specs = [
    # (history key, figure title, y-axis label)
    ('loss', 'Model loss (mse w angle wrapping)', 'Loss'),
    ('mae_wrap_angle', 'Model loss metric mae w angle wrapping', 'MAE'),
]
for history_key, title, ylabel in curve_specs:
    fig, ax = plt.subplots()
    ax.plot(history.history[history_key])
    # validation values are logged under the same key with a 'val_' prefix
    ax.plot(history.history['val_' + history_key])
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('Epoch')
    ax.legend(['Train', 'Validation'], loc='upper left')
    plt.show()

In [None]:
# Evaluate the trained model on the test partition
test_batch_generator = DataGenerator_raw(partition['test'], **params)
score = model.evaluate_generator(test_batch_generator, verbose=1)
# first entry is read as the loss, second as the wrap-angle MAE metric
test_loss, test_mae_wrap = score[0], score[1]
print('Test loss:', test_loss)
print('Test mae w wrap:', test_mae_wrap)

## Save

In [None]:
# Save history to json file
import json

# Cast every logged value to a built-in float first: depending on the Keras /
# NumPy versions in use, the history may contain NumPy scalars (e.g. float32),
# which json.dump refuses to serialize (TypeError).
history_serializable = {metric: [float(value) for value in values]
                        for metric, values in history.history.items()}
with open('../../../models_trained/m3_history.json', 'w') as f:
    json.dump(history_serializable, f)

In [None]:
from keras.models import load_model

# Write the trained model to disk, then drop the in-memory reference.
# NOTE(review): the file name mentions bs2048/adam, whereas this notebook
# sets batch_size = 1024 and compiles with RMSprop -- confirm the name.
model_path = '../../../models_trained/m3_bs2048_adam_msaw_test-s18-s19-s20.h5'
model.save(model_path)
del model