In [None]:
from __future__ import print_function

import json
import os
from os import walk
from os.path import splitext

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn import preprocessing

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop, SGD

import custom_loss_functions as cust_loss
from load_data import get_ignore_ids, DataGenerator_raw

## Model

In [None]:
def simple_model(in_dim):
    """Build, summarise and compile a small fully connected network.

    The network stacks two 128-unit ReLU layers and a single linear output
    unit on top of an input of width ``in_dim``. It is compiled with the
    ``'adam'`` optimizer, ``cust_loss.mse_wrap_angle`` as loss, and
    ``cust_loss.mae_wrap_angle``, ``'mae'`` and ``'mse'`` as metrics.
    Calling this function also prints the model summary.

    Parameters
    ----------
    in_dim : int
        Number of input features per sample.

    Returns
    -------
    keras.models.Sequential
        The compiled model.
    """
    # layer stack, listed in forward order
    layers = [
        Dense(128, activation='relu', input_shape=(in_dim,)),
        Dense(128, activation='relu'),
        Dense(1, activation='linear'),
    ]
    model = Sequential(layers)

    # print the layer/parameter overview
    model.summary()

    # first metric is the wrap-aware MAE, followed by the plain MAE and MSE
    tracked_metrics = [cust_loss.mae_wrap_angle, 'mae', 'mse']
    model.compile(loss=cust_loss.mse_wrap_angle,
                  optimizer='adam',
                  metrics=tracked_metrics)
    return model

## Training

In [None]:
# define training parameters
batch_size = 32
num_epochs = 10

# HDF5 file containing the dataset. The DATABASE_RAW_H5 environment variable
# overrides the default path, so the notebook can run on another machine
# without editing this cell.
filename = os.environ.get(
    'DATABASE_RAW_H5',
    '/media/feliximmohr/Storage/master_thesis/generated/database/raw/database_raw.h5')

# list of substrings of parameters of samples to ignore, e.g. 'NFCHOA', 'pos10', 'R006'
ignore_list = [] #['LWFS','R006','M006','M027','R027','M013','R013']

In [None]:
# load data indices: the reference table is indexed by global_id and is
# filtered below through its pos_id and cond_id columns
ID_ref = pd.read_hdf(filename, key='ID_reference_table').set_index('global_id', drop=True)
list_IDs = ID_ref.index.values.astype(np.uint32)

# load metadata
position_table = pd.read_hdf(filename, key='position_table').set_index('pos_id', drop=True)
condition_table = pd.read_hdf(filename, key='condition_table').set_index('cond_id', drop=True)

# only train on samples not to be ignored specified by ignore_list
cond_ign_ids, pos_ign_ids = get_ignore_ids(ignore_list, condition_table)

# Row-wise mask over ID_ref: True where the sample's position or condition
# is in the ignore sets.
ignore_mask = (ID_ref.pos_id.isin(list(pos_ign_ids))
               | ID_ref.cond_id.isin(list(cond_ign_ids))).values

# sorted, de-duplicated global ids of all ignored samples
ign_idx_list = np.unique(list_IDs[ignore_mask])

# Drop ignored samples by VALUE. np.delete(list_IDs, ign_idx_list) would
# interpret the ids as positions into list_IDs, which is only correct when
# global_id happens to equal the row position.
list_IDs = list_IDs[~ignore_mask]

In [None]:
# train_test_split

# Fixed seed so the train/validation/test partition is identical on every
# run; otherwise the held-out test set of a saved model cannot be reproduced.
split_seed = 42

# split data set in training, validation and test data:
# first hold out 20% as test set, then 20% of the remainder as validation set
train_val_ids, test_ids = train_test_split(list_IDs, shuffle=True, test_size=0.2,
                                           random_state=split_seed)
train_ids, valid_ids = train_test_split(train_val_ids, shuffle=True, test_size=0.2,
                                        random_state=split_seed)

# dictionary containing train, validation and test subset IDs
partition = {'train': train_ids, 'validation': valid_ids, 'test': test_ids}

# free the full ID list (re-run the index-loading cell before re-running this one)
del list_IDs

In [None]:
# read the target and feature tables from the dataset file (targets first)
target_data, feature_data = (pd.read_hdf(filename, key=table_key)
                             for table_key in ('target_data', 'feature_data'))

# column names of the feature table
feature_labels = feature_data.columns.tolist()

In [None]:
# normalization (currently disabled: all scaler code below is commented out, so features are used unscaled)
# define scaler
#minmax_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1),copy=False)
#minmax_scaler.fit(feature_data)

#std_scaler = preprocessing.StandardScaler().fit(feature_data)

# perform scaling
#feature_data = minmax_scaler.transform(feature_data)
#minmax_scaler.transform(feature_data)

In [None]:
# Train the model batch-wise with model.fit_generator.

# keyword arguments shared by all DataGenerator_raw instances
params = dict(dim=feature_data.shape[1],
              batch_size=batch_size,
              feature_data=feature_data.values,
              target_data=target_data.values,
              shuffle=True)

# one generator per subset of sample IDs
train_batch_generator = DataGenerator_raw(partition['train'], **params)
valid_batch_generator = DataGenerator_raw(partition['validation'], **params)

# input width of the network equals the number of feature columns
model = simple_model(params['dim'])

# no explicit steps_per_epoch / validation_steps are passed
history = model.fit_generator(
    train_batch_generator,
    epochs=num_epochs,
    verbose=1,
    validation_data=valid_batch_generator,
    use_multiprocessing=True,
    workers=4,
)

## Evaluation

In [None]:
# plot train history: training loss vs. validation loss per epoch
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='Train')
# 'val_loss' comes from the validation generator, not from the test set,
# so it is labelled 'Validation'
ax.plot(history.history['val_loss'], label='Validation')
ax.set_title('Model loss')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
ax.legend(loc='upper left')
plt.show()

In [None]:
# evaluate the trained model on the held-out test subset
test_batch_generator = DataGenerator_raw(partition['test'], **params)
score = model.evaluate_generator(test_batch_generator, verbose=1)

# score[0] is the loss; score[1] and score[2] follow the order of the
# metrics list passed to model.compile (wrap-aware MAE, then plain MAE)
captions = ('Test loss:', 'Test mae w wrap:', 'Test mae w/o wrap:')
for position, caption in enumerate(captions):
    print(caption, score[position])

## Save

In [None]:
# Save history to json file
# Metric values may be NumPy scalars (e.g. float32), which the json module
# cannot serialise; convert every entry to a plain Python float first.
history_serializable = {metric: [float(value) for value in values]
                        for metric, values in history.history.items()}
with open('file.json', 'w') as f:
    json.dump(history_serializable, f)

In [None]:
from keras.models import load_model
# Write the trained model to disk under a fixed file name
model_path = 'my_model.h5'
model.save(model_path)