In [None]:
from __future__ import print_function

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from os import walk

from sklearn.model_selection import train_test_split
from sklearn import preprocessing

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop, SGD
from keras.utils import Sequence

import custom_loss_functions as c_loss

In [None]:
class data_generator(Sequence):
    """
    Keras ``Sequence`` that streams batches from a list of CSV files.

    One batch is built from ``batch_size`` consecutive entries of
    ``filenames``: the files are read with pandas, stacked row-wise and
    split into a feature array and a target array. Nothing is cached, so
    every access re-reads the files from disk.

    Parameters
    ----------
    filenames : sequence of str
        Paths of the CSV files that make up the dataset.
    feat_col_labels : list of str
        Column labels selected as model inputs.
    target_col_labels : str or list of str
        Column label(s) selected as model targets.
    batch_size : int
        Number of *files* (not rows) combined into one batch.
    """

    def __init__(self, filenames, feat_col_labels, target_col_labels, batch_size):
        self.filenames = filenames
        self.feat_col_labels = feat_col_labels
        self.target_col_labels = target_col_labels
        self.batch_size = batch_size

    def __len__(self):
        """Return the number of batches needed to visit every file once."""
        return int(np.ceil(len(self.filenames) / float(self.batch_size)))

    def __getitem__(self, idx):
        """
        Load batch number ``idx``.

        Returns
        -------
        tuple of numpy.ndarray
            ``(features, targets)`` taken from the files of this batch.

        Raises
        ------
        IndexError
            If ``idx`` does not address an existing batch.
        """
        n_batches = len(self)
        if idx < 0:
            # keep the usual sequence semantics for negative indices
            idx += n_batches
        if not 0 <= idx < n_batches:
            raise IndexError('batch index out of range')

        # __len__ counts batches of `batch_size` files, so a batch has to
        # consume that many files; reading only filenames[idx] would skip
        # every file past index len(self) - 1 whenever batch_size > 1.
        first = idx * self.batch_size
        batch_files = self.filenames[first:first + self.batch_size]
        frames = [pd.read_csv(name) for name in batch_files]
        data = frames[0] if len(frames) == 1 else pd.concat(frames, ignore_index=True)

        batch_x = data[self.feat_col_labels].values
        batch_y = data[self.target_col_labels].values

        return np.array(batch_x), np.array(batch_y)

In [None]:
def simple_model(input_dim=96):
    """
    Build and compile a small fully connected regression network.

    The network has two hidden ``Dense`` layers with 128 ReLU units each
    and a single linear output unit. It is compiled with the RMSprop
    optimizer, ``c_loss.mse_wrap_angle2`` as loss and
    ``c_loss.mae_wrap_angle2`` as the only metric. The layer summary is
    printed as a side effect.

    Parameters
    ----------
    input_dim : int, optional
        Number of input features per sample. Defaults to 96, the width
        that was previously hard-coded.

    Returns
    -------
    keras.models.Sequential
        The compiled model.
    """
    # create model
    model = Sequential()
    model.add(Dense(128, activation='relu', input_shape=(input_dim,)))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(1, activation='linear'))
    # show info
    model.summary()
    # compile model
    model.compile(loss = c_loss.mse_wrap_angle2,
                  optimizer = RMSprop(),
                  metrics = [c_loss.mae_wrap_angle2])
    return model

## Training

In [None]:
# training hyper-parameters
num_epochs = 100  # handed to the training call as `epochs`
batch_size = 1    # the generator class counts this in files per batch

In [None]:
# define dataset directory
data_dir = '/media/feliximmohr/Storage/master_thesis/generated/database/'
# load filelist
# NOTE(review): walk() also descends into sub-directories, but only the bare
# file names are kept, so `data_dir + name` is a valid path only when the
# directory is flat -- confirm the layout.
filelist = []
for (dirpath, dirnames, filenames) in walk(data_dir):
    filelist.extend(filenames)
# The order in which the OS lists directory entries is arbitrary. The cells
# below pick files by index, so sort to make those picks reproducible.
filelist.sort()

# load column labels only
# assumes every file shares the header of filelist[1] -- TODO confirm
column_label = pd.read_csv(data_dir+filelist[1], nrows=1).columns.tolist()
# features are all columns whose label mentions ILD, ITD or IC
feature_label = [s for s in column_label if ("ILD"  in s or "ITD" in s or "IC" in s)]
target_label = 'Localization_Azimuth'

In [None]:
# sample counts per split
# NOTE(review): presumably <rows per file> * <files in the split> -- confirm
num_valid_samples = 720000 * 10
num_train_samples = 720000 * 20

In [None]:
# train the model with model.fit_generator on file-based batches

model = simple_model()

# the first 20 files are used for training, the next 10 for validation
train_filenames = [data_dir + s for s in filelist[0:20]]
valid_filenames = [data_dir + s for s in filelist[20:30]]

# define generators
train_batch_generator = data_generator(train_filenames, feature_label, target_label, batch_size)
valid_batch_generator = data_generator(valid_filenames, feature_label, target_label, batch_size)

# One step consumes one generator batch (whole files, not single rows), so an
# epoch that visits the data once has len(generator) steps. Passing the row
# counts here would make each epoch cycle through the same files many times.
history = model.fit_generator(generator = train_batch_generator,
                              steps_per_epoch = len(train_batch_generator),
                              epochs = num_epochs,
                              verbose = 1,
                              validation_data = valid_batch_generator,
                              validation_steps = len(valid_batch_generator),
                              use_multiprocessing = True,
                              workers = 2,
                              max_queue_size = 32)

In [None]:
# normalization + train_test_split

# load data
data = pd.read_csv(data_dir+filelist[60])

# perform train_test_split first, with a fixed seed so the split is
# reproducible across kernel restarts
train, test = train_test_split(data, test_size=0.2, random_state=0)

# define scalers and fit them on the training rows only; fitting on the full
# frame would leak statistics of the held-out rows into the preprocessing
minmax_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
minmax_scaler.partial_fit(train[feature_label])

std_scale = preprocessing.StandardScaler().fit(train[feature_label])

# perform scaling
# (held-out values may fall slightly outside [0, 1] after min-max scaling,
# because the range is taken from the training rows)
train_minmax = minmax_scaler.transform(train[feature_label])
test_minmax = minmax_scaler.transform(test[feature_label])
train_std = std_scale.transform(train[feature_label])
test_std = std_scale.transform(test[feature_label])

# NOTE(review): the generators in the training cell feed raw, unscaled
# columns to the model, whereas x_train / x_test here are min-max scaled --
# confirm which preprocessing the evaluated model is supposed to see.
x_train = train_minmax
y_train = train[target_label]
x_test = test_minmax
y_test = test[target_label]

## Evaluation

In [None]:
# evaluate model
score = model.evaluate(x_test, y_test, verbose=0)
# Pair every returned value with the name Keras reports for it instead of
# indexing fixed positions: the model is compiled with a single metric, so
# only two values come back and a fixed third index would fail.
# atleast_1d covers the case where evaluate returns a bare scalar.
for name, value in zip(model.metrics_names, np.atleast_1d(score)):
    print('Test ' + name + ':', value)

# plot train history
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
# 'val_loss' is computed on the validation generator, not on the test split
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()

## Save

In [None]:
# Save history to json file
import json

# Cast every logged value to a built-in float first: depending on the Keras
# version the history may hold numpy scalars (e.g. float32), which json.dump
# refuses to serialize.
serializable_history = {key: [float(value) for value in values]
                        for key, values in history.history.items()}
with open('file.json', 'w') as f:
    json.dump(serializable_history, f)

In [None]:
# load_model is imported here but not used within this cell
from keras.models import load_model

# write the model to the HDF5 file 'my_model.h5' (relative path, so it is
# created in the current working directory)
model.save('my_model.h5')
# drop the name `model` from the namespace; any later cell that uses it
# has to re-create or reload the model first
del model