#### Pre-train on the UCI-HAR dataset (or another individual data source)

In [None]:
import os
import pickle
from sklearn.preprocessing import LabelEncoder
import sktime
from sktime.datasets import load_from_tsfile
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import keras
from sklearn.metrics import accuracy_score, precision_score, recall_score
import matplotlib
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
import time

In [None]:
# Expose only CUDA device index 1 to this process. Set before TensorFlow
# enumerates devices so the restriction takes effect.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# `tf` is used below but was never imported by the notebook's import cell.
import tensorflow as tf

gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    try:
        # Grow GPU memory on demand instead of reserving it all up front.
        # Applied to every visible GPU so the setting is uniform across them.
        tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Memory growth has to be configured before the GPUs are initialised.
        print(e)

In [None]:
# Dataset used for pre-training: 'UCI_HAR', 'UAH_Driveset' or 'PVS_Dataset'.
data_name = "UCI_HAR"
# Model family selector: 'fcn' or 'lstm'.
model_type = "fcn"

In [None]:
# Load the dataset selected by `data_name` into
# train_X / test_X / train_y / test_y.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.

## UCI_HAR_dataset

if data_name == 'UCI_HAR':
    # The pickle is unpacked as four objects, in this order.
    with open('Extra_Dataset/UCI_HAR_dataset.pkl', 'rb') as f:
        train_X, test_X, train_y, test_y = pickle.load(f)

## UAH-Driveset

elif data_name == 'UAH_Driveset':
    file_path = '../../UAH-driveset/'
    # Despite the variable names, these are the feature file and the label
    # file; the train/test split is made below.
    train_file = 'all_feat_featset.p'
    test_file = 'all_label_featset.p'

    with open(file_path + train_file, 'rb') as f:
        X = pickle.load(f)

    with open(file_path + test_file, 'rb') as f:
        y = pickle.load(f)

    X = np.array(X)
    # Drop index 0 along the last axis.
    # NOTE(review): presumably a non-feature column (e.g. a timestamp) - confirm.
    X = X[:, :, 1:]
    y = np.array(y)

    # Map the raw labels to consecutive integers starting at 0.
    le = LabelEncoder()
    y = le.fit_transform(y)

    print(X.shape)
    print(y.shape)

    # Fixed random_state keeps the 80/20 split reproducible.
    train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2, random_state=0)

## PVS_Dataset

elif data_name == 'PVS_Dataset':
    file_path = 'Extra_Dataset/'
    file_name = 'PVS_Dataset.p'

    with open(file_path + file_name, 'rb') as f:
        data = pickle.load(f)

    # The pickle is indexed as (features, labels).
    X = data[0]
    y = data[1]

    # Map the raw labels to consecutive integers starting at 0.
    le = LabelEncoder()
    y = le.fit_transform(y)

    print(X.shape)
    print(y.shape)

    # Fixed random_state keeps the 80/20 split reproducible.
    train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2, random_state=0)

else:
    # Fail here instead of with a NameError on train_X further down.
    raise ValueError(
        "Unknown data_name %r; expected 'UCI_HAR', 'UAH_Driveset' or 'PVS_Dataset'"
        % (data_name,))

In [None]:
def build_model(input_shape, nb_classes, pre_model=None):
    """Build and compile a 1-D fully convolutional classifier.

    The network is three Conv1D -> BatchNormalization -> ReLU stages
    (128, 256 and 128 filters with kernel sizes 8, 5 and 3), followed by
    global average pooling and a softmax Dense layer with `nb_classes` units.

    Args:
        input_shape: Shape passed to `keras.layers.Input` (without the
            batch dimension).
        nb_classes: Number of output units of the softmax layer.
        pre_model: Optional model whose layer weights are copied, by layer
            index, into every layer of the new model except the last one.
            Its layers must therefore have weight shapes compatible with the
            corresponding layers of the new model.

    Returns:
        The model, compiled with categorical cross-entropy loss, an Adam
        optimizer with default settings, and the 'accuracy' metric.
    """
    input_layer = keras.layers.Input(input_shape)

    # The public `keras.layers.*` names are used instead of internal module
    # paths, whose location differs between Keras releases.
    conv1 = keras.layers.Conv1D(filters=128, kernel_size=8, padding='same')(input_layer)
    conv1 = keras.layers.BatchNormalization()(conv1)
    conv1 = keras.layers.Activation(activation='relu')(conv1)

    conv2 = keras.layers.Conv1D(filters=256, kernel_size=5, padding='same')(conv1)
    conv2 = keras.layers.BatchNormalization()(conv2)
    conv2 = keras.layers.Activation('relu')(conv2)

    conv3 = keras.layers.Conv1D(128, kernel_size=3, padding='same')(conv2)
    conv3 = keras.layers.BatchNormalization()(conv3)
    conv3 = keras.layers.Activation('relu')(conv3)

    gap_layer = keras.layers.GlobalAveragePooling1D()(conv3)

    output_layer = keras.layers.Dense(nb_classes, activation='softmax')(gap_layer)

    model = keras.models.Model(inputs=input_layer, outputs=output_layer)

    if pre_model is not None:
        # Transfer all weights except the final Dense head, which is left at
        # its fresh initialisation because its size depends on nb_classes.
        for i, layer in enumerate(model.layers[:-1]):
            layer.set_weights(pre_model.layers[i].get_weights())

    model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(),
                  metrics=['accuracy'])

    return model

In [None]:
def save_logs(output_directory, hist, y_pred, y_true, duration, lr=True, y_true_val=None, y_pred_val=None):
    """Write training history, test metrics and a best-epoch summary to disk.

    Files written (names are appended to `output_directory` by plain string
    concatenation, so it must end with a path separator):
    `history.csv`, `df_metrics.csv`, `df_best_model.csv`, `epochs_loss.png`.

    Args:
        output_directory: Destination directory prefix.
        hist: Object with a `history` mapping of per-epoch values; it must
            contain 'loss', 'val_loss', 'accuracy', 'val_accuracy', and
            'lr' when `lr` is truthy.
        y_pred: Predicted labels, forwarded to `calculate_metrics`.
        y_true: True labels, forwarded to `calculate_metrics`.
        duration: Duration value stored in the metrics table.
        lr: Whether to record the learning rate of the best epoch.
        y_true_val: Optional validation labels for `calculate_metrics`.
        y_pred_val: Optional validation predictions for `calculate_metrics`.

    Returns:
        The metrics DataFrame produced by `calculate_metrics`.
    """
    hist_df = pd.DataFrame(hist.history)
    hist_df.to_csv(output_directory + 'history.csv', index=False)

    df_metrics = calculate_metrics(y_true, y_pred, duration, y_true_val, y_pred_val)
    df_metrics.to_csv(output_directory + 'df_metrics.csv', index=False)

    # The "best" epoch is the one with the lowest *training* loss.
    index_best_model = hist_df['loss'].idxmin()
    row_best_model = hist_df.loc[index_best_model]

    # Builtin float replaces the np.float alias, which NumPy 1.24 removed.
    df_best_model = pd.DataFrame(
        data=np.zeros((1, 6), dtype=float), index=[0],
        columns=['best_model_train_loss', 'best_model_val_loss', 'best_model_train_acc',
                 'best_model_val_acc', 'best_model_learning_rate', 'best_model_nb_epoch'])

    print(row_best_model)

    df_best_model['best_model_train_loss'] = row_best_model['loss']
    df_best_model['best_model_val_loss'] = row_best_model['val_loss']
    df_best_model['best_model_train_acc'] = row_best_model['accuracy']
    df_best_model['best_model_val_acc'] = row_best_model['val_accuracy']
    if lr:
        df_best_model['best_model_learning_rate'] = row_best_model['lr']
    # Row index into the history table (0-based), not an epoch count.
    df_best_model['best_model_nb_epoch'] = index_best_model

    df_best_model.to_csv(output_directory + 'df_best_model.csv', index=False)

    # Plot the train/validation loss curves.
    plot_epochs_metric(hist, output_directory + 'epochs_loss.png')

    return df_metrics

In [None]:
def calculate_metrics(y_true, y_pred, duration, y_true_val=None, y_pred_val=None):
    """Return a one-row DataFrame of classification metrics.

    Args:
        y_true: True labels.
        y_pred: Predicted labels.
        duration: Value stored unchanged in the 'duration' column.
        y_true_val: Optional true labels of a validation set.
        y_pred_val: Optional predicted labels of a validation set; used only
            when `y_true_val` is given.

    Returns:
        DataFrame with columns 'precision' and 'recall' (macro-averaged),
        'accuracy' and 'duration', plus 'accuracy_val' when validation
        labels are supplied.
    """
    # Builtin float replaces the np.float alias, which NumPy 1.24 removed.
    res = pd.DataFrame(data=np.zeros((1, 4), dtype=float), index=[0],
                       columns=['precision', 'accuracy', 'recall', 'duration'])
    res['precision'] = precision_score(y_true, y_pred, average='macro')
    res['accuracy'] = accuracy_score(y_true, y_pred)

    if y_true_val is not None:
        # Useful when transfer learning is combined with cross-validation.
        res['accuracy_val'] = accuracy_score(y_true_val, y_pred_val)

    res['recall'] = recall_score(y_true, y_pred, average='macro')
    res['duration'] = duration
    return res

In [None]:
def plot_epochs_metric(hist, file_name, metric='loss'):
    """Save a plot of a training metric and its validation counterpart.

    Draws `hist.history[metric]` and `hist.history['val_' + metric]` on a
    new figure, writes the figure to `file_name` and closes it.
    """
    curves = hist.history
    plt.figure()
    # Training curve first, validation second, to match the legend order.
    for key in (metric, 'val_' + metric):
        plt.plot(curves[key])
    plt.title('model ' + metric)
    plt.xlabel('epoch', fontsize='large')
    plt.ylabel(metric, fontsize='large')
    plt.legend(['train', 'val'], loc='upper left')
    plt.savefig(file_name, bbox_inches='tight')
    plt.close()

In [None]:
def transform_labels(y_train, y_test):
    """Re-encode labels as consecutive integers starting at zero.

    The encoder is fitted on train and test labels together so both splits
    share one mapping, e.g. labels [1, 3, 4] become [0, 1, 2].

    Returns:
        Tuple `(new_y_train, new_y_test)` with the same lengths as the inputs.
    """
    n_train = len(y_train)
    combined = np.concatenate((y_train, y_test), axis=0)
    # Fit on the union of both splits, then encode that same array.
    encoder = LabelEncoder().fit(combined)
    encoded = encoder.transform(combined)
    # Split back at the original train/test boundary.
    return encoded[:n_train], encoded[n_train:]

In [None]:
def train(x_train, y_train, x_test, y_test, callbacks, pre_model=None):
    """Train a model from `build_model`, evaluate it and write the logs.

    Relies on module-level settings that must be defined before the call:
    `batch_size`, `nb_epochs`, `verbose`, `file_path` (model file reloaded
    after fitting) and `write_output_dir` (log destination).

    Args:
        x_train: Training inputs; a 2-D array is treated as univariate and
            gets a trailing channel axis of size 1.
        y_train: Training labels (any values accepted by `transform_labels`).
        x_test: Test inputs; also passed to `fit` as validation data.
        y_test: Test labels.
        callbacks: Keras callbacks forwarded to `model.fit`.
        pre_model: Optional pretrained model forwarded to `build_model`.

    Returns:
        None. Metrics are printed and written by `save_logs`.
    """
    y_true_val = None
    y_pred_val = None

    # About a tenth of the training set per batch, capped at batch_size.
    # Clamped to >= 1 so fewer than 10 training samples do not give a batch
    # size of 0.
    mini_batch_size = max(1, int(min(x_train.shape[0] / 10, batch_size)))
    nb_classes = len(np.unique(np.concatenate((y_train, y_test), axis=0)))

    # Re-encode labels so they start at zero and are consecutive.
    y_train, y_test = transform_labels(y_train, y_test)

    # Keep the integer test labels; y_test is one-hot encoded below.
    y_true = y_test.astype(np.int64)

    # Transform the labels from integers to one-hot vectors.
    y_train = keras.utils.to_categorical(y_train, nb_classes)
    y_test = keras.utils.to_categorical(y_test, nb_classes)

    if len(x_train.shape) == 2:  # univariate input
        # Add a channel axis so the data is (samples, time steps, 1).
        x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
        x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))

    start_time = time.time()
    # Time dimension is left as None; only the channel count is fixed.
    input_shape = (None, x_train.shape[2])
    model = build_model(input_shape, nb_classes, pre_model)

    if verbose == True:
        model.summary()

    # NOTE: the test split doubles as the validation set during fitting.
    hist = model.fit(x_train, y_train, batch_size=mini_batch_size, epochs=nb_epochs,
                     verbose=verbose, validation_data=(x_test, y_test), callbacks=callbacks)

    # Reload the model stored at file_path (expected to be the checkpoint
    # written by a ModelCheckpoint callback) instead of using the final weights.
    model = keras.models.load_model(file_path)

    y_pred = model.predict(x_test)
    # Convert class probabilities to integer class indices.
    y_pred = np.argmax(y_pred, axis=1)

    # Covers model construction, fitting, reloading and prediction.
    duration = time.time() - start_time

    df_metrics = save_logs(write_output_dir, hist, y_pred, y_true,
                           duration, lr=True, y_true_val=y_true_val,
                           y_pred_val=y_pred_val)

    print('df_metrics')
    print(df_metrics)

    keras.backend.clear_session()

In [None]:
def create_directory(directory_path):
    """Create `directory_path` (including parents) if it does not exist.

    Args:
        directory_path: Path of the directory to create.

    Returns:
        `directory_path` when this call created the directory; None when the
        path already existed, either beforehand or because another process
        created it in the meantime.

    Raises:
        OSError: For failures other than the path already existing
            (e.g. missing permissions).
    """
    if os.path.exists(directory_path):
        return None
    try:
        os.makedirs(directory_path)
    except FileExistsError:
        # Another process or machine created the path between the existence
        # check and makedirs.
        return None
    return directory_path

### Pre-train on Multivariate Time Series

In [None]:
# Training hyper-parameters; read as globals by train().
batch_size = 16
nb_epochs = 2000
verbose = 1

# Choose the results root for the selected model family
# (comparison with ==; a single = here is a syntax error).
if model_type == 'fcn':
    results_dir = 'results/fcn/'
elif model_type == 'lstm':
    results_dir = 'results_LSTM/fcn/'
else:
    raise ValueError("Unknown model_type %r; expected 'fcn' or 'lstm'" % (model_type,))

write_output_dir = results_dir + data_name + '/'
# Set the model output path.
file_path = write_output_dir + 'best_model.hdf5'
# Create the output directory.
create_directory(write_output_dir)
# Halve the learning rate after 50 epochs without training-loss improvement,
# never going below 1e-4.
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5,
                                              patience=50, min_lr=0.0001)
# Keep only the checkpoint with the lowest training loss.
model_checkpoint = keras.callbacks.ModelCheckpoint(filepath=file_path, monitor='loss',
                                                   save_best_only=True)
callbacks = [reduce_lr, model_checkpoint]

train(train_X, train_y, test_X, test_y, callbacks, pre_model=None)

### Transfer Learning using pretrained model

In [None]:
# Load the target dataset for transfer learning.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with open('../Data/data_no_std.pkl', 'rb') as f:
    dataset = pickle.load(f)

# The pickle is indexed in the order train_X, test_X, train_y, test_y.
train_X = dataset[0]
test_X = dataset[1]
train_y = dataset[2]
test_y = dataset[3]


# Choose the results root for the selected model family
# (comparison with ==; a single = here is a syntax error).
if model_type == 'fcn':
    results_dir = 'results/transfer/'
elif model_type == 'lstm':
    results_dir = 'results_LSTM/transfer/'
else:
    raise ValueError("Unknown model_type %r; expected 'fcn' or 'lstm'" % (model_type,))

write_output_dir = results_dir + data_name + '/'
# Set the model output path.
file_path = write_output_dir + 'best_model.hdf5'
# Create the output directory.
create_directory(write_output_dir)
# Pretrained source model.
# NOTE(review): always loaded from 'results/fcn/', also when
# model_type == 'lstm' - confirm this is intended.
pre_model = keras.models.load_model('results/fcn/' + data_name + '/best_model.hdf5')

# Halve the learning rate after 50 epochs without training-loss improvement,
# never going below 1e-4.
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5,
                                              patience=50, min_lr=0.0001)
# Keep only the checkpoint with the lowest training loss.
model_checkpoint = keras.callbacks.ModelCheckpoint(filepath=file_path, monitor='loss',
                                                   save_best_only=True)
callbacks = [reduce_lr, model_checkpoint]

# Pass the loaded model so its weights are actually transferred; passing
# pre_model=None here would train from scratch.
train(train_X, train_y, test_X, test_y, callbacks, pre_model=pre_model)