In [None]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV

from aircraft_dataset import aircraft_dataset_split
from aircraft_dataset import AircraftDatasetFoldIterator

from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Dropout
from keras import regularizers
from keras.wrappers.scikit_learn import KerasClassifier

import sys
sys.path.append('../')
import util.reports as rp

In [None]:
# Load the exported segment and slice it by column position:
# columns [0, 680) are the predictors, column 680 is read as the response and
# column 681 is read into `measurements`.
N_PREDICTOR_COLUMNS = 680
dataset = pd.read_csv('exports/2019-07-28 13-06-28/segment_1.csv')
predictors = dataset.iloc[:, :N_PREDICTOR_COLUMNS].values
responses = dataset.iloc[:, N_PREDICTOR_COLUMNS].values
measurements = dataset.iloc[:, N_PREDICTOR_COLUMNS + 1].values
# Last expression of the cell: displays the distinct response values
np.unique(responses)

In [None]:
# Splits into training and holdout set.
# The measurements returned for the training side are bound to their own name
# (`measurements_train`) instead of overwriting `measurements`. Rebinding
# `measurements` here would replace the array loaded from the CSV, so running
# this cell a second time would pass the already-split array together with the
# full `predictors`/`responses`.
X, y, X_holdout, y_holdout, measurements_train, measurement_holdout = aircraft_dataset_split(
    predictors, responses, measurements, return_measurements=True)

# Encode the class labels as integers. The encoder is fitted on the training
# labels only and then applied to both label arrays.
lc = LabelEncoder()
lc.fit(y)
y = lc.transform(y)
y_holdout = lc.transform(y_holdout)

# Fold iterator over the training data (3 folds); it is passed as `cv` to the grid search.
iterator = AircraftDatasetFoldIterator(X, y, measurements_train, folds=3)
iterator.build()

In [None]:
# Utility function to build the Keras model (params of the function are used during grid search)
def build_model(num_features, num_outputs, dense_architecture, regularization = 0,
                dense_activation='sigmoid', dropout_rate = 0, input_shape=(20,34,1)):
    """Build and compile a small convolutional classifier.

    The network is two Conv2D(relu) + MaxPooling2D stages (32 then 64 filters),
    a Flatten layer, one Dense + Dropout pair per entry of ``dense_architecture``
    and a final Dense output layer.

    Parameters
    ----------
    num_features : int
        Accepted for interface compatibility with the caller; it is not used
        by the model itself, whose input size is fixed by ``input_shape``.
    num_outputs : int
        Number of units of the output layer. With more than one output the
        layer uses softmax and the model is compiled with
        'sparse_categorical_crossentropy'; otherwise it uses sigmoid and
        'binary_crossentropy'.
    dense_architecture : iterable of int
        Unit count of each hidden dense layer, in order. May be empty.
    regularization : float
        L2 factor applied to the kernels of the convolutional and hidden
        dense layers (the output layer is not regularized).
    dense_activation : str
        Activation of the hidden dense layers.
    dropout_rate : float
        Rate of the Dropout layer added after every hidden dense layer.
    input_shape : tuple
        Shape of one input sample given to the first Conv2D layer.
        Defaults to (20, 34, 1).

    Returns
    -------
    The compiled ``Sequential`` model (optimizer 'adam').
    """
    mdl = Sequential()

    # Convolutional feature extractor
    mdl.add(Conv2D(32, kernel_size=2, activation='relu', input_shape=input_shape,
                   kernel_regularizer=regularizers.l2(regularization)))
    mdl.add(MaxPooling2D(pool_size=2))
    mdl.add(Conv2D(64, kernel_size=2, activation='relu',
                   kernel_regularizer=regularizers.l2(regularization)))
    mdl.add(MaxPooling2D(pool_size=2))
    mdl.add(Flatten())

    # Hidden dense layers: every layer is configured the same way, so no
    # special case is needed for the first one.
    for units in dense_architecture:
        mdl.add(Dense(units, activation=dense_activation,
                      kernel_regularizer=regularizers.l2(regularization)))
        mdl.add(Dropout(rate=dropout_rate))

    # Output layer and loss depend on the number of outputs
    if num_outputs > 1:
        mdl.add(Dense(num_outputs, activation='softmax'))
        mdl.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
    else:
        mdl.add(Dense(num_outputs, activation='sigmoid'))
        mdl.compile(optimizer='adam', loss='binary_crossentropy')

    return mdl

In [None]:
# Performs grid search
# Candidate values for the build_model arguments explored by the search
architecture_choices = [(100,50), (100,)]
activation_choices = ['sigmoid', 'relu']
regularization_choices = [0,0.1]
dropout_choices = [0,0.1,0.2]

hyperparams = [{
    'dense_architecture': architecture_choices,
    'dense_activation': activation_choices,
    'regularization': regularization_choices,
    'dropout_rate': dropout_choices
}]

# Wrap build_model as a scikit-learn estimator; the fixed arguments are set
# here, the searched ones come from `hyperparams`.
n_classes = len(np.unique(y))
clf = KerasClassifier(
    build_model,
    num_features=X.shape[1],
    num_outputs=n_classes,
    batch_size=None,
    epochs=100,
    verbose=1,
)

# NOTE(review): n_jobs=-1 asks for fits on all cores — confirm this behaves
# well with the Keras backend in use.
validator = GridSearchCV(
    estimator=clf,
    param_grid=hyperparams,
    cv=iterator,
    scoring='accuracy',
    n_jobs=-1,
    iid=False,
    verbose=1,
)

# The flat predictor rows are reshaped to (samples, 20, 34, 1) before fitting
X_grid = X.reshape((X.shape[0], 20, 34, 1))
validator.fit(X_grid, y)
rp.report_grid_search(validator.cv_results_)

In [None]:
# Training set performance
# Reshape the training predictors to (samples, 20, 34, 1), predict with the
# fitted grid search and report macro-averaged metrics.
X_train_images = X.reshape((X.shape[0], 20, 34, 1))
y_pred = validator.predict(X_train_images)
rp.report_classification(y, y_pred, avg='macro')

In [None]:
# Holdout set performance
# Reshape the holdout predictors to (samples, 20, 34, 1), predict with the
# fitted grid search and report macro-averaged metrics.
X_holdout_images = X_holdout.reshape((X_holdout.shape[0], 20, 34, 1))
y_pred = validator.predict(X_holdout_images)
rp.report_classification(y_holdout, y_pred, avg='macro')