In [1]:
"""
Classification of thyroid data
using deep learning (Keras)
"""

import pandas as pd
import numpy as np


def extract_data(path):
    """Load a tab-separated file into a pandas DataFrame.

    Args:
        path: Location of the TSV file, passed straight to ``pd.read_csv``.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    return pd.read_csv(path, sep="\t")

def create_features_target(data_frame):
    """Separate the ``target`` column from the remaining columns.

    Args:
        data_frame: DataFrame that contains a column named ``"target"``.

    Returns:
        A ``(features, target)`` tuple: ``features`` is ``data_frame`` without
        the ``"target"`` column, ``target`` is a single-column DataFrame
        holding only ``"target"``. The input frame is not modified.
    """
    label_column = "target"
    # Double brackets keep the result a DataFrame rather than a Series.
    labels = data_frame[[label_column]]
    predictors = data_frame.drop(columns=label_column)
    return predictors, labels

# Load the raw table, then split it into the predictor columns and the label column.
d_frame = extract_data("data/allbp.tsv")
print(d_frame.shape)
data_features, data_target = create_features_target(d_frame)
# Show the shapes so the reader can check that only the "target" column moved.
print(data_features.shape)
print(data_target.shape)

(3772, 30)
(3772, 29)
(3772, 1)


In [2]:
from sklearn.model_selection import train_test_split

# https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html#sklearn.model_selection.train_test_split
# Hold out 33% of the rows for testing; the fixed random_state keeps the split repeatable.
split_parts = train_test_split(data_features, data_target, test_size=0.33, random_state=42)
data_features_train, data_features_test, data_target_train, data_target_test = split_parts

# Report the shapes in the order: train features, train target, test features, test target.
for split_part in (data_features_train, data_target_train, data_features_test, data_target_test):
    print(split_part.shape)

(2527, 29)
(2527, 1)
(1245, 29)
(1245, 1)


In [None]:
import sys
import numpy as np
import time
import warnings
import csv
import pandas as pd
import json
import h5py

# machine learning library
from keras.callbacks import Callback
from keras.models import Sequential
from keras.layers import Flatten, Input
from keras.layers.core import Dropout, Activation, Dense
from keras.models import Model
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

from hyperopt import fmin, tpe, hp, STATUS_OK, Trials


def create_model(params, n_epochs=100):
    """Build, train and score one candidate network for the hyperopt search.

    This is the objective function handed to ``hyperopt.fmin``, which calls it
    with a single positional argument: the hyperparameter point it sampled.

    The training and validation data are read from the notebook-level split
    (``data_features_train``, ``data_target_train``, ``data_features_test``,
    ``data_target_test``), so the cell that creates that split must have run.

    Args:
        params: Mapping with the keys ``"hidden_units"``, ``"hidden_activation"``,
            ``"dropout"``, ``"output_activation"``, ``"learning_rate"`` and
            ``"batch_size"``.
        n_epochs: Upper bound on training epochs. Early stopping may end
            training sooner. NOTE(review): the original code read an undefined
            ``self.n_epochs``; 100 is a placeholder default, confirm the
            intended value.

    Returns:
        A dict with ``'loss'`` (validation loss of the last epoch that ran),
        ``'status'`` (``STATUS_OK``) and ``'model'`` (the trained Keras model).
    """
    output_units = 3
    loss_type = 'categorical_crossentropy'
    # The original body called `self.load_data()`, but this is a plain function
    # with no `self`; the split created earlier in the notebook is used instead.
    train_X, train_y = data_features_train, data_target_train
    test_X, test_y = data_features_test, data_target_test
    # NOTE(review): this assumes the features are already numeric and the target
    # is one-hot encoded with `output_units` columns, which is what
    # categorical_crossentropy needs. No encoding step is visible in this
    # notebook, so confirm one runs before this function is called.
    dimensions = train_X.shape[1]

    # hp.quniform yields floats, hence the int() casts for unit and batch counts.
    n_hidden = int(params["hidden_units"])
    inputs = Input(shape=(dimensions,))
    x = Dense(n_hidden, activation=params["hidden_activation"])(inputs)
    x = Dropout(params["dropout"])(x)
    x = Dense(n_hidden, activation=params["hidden_activation"])(x)
    x = Dropout(params["dropout"])(x)
    predictions = Dense(output_units, activation=params["output_activation"])(x)
    # assign the inputs and output to the model
    model = Model(inputs=inputs, outputs=predictions)
    # create an optimizer object
    optimizer = Adam(lr=params["learning_rate"])
    # add optimizer and loss type to the model
    model.compile(optimizer=optimizer, loss=loss_type)
    model.summary()
    # stop once the validation loss has not improved for two consecutive epochs
    early_stopping = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=2)
    # train the model using the training data and compute error on validation data
    model_fit = model.fit(
        train_X,
        train_y,
        validation_data=(test_X, test_y),
        epochs=n_epochs,
        batch_size=int(params["batch_size"]),
        callbacks=[early_stopping],
    )
    return {'loss': model_fit.history["val_loss"][-1], 'status': STATUS_OK, 'model': model}

# set initial ranges for parameters
max_evals = 30
batch_size = [30, 250]
hidden_activation = ['tanh', 'relu', 'elu', 'sigmoid']
# The output layer is scored with categorical cross-entropy, so only activations
# with outputs in (0, 1) are offered; tanh/relu/elu can emit zero or negative values.
output_activation = ['softmax', 'sigmoid']
hidden_units = [30, 250]

hyper_params = {
    "batch_size": hp.quniform("batch_size", batch_size[0], batch_size[1], 1),
    "hidden_units": hp.quniform("hidden_units", hidden_units[0], hidden_units[1], 1),
    "learning_rate": hp.loguniform("learning_rate", np.log(1e-4), np.log(1e-2)),
    "hidden_activation": hp.choice("hidden_activation", hidden_activation),
    "output_activation": hp.choice("output_activation", output_activation),
    "dropout": hp.uniform("dropout", 0.0, 0.5)
}

# Run the TPE search; every evaluation calls create_model with one sampled point.
trials = Trials()
best_parameters = fmin(create_model, hyper_params, trials=trials, algo=tpe.suggest, max_evals=max_evals)
sorted_results = sorted(trials.results, key=lambda i: i['loss'])
# `apply_nn` is not defined anywhere in this notebook, so the original call raised
# NameError after the whole search had finished. Save the best model directly.
sorted_results[0]['model'].save("best_model.h5")

# fmin reports hp.choice parameters as indices into their option lists;
# translate them back to the activation names, each through its own list.
choice_options = {
    'hidden_activation': hidden_activation,
    'output_activation': output_activation,
}
best_model_params = dict()
print(best_parameters)
for item, item_val in best_parameters.items():
    if item in choice_options:
        best_model_params[item] = choice_options[item][item_val]
    else:
        best_model_params[item] = item_val
print(best_model_params)