In [1]:
# Standard python imports
import os
import h5py
import numpy as np
import pandas as pd
from tqdm import tqdm, tnrange, tqdm_notebook

# Keras, tf, and sklearn
import tensorflow as tf
from sklearn.model_selection import train_test_split, GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
# Try seeing if we are using a gpu?
from keras import backend as K
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
K.set_session(sess)
print("Using gpu: ", K.tensorflow_backend._get_available_gpus())

# Make reproducible
SEED = 32
np.random.seed(SEED)
tf.set_random_seed(SEED)

# My module imports
import utils
from make_models import model_2_hidden_layer

import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


Using gpu:  ['/job:localhost/replica:0/task:0/device:GPU:0']


# Data Processing

In [2]:
# Maybe get the data .mat file
if not os.path.isfile("data.mat"):
    !wget https://www.dropbox.com/s/b1bnrj2f30xe1ns/xq_data_big.mat?dl=0
    !mv 'xq_data_big.mat?dl=0' data.mat

# Import the data from file
f = h5py.File("data.mat")
data_0, labels_0 = np.array(f['data']), np.array(f['labels'])
f.close()

# Feature subsampling via use of Jensen Shannon Divergence / Mutual Information
colInds = pd.read_csv("assets/divergences.csv")["Gene_index"].values[:1000]
data = utils.preprocess(data_0, colInds)

# Labels are off by 1 cuz matlab
labels = labels_0.squeeze().astype(int) - 1

#utils.visualize_data(data, labels);

In [3]:
# Parameters to use for the rest of this workflow
TEST_PROP = 0.1
NCLASS = 10
NFEATURES = data.shape[1]

# Get test and training
train_data, test_data, train_labels, test_labels = train_test_split(data, labels, test_size=TEST_PROP)

# Convert labels to one-hot format
train_labels = (np.arange(NCLASS) == train_labels[:, None]).astype(np.float32)
test_labels = (np.arange(NCLASS) == test_labels[:, None]).astype(np.float32)

# Do Grid Search

In [5]:
# Required args
n_feat = [train_data.shape[1]]
neurons_1 = [2048]
neurons_2 = [512]
# define the grid search parameters
batch_size = [10, 25, 50, 100]
epochs = [3, 6, 10]

# Make grid
param_grid = dict(batch_size=batch_size, epochs=epochs, n_feat=n_feat, neurons_1=neurons_1, neurons_2=neurons_2)

# create model
model = KerasClassifier(build_fn=model_2_hidden_layer, verbose=1)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(train_data, train_labels)



InternalError: CUDA runtime implicit initialization on GPU:0 failed. Status: out of memory

# Results

In [None]:
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))