In [None]:
import pandas as pd

# Prep some data

In [36]:
# Split the clinical table into one CSV per column, keeping only the rows
# where that column has a value.
clin = pd.read_csv('../data/data/TCGA_Metabric_clinical.csv', index_col=0)
for target in clin.columns:
    has_value = clin[target].notna()
    kept = clin.loc[has_value, [target]]
    # Spaces in the column name are replaced so the file name has none.
    out_path = '../data/TCGA_Metabric_' + target.replace(' ', '_') + '.csv'
    kept.to_csv(out_path)

# Define custom models

In [12]:
class MyMLPClassifier(MLPClassifier):
    """MLPClassifier whose hidden layers are described by a width and a depth.

    ``hidden_layer_sizes`` is built as ``hidden_layer_depth`` copies of
    ``hidden_layer_width`` and passed to ``MLPClassifier``.

    Parameters
    ----------
    hidden_layer_width : number, default=2
        Units per hidden layer. Cast with ``int`` so float values are accepted
        (e.g. samples drawn with ``hp.quniform``).
    hidden_layer_depth : number, default=2
        Number of hidden layers. Cast with ``int`` for the same reason.
    **kwargs
        Forwarded unchanged to ``MLPClassifier.__init__``.

    Notes
    -----
    NOTE(review): ``MLPClassifier`` must already be imported when this cell
    runs; in this notebook the import sits in a later cell.
    NOTE(review): arguments passed through ``**kwargs`` are not named in this
    signature, so scikit-learn's ``get_params()``/``clone()`` will not report
    them -- confirm whether the pipeline clones estimators before relying on
    them.
    """

    def __init__(self, hidden_layer_width=2, hidden_layer_depth=2, **kwargs):
        # scikit-learn reads constructor arguments back from attributes of the
        # same name (get_params, clone, repr), so keep them unmodified.
        self.hidden_layer_width = hidden_layer_width
        self.hidden_layer_depth = hidden_layer_depth
        hidden_layer_sizes = (int(hidden_layer_width),) * int(hidden_layer_depth)
        super().__init__(hidden_layer_sizes=hidden_layer_sizes, **kwargs)


# Import MongoDB credentials

In [3]:
import csv
# Read the MongoDB connection settings from credentials.csv
# (columns: username, pwd, host) instead of hard-coding them here.
# newline='' is what the csv module asks for when it is given a file object.
with open('credentials.csv', newline='') as csvfile:
    rows = list(csv.DictReader(csvfile))

# Fail here with a clear message rather than with a NameError in a later cell.
if not rows:
    raise ValueError('credentials.csv contains no credential rows')

# If the file holds several rows the last one wins, as with the previous loop.
row = rows[-1]
mongo_user = row['username']
mongo_pwd = row['pwd']
mongo_host = row['host']

# Usage

In [None]:
import prior_pipeline as pp

from hyperopt import hp
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.gaussian_process.kernels import RBF

from prior_pipeline.custom_models.my_mlp import MyMLPClassifier

# MongoDB location and experiment key handed to the pipeline's fit call.
# Variant of the URL that embeds the user name and password, kept for reference:
# mongo = {
#             'url': 'mongo://{}:{}@{}:27017/hyperopt/jobs'.format(mongo_user, mongo_pwd, mongo_host),
#             'exp': 'test_run'
#         }
mongo = dict(
    url='mongo://{}:27017/hyperopt/jobs'.format(mongo_host),
    exp='test_run_1',
)

# One input CSV per value in `dims`; the search picks one of them per trial.
dims = [3, 5, 10, 15, 20, 40, 70]
data_paths = list(map('../data_s3/TCGA_Metabric_RNA_TSNE_{}.csv'.format, dims))
targets_path = '../data_s3/TCGA_Metabric_2_years.csv'

# Candidate models. Each entry names the estimator class and, optionally,
# the keyword arguments to build it with.
svc_model = {'name': SVC, 'params': {'probability': True}}
gp_model = {'name': GaussianProcessClassifier, 'params': {'kernel': RBF(1)}}
nb_model = {'name': GaussianNB}
mlp_model = {
    'name': MyMLPClassifier,
    # Hyperparameter ranges for the MLP, currently disabled:
#                 'params': {
#                     'activation': hp.choice('activation', ['tanh', 'relu']),
#                     'alpha': hp.choice('alpha', [5e-2]),
#                     'solver' : hp.choice('solver', ['adam', 'lbfgs', 'sgd']),
#                     'learning_rate': hp.choice('learning_rate', ['constant', 'adaptive']),
#                     'hidden_layer_width': hp.quniform('hidden_layer_width', 2, 300, 1),
#                     'hidden_layer_depth': hp.quniform('hidden_layer_depth', 2, 4, 1)
#                 }
}

# Search space: (model choice, input file choice, fixed targets file).
model_choice = hp.choice('model', [svc_model, gp_model, nb_model, mlp_model])
data_choice = hp.choice('data_path', data_paths)
space = (model_choice, data_choice, targets_path)


best = pp.classifier.fit(space=space, max_evals=100, mongo=mongo)
# best = pp.classifier.fit(space=space, max_evals=10)

over-writing old domain trials attachment


In [5]:
import hyperopt
# Build a MongoTrials handle for the URL and experiment key configured in `mongo`.
mongo_url = mongo['url']
experiment_key = mongo['exp']
trials = hyperopt.mongoexp.MongoTrials(mongo_url, exp_key=experiment_key)

In [19]:
# Loss recorded for each trial; an entry can be None (see the saved output).
trials.losses()

[None,
 0.3313511066115411,
 0.33817451755797534,
 0.2998145572388784,
 0.31272508522529296,
 0.3362504882309154,
 0.2998145572388784,
 0.3313511066115411,
 0.29828665922959297,
 0.31272508522529296]

In [20]:
# Raw trial documents (state, result, sampled indices, timestamps).
# NOTE(review): this prints every stored trial; consider slicing before sharing.
trials.trials

[SON([('_id', ObjectId('5ba225f397b7ea5f194a58d4')), ('state', 1), ('tid', 9), ('spec', None), ('result', SON([('status', 'new')])), ('misc', SON([('tid', 9), ('cmd', ['domain_attachment', 'FMinIter_Domain']), ('workdir', None), ('idxs', SON([('data_path', [9]), ('model', [9])])), ('vals', SON([('data_path', [0]), ('model', [1])]))])), ('exp_key', 'test_run'), ('owner', ['archlinux:24740']), ('version', 0), ('book_time', datetime.datetime(2018, 9, 19, 10, 33, 25, 757000)), ('refresh_time', datetime.datetime(2018, 9, 19, 10, 33, 25, 757000))]),
 SON([('_id', ObjectId('5ba225f897b7ea5f194a58d5')), ('state', 2), ('tid', 10), ('spec', None), ('result', SON([('loss', 0.3313511066115411), ('status', 'ok')])), ('misc', SON([('tid', 10), ('cmd', ['domain_attachment', 'FMinIter_Domain']), ('workdir', None), ('idxs', SON([('data_path', [10]), ('model', [10])])), ('vals', SON([('data_path', [1]), ('model', [0])]))])), ('exp_key', 'test_run'), ('owner', ['archlinux:26251']), ('version', 3), ('book

In [11]:
# Map the best trial's sampled choices (trials.argmin) back onto `space`
# to show the selected model entry, data file, and targets file.
hyperopt.space_eval(space, trials.argmin)

({'name': sklearn.naive_bayes.GaussianNB},
 '../data/TCGA_Metabric_RNA_TSNE_5.csv',
 '../data/TCGA_Metabric_2_years.csv')