In [None]:
import csv
import time
import pickle
import numpy as np
import scipy.io as sio
import matplotlib.pyplot as plt
import pandas as pd

# hyperparameter optimization routines
from hyperopt import hp
from hyperopt import tpe
from hyperopt import fmin
from hyperopt import Trials
from hyperopt import STATUS_OK
from hyperopt.pyll.stochastic import sample

# persistence images routines
import PersistenceImages.persistence_images as pimgs

from sklearn.decomposition import PCA
from sklearn.model_selection import cross_validate
from sklearn.pipeline import Pipeline
from sklearn.metrics import make_scorer
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import os.path

from HyperoptUtils import *
from BoneData import *

## Persistence Images

In [None]:
# -------------------
# Bayesian optimization (hyperopt TPE) over the persistence-image
# vectorization parameters and the ridge penalty.
bone_df = get_bone_data_df()
dgm_df = bone_df[['dgm']]        # double brackets: keep a one-column DataFrame
target_df = bone_df['trabnum']   # regression target
# make_scorer defaults to greater_is_better=True, so no sign flip is applied
# to the MSE.
scorer = make_scorer(mean_squared_error)
# fmin treats max_evals as the total trial count, including any trials that
# are already present in a resumed Trials object.
max_evals = 495
cv=6

# Fixed persistence-image region handed to the vectorizer. The values are
# hard-coded here; presumably chosen to cover the full dataset -- confirm.
birth_range = (0, 0.5)
pers_range = (0, 0.61)
max_death = 0.7

pipeline_ridge = Pipeline([('scaler', StandardScaler()), ('ridge', Ridge())])

# Search space: fixed entries are plain values, tuned entries are hp.* nodes.
# 'ridge__normalize' is intentionally not set: Ridge's `normalize` parameter
# was deprecated in scikit-learn 1.0 and removed in 1.2, so forwarding it to
# the estimator fails on current releases. False was the default, so leaving
# it out does not change the fitted model on older releases.
param_space = {'estimator_params':
                {
                'method': pipeline_ridge,
                'kwargs':  {
                            'ridge__fit_intercept':True,
                            # log-uniform: alpha is drawn from [e^-10, 1]
                            'ridge__alpha':hp.loguniform('alphas', -10, 0)
                            }
                },
                'dgm_vec_params':
                {
                'method': vec_dgm_by_per_images,
                'kwargs': {
                            'birth_range': birth_range,
                            'pers_range': pers_range,
                            'max_death': max_death,
                            'pixel_size': hp.uniform('pixel_size', 0.01, 0.1),
                            'weight_params': {'n': hp.uniform('n', 1, 3)},
                            'kernel_params': {'sigma': hp.uniform('sigma', 0.005, 0.1)},
                            'do_plot': False
                            }
                }
            }


# ---------------------------------
# create the objective function to minimize, passing in all fixed arguments
def objective(params):
    """Evaluate one sampled point of `param_space` via `cv_objective`.

    The data, scorer and fold count are read from the notebook namespace at
    call time, exactly as the original lambda did.
    """
    return cv_objective(params,
                        dgm_df=dgm_df,
                        target_df=target_df,
                        scorer=scorer,
                        cv=cv,
                        verbose=True)


# continue parameterization run if already started
# NOTE(review): pickle.load executes arbitrary code from the file; only resume
# from a trials file that this project wrote itself.
# NOTE(review): this cell never writes bayes_trials back to disk -- confirm a
# later cell saves it, otherwise the resume branch is never taken. The same
# path is also read by the "Grabbing And Sorting" search, whose search space
# differs; confirm that sharing one file is intended.
if os.path.isfile('data/complex_vec_dgm_bayes_trials.pickle'):
    with open('data/complex_vec_dgm_bayes_trials.pickle','rb') as f:
        bayes_trials = pickle.load(f)
else:
    bayes_trials = Trials()

# run the hyperparameter optimization
best = fmin(fn=objective,
            space=param_space,
            algo=tpe.suggest,
            max_evals=max_evals,
            trials=bayes_trials)

## Grabbing And Sorting

In [None]:
# Bayesian optimization (hyperopt TPE) over the `vec_dgm_by_per`
# vectorization parameters and the ridge penalty.
bone_df = get_bone_data_df()
dgm_df = bone_df[['dgm']]        # double brackets: keep a one-column DataFrame
target_df = bone_df['trabnum']   # regression target
# make_scorer defaults to greater_is_better=True, so no sign flip is applied
# to the MSE.
scorer = make_scorer(mean_squared_error)
# fmin treats max_evals as the total trial count, including any trials that
# are already present in a resumed Trials object.
max_evals = 495
cv=5

# NOTE(review): birth_range / pers_range are not referenced by this cell's
# search space; they are kept only because other cells may read them from the
# notebook namespace -- confirm and remove if unused.
birth_range = (0, 1)
pers_range = (0, 1)

pipeline_ridge = Pipeline([('scaler', StandardScaler()), ('ridge', Ridge())])

# Search space: fixed entries are plain values, tuned entries are hp.* nodes.
# 'ridge__normalize' is intentionally not set: Ridge's `normalize` parameter
# was deprecated in scikit-learn 1.0 and removed in 1.2, so forwarding it to
# the estimator fails on current releases. False was the default, so leaving
# it out does not change the fitted model on older releases.
param_space = {'estimator_params':
                {
                'method': pipeline_ridge,
                'kwargs':  {
                            'ridge__fit_intercept':True,
                            # log-uniform: alpha is drawn from [e^-10, 1]
                            'ridge__alpha':hp.loguniform('alphas', -10, 0)
                            }
                },
                'dgm_vec_params':
                {
                'method': vec_dgm_by_per,
                'kwargs': {
                             # hp.quniform yields floats rounded to a multiple
                             # of q (e.g. 3.0); presumably vec_dgm_by_per casts
                             # them to int before indexing -- TODO confirm
                             'start': hp.quniform('start', 0, 150, 1),
                             'num_pairs': hp.quniform('num_pairs', 1, 150, 1),
                             'per_only': hp.choice('per_only', [True, False])
                            }
                }
            }


# ---------------------------------
# create the objective function to minimize, passing in all fixed arguments
def objective(params):
    """Evaluate one sampled point of `param_space` via `cv_objective`.

    The data, scorer and fold count are read from the notebook namespace at
    call time, exactly as the original lambda did.
    """
    return cv_objective(params,
                        dgm_df=dgm_df,
                        target_df=target_df,
                        scorer=scorer,
                        cv=cv,
                        verbose=True)


# continue parameterization run if already started
# NOTE(review): pickle.load executes arbitrary code from the file; only resume
# from a trials file that this project wrote itself.
# NOTE(review): this is the same path the "Persistence Images" search reads,
# but that search uses a different search space; resuming from its trials here
# would mix incompatible histories -- confirm a separate file is not needed.
# This cell also never writes bayes_trials back to disk.
if os.path.isfile('data/complex_vec_dgm_bayes_trials.pickle'):
    with open('data/complex_vec_dgm_bayes_trials.pickle','rb') as f:
        bayes_trials = pickle.load(f)
else:
    bayes_trials = Trials()

# run the hyperparameter optimization
best = fmin(fn=objective,
            space=param_space,
            algo=tpe.suggest,
            max_evals=max_evals,
            trials=bayes_trials)