In [42]:
from h2o.estimators.gbm import H2OGradientBoostingEstimator
from h2o.estimators.glm import H2OGeneralizedLinearEstimator
from h2o.estimators.deeplearning import H2ODeepLearningEstimator
from h2o.estimators.random_forest import H2ORandomForestEstimator
from h2o.model.metrics_base import H2OBinomialModelMetrics as H2OBinomialModelMetrics
from h2o.model.metrics_base import H2OMultinomialModelMetrics as H2OMultinomialModelMetrics
from h2o.model.metrics_base import H2ORegressionModelMetrics as H2ORegressionModelMetrics

from h2o import save_model as save_model
from h2o import load_model as load_model
from h2o import init as init
from h2o import connect as connect
from h2o import connection as connection
from h2o import cluster as cluster


from h2o import H2OFrame as H2OFrame
from pandas import DataFrame as DataFrame
import json
import time
from collections import OrderedDict as OrderedDict
from hashlib import md5 as md5
from hashlib import sha256 as sha256
from os import makedirs as mkdir
from os import path as path
from shutil import copyfile
import copy

# NOTE(review): this rebinds the module-level __name__, so any later
# `if __name__ == '__main__'` check or use of __name__ in this namespace sees
# 'H2OHandler' instead of the real module name — confirm this is intentional.
__name__ = 'H2OHandler'

class H2OHandler(object):
    """
    H2OHandler

    Train: gets a list of tuples (analysis_results.json + normalizations, i.e. algorithm + normalizations over a DataFrame) and launches the analysis on the H2O platform
    predict: gets an analysis_results.json, loads the stored model and executes the algorithm
    Algorithms and model operation:

    "H2OGradientBoostingEstimator" :{
      "id" : "number",
      "module" : "h2o.estimators.gbm",
      "types": {
        "binomial": {"active" : true, "valued": "enum"},
        "multinomial": {"active" : true, "valued": "enum"},
        "regression" : {"active" : true, "valued": "float64"},
        "topology" : {"active" : false, "valued": "float64"}
      }

    "H2OGeneralizedLinearEstimator" :{
      "id" : "number",
      "module" : "h2o.estimators.glm",
      "types": {
        "binomial": {"active" : true, "valued": "enum"},
        "multinomial": {"active" : true, "valued": "enum"},
        "regression" : {"active" : true, "valued": "float64"},
        "topology" : {"active" : false, "valued": "float64"}
      }

    "H2ODeepLearningEstimator" :{
      "id" : "number",
      "module" : "h2o.estimators.deeplearning",
      "types": {
        "binomial": {"active" : true, "valued": "enum"},
        "multinomial": {"active" : true, "valued": "enum"},
        "regression" : {"active" : true, "valued": "float64"},
        "topology" : {"active" : false, "valued": "float64"}
      }

    "H2ORandomForestEstimator" :{
      "id" : "number",
      "module" : "h2o.estimators.random_forest",
      "types": {
        "types": {
          "binomial": {"active" : true, "valued": "enum"},
          "multinomial": {"active" : true, "valued": "enum"},
          "regression" : {"active" : true, "valued": "float64"},
          "topology" : {"active" : false, "valued": "float64"}
        }

    Status Codes:
        -1 : Uninitialized
        0: Success
        1: Error

    """

    def __init__(self):
        self._model_base = None
        self.path_localfs = r'D:/Data/models'
        self.path_hdfs = None
        self.url = 'http://127.0.0.1:54321'
        self.nthreads = 6
        self.ice_root = r'D:/Data/logs'
        self.max_mem_size = '8G'
        self.start_h2o = True
        self._debug = True
        self._framework = 'h2o'

        try:
            self._h2o_session = connect(url=self.url)
        except:
            init(url=self.url, nthreads=self.nthreads, ice_root=self.ice_root, max_mem_size=self.max_mem_size)
            self._h2o_session = connection()

        print('Session_id: ' + self._h2o_session.session_id())

    def __del__(self):
        self._h2o_session.close()

    @classmethod
    def shutdown_cluster(cls):
        try:
            cluster().shutdown()
        except:
            print('H20-cluster not working')

    @staticmethod
    def _hash_keys(hash_type, filename):

            if hash_type == 'MD5':
                return md5(open(filename, 'rb').read()).hexdigest()
            elif hash_type == 'SHA256':
                return sha256(open(filename, 'rb').read()).hexdigest()

    def order_training(self, analysis_id, training_frame, valid_frame, analysis_list):
        """
        Train and persist every model described in ``analysis_list``.

        Each algorithm description is parsed as JSON. For every entry of its
        ``model_parameters['h2o']`` list an estimator is built from the entry's
        ``model`` name and selectable parameters, trained, saved under
        ``<path_localfs>/<framework>/<model_id>/train/`` and described by a
        copy of the analysis structure (paths, hashes, parameters, metrics)
        that is written to the configured json paths.

        :param analysis_id: str identifier; returned unchanged.
        :param training_frame: pandas DataFrame with the training data.
        :param valid_frame: pandas DataFrame with the validation data, or None.
        :param analysis_list: list of ``(algorithm_description, normalization)``
            pairs. Both items are passed to ``json.load``; ``normalization``
            may be None and is only parsed and type-checked here.
        :return: tuple ``(analysis_id, model_list)`` where ``model_list`` holds
            one result structure per trained model, across all analyses.
        """
        assert isinstance(analysis_id, str)
        assert isinstance(training_frame, DataFrame)
        assert isinstance(valid_frame, DataFrame) or valid_frame is None
        assert isinstance(analysis_list, list)

        model_list = list()
        analysis_timestamp = str(time.time())

        # NOTE: the names training_frame / valid_frame are referenced by the
        # evaluated model command below and must stay bound in this scope.
        training_frame = H2OFrame(python_obj=training_frame)
        if valid_frame is not None:
            valid_frame = H2OFrame(python_obj=valid_frame)

        for algorithm_description, normalization in analysis_list:

            # Initializing base structures
            struct_ar = json.load(algorithm_description, object_pairs_hook=OrderedDict)

            # Generating base_path: <path_localfs>/<framework>/<model_id>/train/
            base_path = ''.join([self.path_localfs, '/', self._framework, '/',
                                 struct_ar['model_id'], '/', 'train', '/'])
            if not path.exists(base_path):
                mkdir(base_path, 0o0777)

            if normalization is not None:
                struct_ns = json.load(normalization, object_pairs_hook=OrderedDict)
            else:
                struct_ns = None

            assert isinstance(struct_ar, OrderedDict)
            assert isinstance(struct_ns, OrderedDict) or normalization is None

            for each_model in struct_ar['model_parameters']['h2o']:

                final_ar_model = copy.deepcopy(struct_ar)
                final_ar_model['type'] = 'train'
                final_ar_model['timestamp'] = analysis_timestamp
                model_timestamp = str(time.time())

                y = each_model['parameters']['response_column']['value']
                x = training_frame.col_names
                x.remove(y)

                # Classification models need a categorical response column.
                if each_model['types'][0]['active']:
                    if each_model['types'][0]['type'] in ['binomial', 'multinomial']:
                        training_frame[y] = training_frame[y].asfactor()
                        if valid_frame is not None:
                            valid_frame[y] = valid_frame[y].asfactor()

                # Building the estimator constructor call as source text
                model_command = list()
                model_command.append(each_model['model'])
                model_command.append("(")
                model_command.append("training_frame=training_frame")

                if valid_frame is not None:
                    model_command.append(", validation_frame=valid_frame")

                model_id = each_model['model'] + '_' + model_timestamp
                model_command.append(", model_id='%s'" % model_id)

                for key, value in each_model['parameters'].items():
                    if value['seleccionable']:
                        if isinstance(value['value'], str):
                            model_command.append(", %s='%s'" % (key, value['value']))
                        else:
                            model_command.append(", %s=%s" % (key, value['value']))

                model_command.append(")")
                model_command = ''.join(model_command)
                print(model_command)

                # Modify when hdfs method will be implemented; for now the
                # first load_path / log_path entry must be localfs.

                # Generating model
                if self._debug:
                    connection().start_logging(
                        base_path + final_ar_model['log_path'][0]['value'] + '\\DEBUG' + model_id + '.log')
                try:
                    # NOTE(security): model_command is assembled from the JSON
                    # description and evaluated as Python; descriptions must
                    # come from a trusted source.
                    self._model_base = eval(model_command)
                    if valid_frame is not None:
                        self._model_base.train(x=x, y=y, training_frame=training_frame,
                                               validation_frame=valid_frame)
                    else:
                        self._model_base.train(x=x, y=y, training_frame=training_frame)
                finally:
                    # Stop API logging even when building or training fails.
                    if self._debug:
                        connection().stop_logging()

                model_path = base_path + final_ar_model['load_path'][0]['value']
                if not path.exists(model_path):
                    mkdir(model_path, 0o0777)
                save_model(model=self._model_base, path=model_path, force=True)

                # Generating json ar.json
                # setting load_path: the first entry is where the model was
                # saved; every further localfs entry receives a replica of it.
                for position, each_storage_type in enumerate(final_ar_model['load_path']):
                    if each_storage_type['type'] != 'localfs':
                        # 'hdfs' storage is not implemented yet.
                        continue

                    if position == 0:
                        each_storage_type['value'] = model_path + '/' + model_id
                    else:
                        replica_dir = base_path + each_storage_type['value']
                        if not path.exists(replica_dir):
                            mkdir(replica_dir, 0o0777)
                        each_storage_type['value'] = replica_dir + '/' + model_id
                        self._replicate_file(each_storage_type['type'],
                                             each_storage_type['value'],
                                             final_ar_model['load_path'][0]['type'],
                                             final_ar_model['load_path'][0]['value'])

                    # Generating hash tags
                    for each_hash_type in each_storage_type['hash_list']:
                        if each_hash_type['type'] in ('MD5', 'SHA256'):
                            each_hash_type['value'] = self._hash_keys(each_hash_type['type'],
                                                                      each_storage_type['value'])

                # Generating model parameters
                final_ar_model['model_parameters']['h2o'] = list()
                final_ar_model['model_parameters']['h2o'].append(each_model.copy())
                final_ar_model['model_parameters']['h2o'][0]['parameters']['model_id'] = model_id

                # Generating metrics
                final_ar_model['metrics'] = OrderedDict()
                final_ar_model['metrics']['train'] = self._generate_metrics(dataframe=None, source='train')
                if valid_frame is not None:
                    final_ar_model['metrics']['valid'] = self._generate_metrics(dataframe=None, source='valid')
                final_ar_model['metrics']['xval'] = self._generate_metrics(dataframe=None, source='xval')

                # Generating log_path ('hdfs' is not implemented yet)
                for each_storage_type in final_ar_model['log_path']:
                    if each_storage_type['type'] == 'localfs':
                        if not path.exists(base_path + each_storage_type['value']):
                            mkdir(base_path + each_storage_type['value'], 0o0777)
                        each_storage_type['value'] = base_path + each_storage_type['value'] + '/' + model_id + '.log'

                # writing ar.json file ('hdfs' and 'mongoDB' are not implemented yet)
                json_files = list()
                for each_storage_type in final_ar_model['json_path']:
                    if each_storage_type['type'] == 'localfs':
                        if not path.exists(base_path + each_storage_type['value']):
                            mkdir(base_path + each_storage_type['value'], 0o0777)
                        each_storage_type['value'] = base_path + each_storage_type['value'] + '/' + model_id + '.json'
                        json_files.append(each_storage_type)
                self._store_files(json_files, final_ar_model)

                model_list.append(final_ar_model)

        # Returned only after every analysis has been processed.
        return analysis_id, model_list

    def predict(self, dataframe, algorithm_description):
        model_timestamp = str(time.time())

        struct_ar = json.load(algorithm_description, object_pairs_hook=OrderedDict)
        load_fails = True
        hash_fails = True
        counter_storage = 0
        counter_hash = 0

        assert isinstance(struct_ar['load_path'], list)
        while counter_storage < len(struct_ar['load_path']) and load_fails:
            while counter_hash < len(struct_ar['load_path'][counter_storage]['hash_list']) and \
                    hash_fails:
                print('generated_key %s' % self._hash_keys(struct_ar['load_path'][counter_storage]['hash_list']
                                                           [counter_hash]['type'],
                                                           struct_ar['load_path'][counter_storage]['value']))
                print('storage_key %s' % struct_ar['load_path'][counter_storage]['hash_list'][counter_hash]['value'])
                if self._hash_keys(struct_ar['load_path'][counter_storage]['hash_list'][counter_hash]['type'],
                                   struct_ar['load_path'][counter_storage]['value']) == \
                        struct_ar['load_path'][counter_storage]['hash_list'][counter_hash]['value']:
                    load_fails = False
                    hash_fails = False
                    try:
                        self._model_base = load_model(struct_ar['load_path'][counter_storage]['value'])
                    except:
                        print('Model json: invalid')
                else:
                    counter_hash += 1
            counter_hash = 0
            counter_storage += 1

        if load_fails:
            return ('Necesario cargar un modelo valid o ar.json valido')


        predict_frame = H2OFrame(python_obj=dataframe)

        y = struct_ar['model_parameters']['h2o'][0]['parameters']['response_column']['value']
        if struct_ar['model_parameters']['h2o'][0]['types'][0]['type'] in ['binomial', 'multinomial']:
            predict_frame[y].asfactor()

        struct_ar['type'] = 'predict'
        struct_ar['timestamp'] = model_timestamp
        struct_ar['metrics'] = OrderedDict()
        struct_ar['metrics']['predict'] = self._generate_metrics(predict_frame, source=None)

        # writing ar.json file
        json_files = list()
        for each_storage_type in struct_ar['json_path']:
            if each_storage_type['type'] == 'localfs':
                if not path.exists(path.dirname(each_storage_type['value'].replace('train', 'predict'))):
                    mkdir(path.dirname(each_storage_type['value'].replace('train', 'predict')), 0o0777)
                each_storage_type['value'] = each_storage_type['value'].replace('train', 'predict')\
                    .replace('.json', '_' + model_timestamp + '.json')
                json_files.append(each_storage_type)
            elif each_storage_type['type'] == 'hdfs':
                None
            elif each_storage_type['type'] == 'mongoDB':
                None
        self._store_files(json_files, struct_ar)

        return (self._model_base.predict(predict_frame).as_data_frame(use_pandas=True), struct_ar)

    def _generate_metrics(self, dataframe, source):
        """
        Generate model metrics for this model on test_data.

        :param H2OFrame test_data: Data set for which model metrics shall be computed against. All three of train,
            valid and xval arguments are ignored if test_data is not None.
        :param source 'train': Report the training metrics for the model.
        :param source 'valid': Report the validation metrics for the model.
        :param source 'xval': Report the cross-validation metrics for the model. If train and valid are True, then it
            defaults to True.
        """

        model_metrics = OrderedDict()

        if dataframe is not None:
            perf_metrics = self._model_base.model_performance(dataframe)
        else:
            if source == 'valid':
                perf_metrics = self._model_base.model_performance(valid=True)
            elif source == 'xval':
                perf_metrics = self._model_base.model_performance(xval=True)
            else:
                perf_metrics = self._model_base.model_performance(train=True)

        for parameter, value in perf_metrics._metric_json.items():
            if parameter in ['hit_ratio_table', 'gains_lift_table', 'max_criteria_and_metric_scores']:
                model_metrics[parameter] = value.as_data_frame().to_json(orient='split')
            elif parameter in ['cm']:
                model_metrics[parameter] = value['table'].as_data_frame().to_json(orient='split')
            elif parameter in ['thresholds_and_metric_scores']:
                model_metrics['cm'] = OrderedDict()
                for each_parameter in ['min_per_class_accuracy', 'absolute_mcc', 'precision', 'accuracy',
                                       'f0point5', 'f2', 'f1', 'mean_per_class_accuracy']:
                    model_metrics['cm'][each_parameter] = \
                        perf_metrics.confusion_matrix(
                            metrics=each_parameter).table.as_data_frame().to_json(orient='split')
                model_metrics[parameter] = value.as_data_frame().to_json(orient='split')
            elif not isinstance(value, dict) and parameter not in ['model_checksum', 'frame_checksum', 'description']:
                model_metrics[parameter] = value

        return model_metrics.copy()

    def get_metric(self, algorithm_description, metric, source):  # not tested
        try:
            struct_ar = OrderedDict(json.load(algorithm_description))
            model_metrics = struct_ar['metrics']['source']
        except:
            return ('Necesario cargar un modelo valid o ar.json valido')
        try:
            return struct_ar['metrics']['source'][metric]
        except:
            return 'Not Found'

    @staticmethod
    def _replicate_file(type_dest, path_dest, type_source, path_source):
        if type_source == 'localfs':
            if type_dest == 'localfs':
                if not path.exists(path.dirname(path_dest)):
                    mkdir(path.dirname(path_dest), 0o0777)
                copyfile(path_source, path_dest)
            elif type_dest == 'hdfs':
                None
            elif type_dest == 'mongoDB':
                None
        elif type_source == 'hdfs':
            if type_dest == 'localfs':
                None
            elif type_dest == 'hdfs':
                None
            elif type_dest == 'mongoDB':
                None
        elif type_source == 'mongoDB':
            if type_dest == 'localfs':
                None
            elif type_dest == 'hdfs':
                None
            elif type_dest == 'mongoDB':
                None

    @staticmethod
    def _store_files(json_files, struct_ar):
        for each_storage_type in json_files:
            if each_storage_type['type'] == 'localfs':
                file = open(each_storage_type['value'], 'w')
                json.dump(struct_ar, file, indent=4)
                file.close()
            elif each_storage_type['type'] == 'hdfs':
                None
            elif each_storage_type['type'] == 'mongoDB':
                None

    @staticmethod
    def _store_files(json_files, struct_ar):
        for each_storage_type in json_files:
            if each_storage_type['type'] == 'localfs':
                file = open(each_storage_type['value'], 'w')
                json.dump(struct_ar, file, indent=4)
                file.close()
            elif each_storage_type['type'] == 'hdfs':
                None
            elif each_storage_type['type'] == 'mongoDB':
                None


In [7]:
# Start a local H2O instance (or attach to one already running) with 2 threads,
# 8G of memory and D:/Data/logs as its ice root.
init(ice_root='D:/Data/logs', nthreads=2, max_mem_size='8G')

Checking whether there is an H2O instance running at http://localhost:54321..... not found.
Attempting to start a local H2O server...
; OpenJDK 64-Bit Server VM (Zulu 8.17.0.3-win64) (build 25.102-b14, mixed mode)
  Starting server from D:\Anaconda3\lib\site-packages\h2o\backend\bin\h2o.jar
  Ice root: D:/Data/logs
  JVM stdout: C:\Users\Public\Documents\Wondershare\CreatorTemp\tmp6nirnv5x\h2o_e2its_started_from_python.out
  JVM stderr: C:\Users\Public\Documents\Wondershare\CreatorTemp\tmp6nirnv5x\h2o_e2its_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321... successful.


0,1
H2O cluster uptime:,03 secs
H2O cluster version:,3.10.4.6
H2O cluster version age:,1 month
H2O cluster name:,H2O_from_python_e2its_e2t404
H2O cluster total nodes:,1
H2O cluster free memory:,7.111 Gb
H2O cluster total cores:,8
H2O cluster allowed cores:,2
H2O cluster status:,"accepting new members, healthy"
H2O connection url:,http://127.0.0.1:54321


In [30]:
# NOTE(review): this rebinds the name `connection` from the function imported
# from h2o to the object it returns, so later `connection()` calls in this
# namespace (H2OHandler uses them) no longer reach the h2o function — consider
# a different variable name.
connection = connection()

In [15]:
# Print a log-file style name built from the connection's ip and port.
print('h2o_'+ connection.ip() + '-' + connection.port() + '-3-info.log')

h2o_127.0.0.1-54321-3-info.log


In [31]:
# Show the concrete class of the connection object.
type(connection)

h2o.backend.connection.H2OConnection

In [18]:
import h2o
# Send a marker message containing the session id, a placeholder process name
# and the current epoch time through h2o.log_and_echo.
h2o.log_and_echo('Session: %s process %s started at: %s' % (connection.session_id(), 'model_id_name', time.time()))

In [None]:
from h2o.backend import H2OCluster, H2OLocalServer
from h2o.exceptions import H2OConnectionError, H2OServerError, H2OResponseError, H2OValueError

In [43]:
from pandas import DataFrame as DataFrame
from pandas import concat as concat
import numpy as np
import os
from six.moves import cPickle as pickle

def reformat(dataset, labels):
    """
    Turn image and label arrays into pandas DataFrames.

    The images are flattened to rows of ``image_size * image_size`` float32
    values (``image_size`` is read from the module namespace) with
    string column names; the labels become a single string-valued column named
    'objective0'.

    :param dataset: array of images, reshaped to (-1, image_size * image_size).
    :param labels: array of labels, reshaped to (-1, 1).
    :return: tuple ``(dataset_frame, labels_frame)``.
    """
    flat_images = dataset.reshape((-1, image_size * image_size)).astype(np.float32)
    dataset = DataFrame(flat_images)
    dataset.columns = [str(column) for column in dataset.columns.values]

    label_column = labels.reshape((-1, 1)).astype(str)
    labels = DataFrame(label_column)
    labels.columns = ['objective' + str(column) for column in labels.columns.values]

    return dataset, labels

def convertir_binomial(x):
    """Map a label to a binary class: 1 when ``int(x)`` is even, 0 otherwise."""
    return 1 if int(x) % 2 == 0 else 0

os.chdir('d:/Data/Gdeeplearning-Udacity')
pickle_file = 'notMNIST.pickle'

# NOTE: unpickling can execute arbitrary code; only load pickle files that come
# from a trusted source.
with open(pickle_file, 'rb') as f:
    save = pickle.load(f)
    train_dataset = save['train_dataset']
    train_labels = save['train_labels']
    valid_dataset = save['valid_dataset']
    valid_labels = save['valid_labels']
    test_dataset = save['test_dataset']
    test_labels = save['test_labels']
    del save  # hint to help gc free up memory

# Keep a window of up to 20000 training samples that ends just before the last one.
train_dataset = train_dataset[-20001:-1]
train_labels = train_labels[-20001:-1]

print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

# In[5]:

image_size = 28
num_labels = 10

pd_train_dataset, pd_train_labels = reformat(train_dataset, train_labels)
pd_valid_dataset, pd_valid_labels = reformat(valid_dataset, valid_labels)
# The test frames are built from the test arrays (previously the validation
# arrays were passed here by mistake).
pd_test_dataset, pd_test_labels = reformat(test_dataset, test_labels)
pd_train_dataset = concat([pd_train_dataset, pd_train_labels], axis=1)
pd_valid_dataset = concat([pd_valid_dataset, pd_valid_labels], axis=1)
pd_test_dataset = concat([pd_test_dataset, pd_test_labels], axis=1)

print('Training set', pd_train_dataset.shape)
print('Validation set', pd_valid_dataset.shape)
print('Test set', pd_test_dataset.shape)

# Binomial_test: collapse the labels into two classes (even -> 1, odd -> 0).

pd_train_dataset['objective0'] = pd_train_dataset['objective0'].apply(convertir_binomial)
pd_valid_dataset['objective0'] = pd_valid_dataset['objective0'].apply(convertir_binomial)
pd_test_dataset['objective0'] = pd_test_dataset['objective0'].apply(convertir_binomial)


Training set (20000, 28, 28) (20000,)
Validation set (10000, 28, 28) (10000,)
Validation set (10000, 28, 28) (10000,)
Training set (20000, 785)
Validation set (10000, 785)
Test set (10000, 785)
Connecting to H2O server at http://127.0.0.1:54321... successful.


0,1
H2O cluster uptime:,51 mins 53 secs
H2O cluster version:,3.10.4.6
H2O cluster version age:,1 month
H2O cluster name:,H2O_from_python_e2its_e2t404
H2O cluster total nodes:,1
H2O cluster free memory:,6.346 Gb
H2O cluster total cores:,8
H2O cluster allowed cores:,2
H2O cluster status:,"locked, healthy"
H2O connection url:,http://127.0.0.1:54321


Session_id: _sid_a22e
Parse progress: |█████████████████████████████████████████████████████████| 100%
Parse progress: |█████████████████████████████████████████████████████████| 100%
H2OGradientBoostingEstimator(training_frame=training_frame, validation_frame=valid_frame, model_id='H2OGradientBoostingEstimator_1496396879.7755053', nfolds=2, keep_cross_validation_predictions=True, keep_cross_validation_fold_assignment=True, score_each_iteration=True, score_tree_interval=1000, fold_assignment='auto', response_column='objective0', ignore_const_cols=True, balance_classes=True, max_after_balance_size=5.0, max_hit_ratio_k=3, ntrees=5, max_depth=10, max_runtime_secs=100, learn_rate=0.05, learn_rate_annealing=1.0, distribution='bernoulli')
Now logging all API requests to file 'D:/Data/models/h2o/PoC-binomial/train/log\\DEBUGH2OGradientBoostingEstimator_1496396879.7755053.log'
gbm Model Build progress: |███████████████████████████████████████████████| 100%
Logging stopped.
H2OGeneralizedLinear

In [106]:
# Hand-configured deep learning estimator: two hidden layers of 200 units,
# rectifier activation, 2-fold cross-validation and a bernoulli distribution.
model =H2ODeepLearningEstimator(model_id='H2ODeepLearningEstimator_1496401427.7939088.2', nfolds=2, keep_cross_validation_predictions=True, keep_cross_validation_fold_assignment=True, response_column='objective0', score_each_iteration=True, max_confusion_matrix_size=20, max_hit_ratio_k=3, standardize=False, activation='rectifier', hidden=[200, 200], epochs=10.0, train_samples_per_iteration=128, target_ratio_comm_to_comp=0.5, adaptive_rate=True, l2=0.005, distribution='bernoulli', max_runtime_secs=100.0, rate=0.05, rate_annealing=0.0001)

In [110]:
# Upload the pandas training set to H2O.
training_frame=H2OFrame(python_obj=pd_train_dataset)
y = 'objective0'
# NOTE(review): x is the full column list and still contains the response
# column y; H2OHandler.order_training removes y from x before training —
# confirm whether the same is needed here.
x = training_frame.col_names
# Convert the response to a categorical column before training.
training_frame[y] = training_frame[y].asfactor()

model.train(training_frame=training_frame, x=x, y=y)

Parse progress: |█████████████████████████████████████████████████████████| 100%
deeplearning Model Build progress: |██████████████████████████████████████| 100%


In [111]:
# Fetch the trained model's parameter descriptions (a dict keyed by parameter name).
params = model.get_params()

In [112]:
import pprint
# Pretty-print the full parameter description dict for inspection.
pprint.pprint(params)

{'activation': {'__meta': {'schema_name': 'ModelParameterSchemaV3',
                           'schema_type': 'Iced',
                           'schema_version': 3},
                'actual_value': 'Rectifier',
                'default_value': 'Rectifier',
                'gridable': True,
                'help': 'Activation function.',
                'is_member_of_frames': [],
                'is_mutually_exclusive_with': [],
                'label': 'activation',
                'level': 'critical',
                'name': 'activation',
                'required': False,
                'type': 'enum',
                'values': ['Tanh',
                           'TanhWithDropout',
                           'Rectifier',
                           'RectifierWithDropout',
                           'Maxout',
                           'MaxoutWithDropout']},
 'adaptive_rate': {'__meta': {'schema_name': 'ModelParameterSchemaV3',
                              'schema_type': 'Iced',
   

In [113]:
# Report every model parameter, flagging those whose actual value differs from
# the default. Bookkeeping parameters (ids, frames, response) are not reported.
print(type(params))
unreported_keys = ['model_id', 'training_frame', 'validation_frame', 'response_column']
for key, values in params.items():
    modified = values['actual_value'] != values['default_value']
    reported = key not in unreported_keys
    if modified and reported:
        print( 'ALERT: Modified key: %s - New Value %s - Default Value %s'% (key,values['actual_value'], values['default_value'] ))
    elif reported:
        print( 'Info: Full Stack Key: %s - New Value %s - Default Value %s'% (key,values['actual_value'], values['default_value'] ))

<class 'dict'>
Info: Full Stack Key: pretrained_autoencoder - New Value None - Default Value None
Info: Full Stack Key: regression_stop - New Value 1e-06 - Default Value 1e-06
Info: Full Stack Key: balance_classes - New Value False - Default Value False
Info: Full Stack Key: force_load_balance - New Value True - Default Value True
Info: Full Stack Key: max_after_balance_size - New Value 5.0 - Default Value 5.0
Info: Full Stack Key: shuffle_training_data - New Value False - Default Value False
ALERT: Modified key: score_each_iteration - New Value True - Default Value False
Info: Full Stack Key: average_activation - New Value 0.0 - Default Value 0.0
Info: Full Stack Key: loss - New Value Automatic - Default Value Automatic
Info: Full Stack Key: checkpoint - New Value None - Default Value None
Info: Full Stack Key: rate_decay - New Value 1.0 - Default Value 1.0
Info: Full Stack Key: initial_weights - New Value None - Default Value None
Info: Full Stack Key: stopping_metric - New Value AUT

In [45]:
# Attach to the running H2O server, then load a model previously saved on disk
# (the path points at a saved H2ORandomForestEstimator from the train run).
h2o.connect()
model = h2o.load_model(r'D:\Data\models\h2o\PoC-binomial\train\models\H2ORandomForestEstimator_1496397096.371754')

Connecting to H2O server at http://localhost:54321... successful.


0,1
H2O cluster uptime:,1 hour 30 mins
H2O cluster version:,3.10.4.6
H2O cluster version age:,1 month and 1 day
H2O cluster name:,H2O_from_python_e2its_e2t404
H2O cluster total nodes:,1
H2O cluster free memory:,6.177 Gb
H2O cluster total cores:,8
H2O cluster allowed cores:,2
H2O cluster status:,"locked, healthy"
H2O connection url:,http://localhost:54321


In [None]:
# Shut down the H2O cluster this session is connected to.
h2o.cluster().shutdown()

In [None]:
# Show the id of the current H2O session.
h2o.session_id()

In [114]:
# Clean up the cluster via h2o.remove_all().
# NOTE(review): presumably this deletes every frame and model held by the cluster — confirm before running against shared data.
h2o.remove_all()