Skip to content

Commit

Permalink
Renamed fields in GLM:

prior1 -> prior
beta_constraint -> beta_constraints
beta_eps -> beta_epsilon

Removed fields in GLM and DL:

n_folds
  • Loading branch information
tomkraljevic committed Apr 8, 2015
1 parent 32c33fe commit d21180a
Show file tree
Hide file tree
Showing 61 changed files with 133 additions and 129 deletions.
Expand Up @@ -59,7 +59,7 @@
},
{
"type": "cs",
"input": "buildModel 'deeplearning', {\"destination_key\":\"deeplearning-98393399-59c6-4828-97a0-b8d5d458c8f3\",\"training_frame\":\"train.hex\",\"validation_frame\":\"test.hex\",\"dropNA20Cols\":false,\"response_column\":\"C785\",\"n_folds\":0,\"activation\":\"Tanh\",\"hidden\":[50,50],\"epochs\":\"0.1\",\"loss\":\"MeanSquare\",\"variable_importances\":false,\"replicate_training_data\":true,\"balance_classes\":false,\"checkpoint\":\"\",\"use_all_factor_levels\":true,\"train_samples_per_iteration\":-2,\"adaptive_rate\":true,\"rho\":0.99,\"epsilon\":1e-8,\"input_dropout_ratio\":0,\"hidden_dropout_ratios\":[],\"l1\":0,\"l2\":0,\"score_interval\":5,\"score_training_samples\":10000,\"score_validation_samples\":0,\"autoencoder\":false,\"class_sampling_factors\":[],\"max_after_balance_size\":5,\"keep_cross_validation_splits\":false,\"override_with_best_model\":true,\"target_ratio_comm_to_comp\":0.02,\"seed\":-2362970147619006000,\"rate\":0.005,\"rate_annealing\":0.000001,\"rate_decay\":1,\"momentum_start\":0,\"momentum_ramp\":1000000,\"momentum_stable\":0,\"nesterov_accelerated_gradient\":true,\"max_w2\":\"Infinity\",\"initial_weight_distribution\":\"UniformAdaptive\",\"initial_weight_scale\":1,\"score_duty_cycle\":0.1,\"classification_stop\":0,\"regression_stop\":0.000001,\"max_hit_ratio_k\":10,\"score_validation_sampling\":\"Uniform\",\"diagnostics\":true,\"fast_mode\":true,\"ignore_const_cols\":true,\"force_load_balance\":true,\"single_node_mode\":false,\"shuffle_training_data\":false,\"missing_values_handling\":\"MeanImputation\",\"quiet_mode\":false,\"max_confusion_matrix_size\":20,\"sparse\":false,\"col_major\":false,\"average_activation\":0,\"sparsity_beta\":0,\"max_categorical_features\":2147483647,\"reproducible\":false}"
"input": "buildModel 'deeplearning', {\"destination_key\":\"deeplearning-98393399-59c6-4828-97a0-b8d5d458c8f3\",\"training_frame\":\"train.hex\",\"validation_frame\":\"test.hex\",\"dropNA20Cols\":false,\"response_column\":\"C785\",\"activation\":\"Tanh\",\"hidden\":[50,50],\"epochs\":\"0.1\",\"loss\":\"MeanSquare\",\"variable_importances\":false,\"replicate_training_data\":true,\"balance_classes\":false,\"checkpoint\":\"\",\"use_all_factor_levels\":true,\"train_samples_per_iteration\":-2,\"adaptive_rate\":true,\"rho\":0.99,\"epsilon\":1e-8,\"input_dropout_ratio\":0,\"hidden_dropout_ratios\":[],\"l1\":0,\"l2\":0,\"score_interval\":5,\"score_training_samples\":10000,\"score_validation_samples\":0,\"autoencoder\":false,\"class_sampling_factors\":[],\"max_after_balance_size\":5,\"keep_cross_validation_splits\":false,\"override_with_best_model\":true,\"target_ratio_comm_to_comp\":0.02,\"seed\":-2362970147619006000,\"rate\":0.005,\"rate_annealing\":0.000001,\"rate_decay\":1,\"momentum_start\":0,\"momentum_ramp\":1000000,\"momentum_stable\":0,\"nesterov_accelerated_gradient\":true,\"max_w2\":\"Infinity\",\"initial_weight_distribution\":\"UniformAdaptive\",\"initial_weight_scale\":1,\"score_duty_cycle\":0.1,\"classification_stop\":0,\"regression_stop\":0.000001,\"max_hit_ratio_k\":10,\"score_validation_sampling\":\"Uniform\",\"diagnostics\":true,\"fast_mode\":true,\"ignore_const_cols\":true,\"force_load_balance\":true,\"single_node_mode\":false,\"shuffle_training_data\":false,\"missing_values_handling\":\"MeanImputation\",\"quiet_mode\":false,\"max_confusion_matrix_size\":20,\"sparse\":false,\"col_major\":false,\"average_activation\":0,\"sparsity_beta\":0,\"max_categorical_features\":2147483647,\"reproducible\":false}"
},
{
"type": "md",
Expand All @@ -82,4 +82,4 @@
"input": "grid inspect \"Training Metrics\", getModel \"deeplearning-98393399-59c6-4828-97a0-b8d5d458c8f3\""
}
]
}
}
4 changes: 2 additions & 2 deletions h2o-docs/src/product/flow/packs/examples/GLM Example.flow
Expand Up @@ -39,7 +39,7 @@
},
{
"type": "cs",
"input": "buildModel 'glm', {\"destination_key\":\"glm-072b6da6-fc66-4288-b122-656db078301e\",\"training_frame\":\"abalone1.hex\",\"ignored_columns\":[\"C2\",\"C3\",\"C4\",\"C5\",\"C6\",\"C7\",\"C8\"],\"dropNA20Cols\":false,\"response_column\":\"C1\",\"solver\":\"ADMM\",\"max_iterations\":-1,\"beta_eps\":0,\"standardize\":false,\"family\":\"gaussian\",\"n_folds\":0,\"balance_classes\":false,\"link\":\"family_default\",\"tweedie_variance_power\":\"NaN\",\"tweedie_link_power\":\"NaN\",\"alpha\":[0.3],\"lambda\":[0.002],\"lambda_search\":false,\"use_all_factor_levels\":false,\"class_sampling_factors\":[],\"max_after_balance_size\":5,\"prior1\":0,\"nlambdas\":-1,\"lambda_min_ratio\":-1}"
"input": "buildModel 'glm', {\"destination_key\":\"glm-072b6da6-fc66-4288-b122-656db078301e\",\"training_frame\":\"abalone1.hex\",\"ignored_columns\":[\"C2\",\"C3\",\"C4\",\"C5\",\"C6\",\"C7\",\"C8\"],\"dropNA20Cols\":false,\"response_column\":\"C1\",\"solver\":\"ADMM\",\"max_iterations\":-1,\"beta_epsilon\":0,\"standardize\":false,\"family\":\"gaussian\",\"balance_classes\":false,\"link\":\"family_default\",\"tweedie_variance_power\":\"NaN\",\"tweedie_link_power\":\"NaN\",\"alpha\":[0.3],\"lambda\":[0.002],\"lambda_search\":false,\"use_all_factor_levels\":false,\"class_sampling_factors\":[],\"max_after_balance_size\":5,\"prior\":0,\"nlambdas\":-1,\"lambda_min_ratio\":-1}"
},
{
"type": "md",
Expand All @@ -58,4 +58,4 @@
"input": "grid inspect \"Coefficient Magnitudes\", getModel \"glm-072b6da6-fc66-4288-b122-656db078301e\""
}
]
}
}
3 changes: 1 addition & 2 deletions h2o-py/tests/testdir_algos/glm/pyunit_NOFEATURE_benignGLM.py
Expand Up @@ -12,8 +12,7 @@ def benign(ip,port):
X = [x for x in range(2,11) if x != Y]

#Log.info("Build the model")
model = h2o.glm(y=training_data[Y].asfactor(), x=training_data[X], family="binomial", n_folds=0, alpha=[0], Lambda=[1e-5])
#model = h2o.glm(y=training_data[Y].asfactor(), x=training_data[X], family="binomial", n_folds=5, alpha=[0], Lambda=[1e-5])
model = h2o.glm(y=training_data[Y].asfactor(), x=training_data[X], family="binomial", alpha=[0], Lambda=[1e-5])

#Log.info("Check that the columns used in the model are the ones we passed in.")
#Log.info("===================Columns passed in: ================")
Expand Down
6 changes: 3 additions & 3 deletions h2o-py/tests/testdir_algos/glm/pyunit_NOFEATURE_covtypeGLM.py
Expand Up @@ -22,15 +22,15 @@ def covtype(ip,port):
#covtype.summary()

# L2: alpha = 0, lambda = 0
covtype_mod1 = h2o.glm(y=covtype[myY], x=covtype[myX], family="binomial", n_folds=0, alpha=[0], Lambda=[0])
covtype_mod1 = h2o.glm(y=covtype[myY], x=covtype[myX], family="binomial", alpha=[0], Lambda=[0])
covtype_mod1.show()

# Elastic: alpha = 0.5, lambda = 1e-4
covtype_mod2 = h2o.glm(y=covtype[myY], x=covtype[myX], family="binomial", n_folds=0, alpha=[0.5], Lambda=[1e-4])
covtype_mod2 = h2o.glm(y=covtype[myY], x=covtype[myX], family="binomial", alpha=[0.5], Lambda=[1e-4])
covtype_mod2.show()

# L1: alpha = 1, lambda = 1e-4
covtype_mod3 = h2o.glm(y=covtype[myY], x=covtype[myX], family="binomial", n_folds=0, alpha=[1], Lambda=[1e-4])
covtype_mod3 = h2o.glm(y=covtype[myY], x=covtype[myX], family="binomial", alpha=[1], Lambda=[1e-4])
covtype_mod3.show()

if __name__ == "__main__":
Expand Down
Expand Up @@ -21,19 +21,19 @@ def covtype_getModel(ip,port):
#covtype_data.summary()

# L2: alpha = 0, lambda = 0
covtype_mod1 = h2o.glm(y=covtype[Y], x=covtype[X], family="binomial", n_folds=0, alpha=[0], Lambda=[0])
covtype_mod1 = h2o.glm(y=covtype[Y], x=covtype[X], family="binomial", alpha=[0], Lambda=[0])
covtype_mod1.show()
covtype_mod1 = h2o.getModel(covtype_mod1._key)
covtype_mod1.show()

# Elastic: alpha = 0.5, lambda = 1e-4
covtype_mod2 = h2o.glm(y=covtype[Y], x=covtype[X], family="binomial", n_folds=0, alpha=[0.5], Lambda=[1e-4])
covtype_mod2 = h2o.glm(y=covtype[Y], x=covtype[X], family="binomial", alpha=[0.5], Lambda=[1e-4])
covtype_mod2.show()
covtype_mod2 = h2o.getModel(covtype_mod2._key)
covtype_mod2.show()

# L1: alpha = 1, lambda = 1e-4
covtype_mod3 = h2o.glm(y=covtype[Y], x=covtype[X], family="binomial", n_folds=0, alpha=[1], Lambda=[1e-4])
covtype_mod3 = h2o.glm(y=covtype[Y], x=covtype[X], family="binomial", alpha=[1], Lambda=[1e-4])
covtype_mod3.show()
covtype_mod3 = h2o.getModel(covtype_mod3._key)
covtype_mod3.show()
Expand Down
Expand Up @@ -10,7 +10,7 @@ def perfectSeparation_unbalanced(ip,port):
data = h2o.import_frame(h2o.locate("smalldata/synthetic_perfect_separation/unbalanced.csv"))

print("Fit model on dataset.")
model = h2o.glm(x=data[["x1", "x2"]], y=data["y"], family="binomial", lambda_search=True, use_all_factor_levels=True, alpha=[0.5], n_folds=0, Lambda=[0])
model = h2o.glm(x=data[["x1", "x2"]], y=data["y"], family="binomial", lambda_search=True, use_all_factor_levels=True, alpha=[0.5], Lambda=[0])

print("Extract models' coefficients and assert reasonable values (ie. no greater than 50)")
print("Unbalanced dataset")
Expand Down
Expand Up @@ -12,13 +12,13 @@ def shuffling_large(ip,port):


print("Create model on original Arcene dataset.")
h2o_model = h2o.glm(x=train_data[0:1000], y=train_data[1000], family="binomial", lambda_search=True, alpha=[0.5], n_folds=0, use_all_factor_levels=True)
h2o_model = h2o.glm(x=train_data[0:1000], y=train_data[1000], family="binomial", lambda_search=True, alpha=[0.5], use_all_factor_levels=True)

print("Create second model on original Arcene dataset.")
h2o_model_2 = h2o.glm(x=train_data[0:1000], y=train_data[1000], family="binomial", lambda_search=True, alpha=[0.5], n_folds=0, use_all_factor_levels=True)
h2o_model_2 = h2o.glm(x=train_data[0:1000], y=train_data[1000], family="binomial", lambda_search=True, alpha=[0.5], use_all_factor_levels=True)

print("Create model on shuffled Arcene dataset.")
h2o_model_s = h2o.glm(x=train_data_shuffled[0:1000], y=train_data_shuffled[1000], family="binomial", lambda_search=True, alpha=[0.5], n_folds=0, use_all_factor_levels=True)
h2o_model_s = h2o.glm(x=train_data_shuffled[0:1000], y=train_data_shuffled[1000], family="binomial", lambda_search=True, alpha=[0.5], use_all_factor_levels=True)

print("Assert that number of predictors remaining and their respective coefficients are equal.")

Expand Down
Expand Up @@ -23,7 +23,7 @@ def link_functions_binomial(ip,port):
myX = ["ID","AGE","RACE","GLEASON","DCAPS","PSA","VOL","DPROS"]

print("Create models with canonical link: LOGIT")
h2o_model = h2o.glm(x=h2o_data[myX], y=h2o_data[myY].asfactor(), family="binomial", link="logit",alpha=[0.5], Lambda=[0], n_folds=0)
h2o_model = h2o.glm(x=h2o_data[myX], y=h2o_data[myY].asfactor(), family="binomial", link="logit",alpha=[0.5], Lambda=[0])
sm_model = sm.GLM(endog=sm_data_response, exog=sm_data_features, family=sm.families.Binomial(sm.families.links.logit)).fit()

print("Compare model deviances for link function logit")
Expand Down
Expand Up @@ -23,7 +23,7 @@ def link_functions_gamma(ip,port):
myX = ["ID","AGE","RACE","GLEASON","DCAPS","PSA","VOL","CAPSULE"]

print("Create models with canonical link: INVERSE")
h2o_model_in = h2o.glm(x=h2o_data[myX], y=h2o_data[myY], family="gamma", link="inverse",alpha=[0.5], Lambda=[0], n_folds=0)
h2o_model_in = h2o.glm(x=h2o_data[myX], y=h2o_data[myY], family="gamma", link="inverse",alpha=[0.5], Lambda=[0])
sm_model_in = sm.GLM(endog=sm_data_response, exog=sm_data_features, family=sm.families.Gamma(sm.families.links.inverse_power)).fit()

print("Compare model deviances for link function inverse")
Expand All @@ -32,7 +32,7 @@ def link_functions_gamma(ip,port):
assert h2o_deviance_in - sm_deviance_in < 0.01, "expected h2o to have an equivalent or better deviance measures"

print("Create models with canonical link: LOG")
h2o_model_log = h2o.glm(x=h2o_data[myX], y=h2o_data[myY], family="gamma", link="log",alpha=[0.5], Lambda=[0], n_folds=0)
h2o_model_log = h2o.glm(x=h2o_data[myX], y=h2o_data[myY], family="gamma", link="log",alpha=[0.5], Lambda=[0])
sm_model_log = sm.GLM(endog=sm_data_response, exog=sm_data_features, family=sm.families.Gamma(sm.families.links.log)).fit()

print("Compare model deviances for link function log")
Expand Down
Expand Up @@ -23,7 +23,7 @@ def link_functions_gaussian(ip,port):
myX = ["ID","AGE","RACE","CAPSULE","DCAPS","PSA","VOL","DPROS"]

print("Create models with canonical link: IDENTITY")
h2o_model = h2o.glm(x=h2o_data[myX], y=h2o_data[myY], family="gaussian", link="identity",alpha=[0.5], Lambda=[0], n_folds=0)
h2o_model = h2o.glm(x=h2o_data[myX], y=h2o_data[myY], family="gaussian", link="identity",alpha=[0.5], Lambda=[0])
sm_model = sm.GLM(endog=sm_data_response, exog=sm_data_features, family=sm.families.Gaussian(sm.families.links.identity)).fit()

print("Compare model deviances for link function identity")
Expand Down
Expand Up @@ -22,7 +22,7 @@ def link_functions_poisson(ip,port):
myX = ["ID","AGE","RACE","CAPSULE","DCAPS","PSA","VOL","DPROS"]

print("Create h2o model with canonical link: LOG")
h2o_model_log = h2o.glm(x=h2o_data[myX], y=h2o_data[myY], family="poisson", link="log",alpha=[0.5], Lambda=[0], n_folds=0)
h2o_model_log = h2o.glm(x=h2o_data[myX], y=h2o_data[myY], family="poisson", link="log",alpha=[0.5], Lambda=[0])

print("Create statsmodel model with canonical link: LOG")
sm_model_log = sm.GLM(endog=sm_data_response, exog=sm_data_features, family=sm.families.Poisson(sm.families.links.log)).fit()
Expand All @@ -33,7 +33,7 @@ def link_functions_poisson(ip,port):
assert h2o_deviance_log - sm_deviance_log < 0.01, "expected h2o to have an equivalent or better deviance measures"

print("Create h2o models with link: IDENTITY")
h2o_model_id = h2o.glm(x=h2o_data[myX], y=h2o_data[myY], family="poisson", link="identity",alpha=[0.5], Lambda=[0], n_folds=0)
h2o_model_id = h2o.glm(x=h2o_data[myX], y=h2o_data[myY], family="poisson", link="identity",alpha=[0.5], Lambda=[0])

print("Create statsmodel models with link: IDENTITY")
sm_model_id = sm.GLM(endog=sm_data_response, exog=sm_data_features, family=sm.families.Poisson(sm.families.links.identity)).fit()
Expand Down
Expand Up @@ -11,7 +11,7 @@ def perfectSeparation_balanced(ip,port):
data = h2o.import_frame(path=h2o.locate("smalldata/synthetic_perfect_separation/balanced.csv"))

print("Fit model on dataset")
model = h2o.glm(x=data[["x1", "x2"]], y=data["y"], family="binomial", lambda_search=True, use_all_factor_levels=True, alpha=[0.5], n_folds=0, Lambda=[0])
model = h2o.glm(x=data[["x1", "x2"]], y=data["y"], family="binomial", lambda_search=True, use_all_factor_levels=True, alpha=[0.5], Lambda=[0])

print("Extract models' coefficients and assert reasonable values (ie. no greater than 50)")
print("Balanced dataset")
Expand Down
Expand Up @@ -14,7 +14,7 @@ def wide_dataset_large(ip,port):
trainData = h2o.H2OFrame(np.column_stack((trainDataResponse, trainDataFeatures)).tolist())

print("Run model on 3250 columns of Arcene with strong rules off.")
model = h2o.glm(x=trainData[1:3250], y=trainData[0].asfactor(), family="binomial", lambda_search=False, alpha=[1], n_folds=0, use_all_factor_levels=True)
model = h2o.glm(x=trainData[1:3250], y=trainData[0].asfactor(), family="binomial", lambda_search=False, alpha=[1], use_all_factor_levels=True)

print("Test model on validation set.")
validDataResponse = np.genfromtxt(h2o.locate("smalldata/arcene/arcene_valid_labels.labels"), delimiter=' ')
Expand Down

0 comments on commit d21180a

Please sign in to comment.