Skip to content

Commit

Permalink
Improve NN runtime by reducing num_categories_to_keep (#246)
Browse files (browse the repository at this point in the history)
* changed default num_categories_to_keep hyperparameter of NN (the 'proc.max_category_levels' setting, lowered from 500 to 100) to improve runtimes

* allow NN to be trained for 0 epochs
  • Loading branch information
jwmueller committed Jan 29, 2020
1 parent 5c5ad0d commit 93a00ed
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 1 deletion.
Expand Up @@ -13,7 +13,7 @@ def get_fixed_params():
# Options: [3,4,10, 100, 1000]
'proc.impute_strategy': 'median', # # strategy argument of sklearn.SimpleImputer() used to impute missing numeric values
# Options: ['median', 'mean', 'most_frequent']
'proc.max_category_levels': 500, # maximum number of allowed levels per categorical feature
'proc.max_category_levels': 100, # maximum number of allowed levels per categorical feature
# Options: [10, 100, 200, 300, 400, 500, 1000, 10000]
'proc.skew_threshold': 0.99, # numerical features whose absolute skewness is greater than this receive special power-transform preprocessing. Choose big value to avoid using power-transforms
# Options: [0.2, 0.3, 0.5, 0.8, 1.0, 10.0, 100.0]
Expand Down
17 changes: 17 additions & 0 deletions autogluon/utils/tabular/ml/models/tabular_nn/tabular_nn_model.py
Expand Up @@ -293,6 +293,23 @@ def train_net(self, params, train_dataset, test_dataset=None,
else:
verbose_eval = 1

if num_epochs == 0: # use dummy training loop that stops immediately (useful for using NN just for data preprocessing / debugging)
logger.log(20, "Not training Neural Net since num_epochs == 0. Neural network architecture is:")
for batch_idx, data_batch in enumerate(train_dataset.dataloader):
data_batch = train_dataset.format_batch_data(data_batch, self.ctx)
with autograd.record():
output = self.model(data_batch)
labels = data_batch['label']
loss = self.loss_func(output, labels) / loss_scaling_factor
# print(str(nd.mean(loss).asscalar()), end="\r") # prints per-batch losses
loss.backward()
self.optimizer.step(labels.shape[0])
if batch_idx > 0:
break
self.model.save_parameters(self.net_filename)
logger.log(15, "untrained Neural Net saved to file")
return

# Training Loop:
for e in range(num_epochs):
if e == 0: # special actions during first epoch:
Expand Down

0 comments on commit 93a00ed

Please sign in to comment.